Skip to content

Commit

Permalink
RNG-187: Add a benchmark of a 64-bit paired shuffle
Browse files Browse the repository at this point in the history
  • Loading branch information
aherbert committed Oct 17, 2024
1 parent a61f9e1 commit 8f5dcea
Show file tree
Hide file tree
Showing 2 changed files with 172 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,8 @@ public class ArrayShuffleBenchmark {
* method used (-bound % bound) for (2^L % bound) which only works for unsigned integer
* modulus. */
private static final long POW_32 = 1L << 32;
/** 2^30. Length threshold to sample 2 integers from a random 64-bit value. */
private static final int POW_30 = 1 << 30;
/** 2^15. Length threshold to sample 2 integers from a random 32-bit value. */
private static final int POW_15 = 1 << 15;
/** 2^9. Length threshold to sample 3 integers from a random 32-bit value. */
Expand Down Expand Up @@ -186,6 +188,8 @@ public void setup() {
fun = ArrayShuffleBenchmark::shuffle2;
} else if ("shuffle2a".equals(method)) {
fun = ArrayShuffleBenchmark::shuffle2a;
} else if ("shuffle2L".equals(method)) {
fun = ArrayShuffleBenchmark::shuffle2L;
} else if ("shuffle2range".equals(method)) {
fun = (rng, a) -> ArrayShuffleBenchmark.shuffle2(rng, a, 0, a.length);
} else if ("shuffle3".equals(method)) {
Expand Down Expand Up @@ -438,6 +442,132 @@ static int[] shuffle2a(UniformRandomProvider rng, int[] array) {
return array;
}

/**
* Return two random values in {@code [0, range1)} and {@code [0, range2)}. The
* product bound is used for the reject algorithm. See Brackett-Rozinsky and Lemire.
*
* <p>The product bound can be any product {@code >= range1*range2} <em>but</em>
* it must be created using {@link #createBound(int, int)}.
* It may be updated to become {@code createBound(range1, range2)}.
*
* <p>This uses {@link UniformRandomProvider#nextLong()} as a 64-bit source of randomness.
*
* @param range1 Range 1.
* @param range2 Range 2.
* @param productBound Product bound.
* @param rng Source of randomness.
* @return [i1, i2]
*/
static int[] randomBounded2L(int range1, int range2, long[] productBound, UniformRandomProvider rng) {
long bits = rng.nextLong();
// 96-bit product of bits * range1 into lo 32-bits and hi 64-bits
long lo = (bits & MASK_32) * range1;
long hi = (bits >>> 32) * range1 + (lo >>> 32);
// result1 and result2 are the top 32-bits of the long
long r1 = hi;
// Leftover lower 64-bits * range2
// bits = (hi << 32) | (lo & MASK_32)
lo = (lo & MASK_32) * range2;
hi = (hi & MASK_32) * range2 + (lo >>> 32);
long r2 = hi;
// Leftover bits must be compared as unsigned
bits = ((hi << 32) | (lo & MASK_32)) + Long.MIN_VALUE;
if (bits < productBound[0]) {
// Do not use createBound as the actual product is required for the modulus
final long bound = (long) range1 * range2;
productBound[0] = bound + Long.MIN_VALUE;
if (bits < bound + Long.MIN_VALUE) {
// 2^64 % bound = (2^64 - bound) % bound
final long t = remainderUnsigned(-bound, bound) + Long.MIN_VALUE;
while (bits < t) {
bits = rng.nextLong();
lo = (bits & MASK_32) * range1;
hi = (bits >>> 32) * range1 + (lo >>> 32);
r1 = hi;
lo = (lo & MASK_32) * range2;
hi = (hi & MASK_32) * range2 + (lo >>> 32);
r2 = hi;
bits = ((hi << 32) | (lo & MASK_32)) + Long.MIN_VALUE;
}
}
}
// Convert to [0, range1), [0, range2)
return new int[] {(int) (r1 >> 32), (int) (r2 >> 32)};
}

/**
* Creates the bound for sampling two values from {@code range1} and {@code range2}.
* This returns the product updated for use in an unsigned compare by adding
* {@link Long#MIN_VALUE}.
*
* @param range1 Range 1.
* @param range2 Range 2.
* @return the bound
*/
static long createBound(int range1, int range2) {
return (long) range1 * range2 + Long.MIN_VALUE;
}

/**
* Returns the unsigned remainder from dividing the first argument
* by the second where each argument and the result is interpreted
* as an unsigned value.
*
* <p>Taken from {@code o.a.c.numbers.core.ArithmeticUtils}. The code has been
* adjusted as the divisor is known to be strictly positive.
*
* @param dividend the value to be divided
* @param divisor the value doing the dividing (must be positive)
* @return the unsigned remainder of the first argument divided by
* the second argument.
* @see Long#remainderUnsigned(long, long)
*/
private static long remainderUnsigned(long dividend, long divisor) {
// Note: Removed check if (divisor < 0)

// From Hacker's Delight 2.0, section 9.3
final long q = ((dividend >>> 1) / divisor) << 1;
final long r = dividend - q * divisor;
// unsigned r: 0 <= r < 2 * divisor
// if (r < divisor): r
// else: r - divisor

// The compare of unsigned r can be done using:
// return (r + Long.MIN_VALUE) < (divisor | Long.MIN_VALUE) ? r : r - divisor

// Here we subtract divisor if (r - divisor) is positive, else the result is r.
// This can be done by flipping the sign bit and
// creating a mask as -1 or 0 by signed shift.
return r - (divisor & (~(r - divisor) >> 63));
}

/**
* Shuffles the entries of the given array.
*
* @param rng Source of randomness.
* @param array Array whose entries will be shuffled (in-place).
* @return a reference to the given array
*/
static int[] shuffle2L(UniformRandomProvider rng, int[] array) {
int i = array.length;
// The threshold provided in the Brackett-Rozinsky and Lemire paper
// is the power of 2 below 1,358,187,913. Note that the product 2^30*2^30
// is representable using signed longs.
for (; i > POW_30; i--) {
swap(array, i - 1, rng.nextInt(i));
}
// Batches of 2 for sizes up to 2^30 elements
final long[] productBound = {createBound(i, i - 1)};
for (; i > 1; i -= 2) {
final int[] indices = randomBounded2L(i, i - 1, productBound, rng);
final int index1 = indices[0];
final int index2 = indices[1];
swap(array, i - 1, index1);
swap(array, i - 2, index2);
}
return array;
}

/**
* Return three random values in {@code [0, range1)}, {@code [0, range2)}
* and {@code [0, range3)}. The
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,34 @@ void testBoundedRandom2a(int range1, int range2) {
}
}

@ParameterizedTest
@CsvSource({
"13, 12257",
"4242, 9899",
"56712332, 987914719",
"1267381263, 92567572",
})
void testBoundedRandom2L(int range1, int range2) {
Assertions.assertTrue(Long.compareUnsigned((long) range1 * range2, 1L << 63) < 0, "Product must be less than 2^63");

final int samples = 1000000;
final int bins = 8;
final long[][] observed = new long[bins][bins];
final UniformRandomProvider rng = RandomSource.XO_SHI_RO_128_PP.create(SEED);
final long[] productBound = {ArrayShuffleBenchmark.createBound(range1, range2)};
final int width1 = (int) Math.ceil((double) range1 / bins);
final int width2 = (int) Math.ceil((double) range2 / bins);
for (int i = 0; i < samples; i++) {
final int[] indices = ArrayShuffleBenchmark.randomBounded2L(range1, range2, productBound, rng);
final int index1 = indices[0] / width1;
final int index2 = indices[1] / width2;
observed[index1][index2]++;
}

final double p = new ChiSquareTest().chiSquareTest(observed);
Assertions.assertFalse(p < 1e-3, () -> "p-value too small: " + p);
}

@ParameterizedTest
@CsvSource({
"257",
Expand All @@ -109,6 +137,20 @@ void testShuffle2(int length) {
assertShuffle(length, ArrayShuffleBenchmark::shuffle2);
}

@ParameterizedTest
@CsvSource({
"257",
"8073",
"57548",
// Do not approach threshold of 2^30. Just go higher
// than the 32-bit based shuffle but small enough to assert
// on a few samples.
"208476",
})
void testShuffle2L(int length) {
assertShuffle(length, ArrayShuffleBenchmark::shuffle2L);
}

@ParameterizedTest
@CsvSource({
"257",
Expand Down

0 comments on commit 8f5dcea

Please sign in to comment.