Skip to content

Commit

Permalink
RNG-187: Add benchmark for array shuffle
Browse files Browse the repository at this point in the history
  • Loading branch information
aherbert committed Aug 23, 2024
1 parent 48a363f commit ec2b0b3
Show file tree
Hide file tree
Showing 2 changed files with 383 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,291 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.apache.commons.rng.examples.jmh.sampling;

import java.util.concurrent.TimeUnit;
import java.util.function.BiConsumer;
import java.util.stream.IntStream;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.simple.RandomSource;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.Warmup;

/**
* Executes benchmark to compare the speed of shuffling an array.
*
* <p>Batched shuffle samples have been adapted from the blog post:
* <a href="https://lemire.me/blog/2024/08/17/faster-random-integer-generation-with-batching/">
* Daniel Lemire: Faster random integer generation with batching</a>.
* The samples provided in the blog and the referenced paper are for a 64-bit
* source of randomness which requires native support for 128-bit multiplication.
* These have been modified for a 32-bit source of randomness.
*
* <ul>
* <li>Nevin Brackett-Rozinsky, Daniel Lemire,
* Batched Ranged Random Integer Generation, Software: Practice and Experience (to appear)
* <a href="https://arxiv.org/abs/2408.06213">arXiv:2408.06213M</a>
* </ul>
*/
@BenchmarkMode(Mode.AverageTime)
@OutputTimeUnit(TimeUnit.NANOSECONDS)
@Warmup(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@Measurement(iterations = 5, time = 1, timeUnit = TimeUnit.SECONDS)
@State(Scope.Benchmark)
@Fork(value = 1, jvmArgs = { "-server", "-Xms128M", "-Xmx128M" })
public class ArrayShuffleBenchmark {
/** 2^32. Used for the bounded random algorithm. This is required as the original
* method used (-bound % bound) for (2^L % bound) which only works for unsigned integer
* modulus. */
private static final long POW_32 = 1L << 32;
/** 2^15. Length threshold to sample 2 integers from a random 32-bit value. */
private static final int POW_15 = 1 << 15;
/** Mask the lower 32-bit of a long. */
private static final long MASK_32 = 0xffffffffL;

/**
* The data for the shuffle. Contains the data size and the random generators.
*/
@State(Scope.Benchmark)
public static class ShuffleData {
/**
* The list size.
*
* <p>Note: The 32-bit based shuffle2 method has a size threshold of 2^15
* (32768) for creating two samples from each 32-bit random value.
* Speed-up is most obvious for arrays below this size.
*/
@Param({"4", "16", "64", "256", "1024", "4096", "8192", "16384", "32768", "65536", "262148", "1048592"})
private int size;

/** The data. */
private int[] data;

/**
* @return the data
*/
public int[] getData() {
return data;
}

/**
* Create the data.
*/
@Setup
public void setup() {
data = IntStream.range(0, size).toArray();
}
}

/**
* Defines the {@link RandomSource} for testing.
*/
@State(Scope.Benchmark)
public static class RngSource {
/**
* RNG providers.
*
* <p>Use different speeds.</p>
*
* @see <a href="https://commons.apache.org/proper/commons-rng/userguide/rng.html">
* Commons RNG user guide</a>
*/
@Param({"XO_RO_SHI_RO_128_PP",
//"MWC_256",
//"JDK"
})
private String randomSourceName;

/** RNG. */
private UniformRandomProvider rng;

/**
* Gets the source of randomness.
*
* @return RNG
*/
public UniformRandomProvider getRNG() {
return rng;
}

/**
* Look-up the {@link RngSource} from the name and instantiates the generator.
*/
@Setup
public void setup() {
rng = RandomSource.valueOf(randomSourceName).create();
}
}

/**
* Defines the shuffle method.
*/
@State(Scope.Benchmark)
public static class ShuffleMethod {
/**
* Method name.
*/
@Param({"shuffle", "shuffle2"})
private String method;

/** Shuffle function. */
private BiConsumer<UniformRandomProvider, int[]> fun;

/**
* Gets the source of randomness.
*
* @return RNG
*/
public BiConsumer<UniformRandomProvider, int[]> getMethod() {
return fun;
}

/**
* Look-up the {@link RngSource} from the name and instantiates the generator.
*/
@Setup
public void setup() {
if ("shuffle".equals(method)) {
fun = ArrayShuffleBenchmark::shuffle1;
} else if ("shuffle2".equals(method)) {
fun = ArrayShuffleBenchmark::shuffle2;
} else {
throw new IllegalStateException("Unknown shuffle method: " + method);
}
}
}

/**
* Swaps the two specified elements in the array.
*
* @param array Array.
* @param i First index.
* @param j Second index.
*/
private static void swap(int[] array, int i, int j) {
final int tmp = array[i];
array[i] = array[j];
array[j] = tmp;
}

/**
* Shuffles the entries of the given array.
* Uses a Fisher-Yates shuffle.
*
* @param rng Source of randomness.
* @param array Array whose entries will be shuffled (in-place).
* @return a reference to the given array
*/
static int[] shuffle1(UniformRandomProvider rng, int[] array) {
for (int i = array.length; i > 1; i--) {
swap(array, i - 1, rng.nextInt(i));
}
return array;
}

/**
* Return two random values in {@code [0, range1)} and {@code [0, range2)}. The
* product bound is used for the reject algorithm. See Brackett-Rozinsky and Lemire.
*
* <p>The product bound can be any positive integer {@code >= range1*range2}.
* It may be updated to become {@code range1*range2}.
*
* @param range1 Range 1.
* @param range2 Range 2.
* @param productBound Product bound.
* @param rng Source of randomness.
* @return [i1, i2]
*/
static int[] randomBounded2(int range1, int range2, int[] productBound, UniformRandomProvider rng) {
long m = (rng.nextInt() & MASK_32) * range1;
// result1 and result2 are the top 32-bits of the long
long r1 = m;
// Leftover bits * range2
m = (m & MASK_32) * range2;
long r2 = m;
// Leftover bits must be unsigned
long l = m & MASK_32;
if (l < productBound[0]) {
final int bound = range1 * range2;
productBound[0] = bound;
if (l < bound) {
// 2^32 % bound
long t = POW_32 % bound;
while (l < t) {
m = (rng.nextInt() & MASK_32) * range1;
r1 = m;
m = (m & MASK_32) * range2;
r2 = m;
l = m & MASK_32;
}
}
}
// Convert to [0, range1), [0, range2)
return new int[] {(int) (r1 >> 32), (int) (r2 >> 32)};
}

/**
* Shuffles the entries of the given array.
*
* @param rng Source of randomness.
* @param array Array whose entries will be shuffled (in-place).
* @return a reference to the given array
*/
static int[] shuffle2(UniformRandomProvider rng, int[] array) {
int i = array.length;
// The threshold provided in the Brackett-Rozinsky and Lemire paper
// is the power of 2 below 20724. Note that the product 2^15*2^15
// is representable using signed integers.
for (; i > POW_15; i--) {
swap(array, i - 1, rng.nextInt(i));
}
// Batches of 2 for sizes up to 2^15 elements
final int[] productBound = {i * (i - 1)};
for (; i > 1; i -= 2) {
final int[] indices = randomBounded2(i, i - 1, productBound, rng);
final int index1 = indices[0];
final int index2 = indices[1];
swap(array, i - 1, index1);
swap(array, i - 2, index2);
}
return array;
}

/**
* Performs a shuffle.
*
* @param data Shuffle data.
* @param source Source of randomness.
* @param method Shuffle method.
* @return the shuffled data
*/
@Benchmark
public Object shuffle(ShuffleData data, RngSource source, ShuffleMethod method) {
final int[] a = data.getData();
method.getMethod().accept(source.getRNG(), a);
return a;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.rng.examples.jmh.sampling;

import org.apache.commons.math3.stat.inference.ChiSquareTest;
import org.apache.commons.rng.UniformRandomProvider;
import org.apache.commons.rng.sampling.ArraySampler;
import org.apache.commons.rng.sampling.PermutationSampler;
import org.apache.commons.rng.simple.RandomSource;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.params.ParameterizedTest;
import org.junit.jupiter.params.provider.CsvSource;

/**
* Test for array shuffle samplers in the {@link ArrayShuffleBenchmark} class.
*/
class ArrayShuffleBenchmarkTest {

/**
* The seed for the RNG used in the sampling tests.
*
* <p>This has been chosen to allow the test to pass with all generators.
* Set to null test with a random seed. When using a random
* seed re-run the test multiple times. Systematic failure of the same test
* should be investigated further.
*/
private static final Long SEED = 0xd1342543de82ef95L;

@ParameterizedTest
@CsvSource({
"42, 257",
"1356, 8073",
})
void testBoundedRandom2(int range1, int range2) {
Assertions.assertTrue((long) range1 * range2 < 1L << 31, "Product must be less than 2^31");

final int samples = 1000000;
final int bins = 8;
final long[][] observed = new long[bins][bins];
final UniformRandomProvider rng = RandomSource.XO_SHI_RO_128_PP.create(SEED);
final int[] productBound = {range1 * range2};
final int width1 = (int) Math.ceil((double) range1 / bins);
final int width2 = (int) Math.ceil((double) range2 / bins);
for (int i = 0; i < samples; i++) {
final int[] indices = ArrayShuffleBenchmark.randomBounded2(range1, range2, productBound, rng);
final int index1 = indices[0] / width1;
final int index2 = indices[1] / width2;
observed[index1][index2]++;
}

final double p = new ChiSquareTest().chiSquareTest(observed);
Assertions.assertFalse(p < 1e-3, () -> "p-value too small: " + p);
}

@ParameterizedTest
@CsvSource({
"257",
"8073",
// Above the bounded random threshold of 2^15
"31548",
})
void testShuffle(int length) {
final int[] array = PermutationSampler.natural(length);
final UniformRandomProvider rng = RandomSource.XO_SHI_RO_128_PP.create(SEED);
final int samples = 1000000 / length;
final int bins = 8;
final long[][] observed = new long[bins][bins];
final int width = (int) Math.ceil((double) length / bins);
for (int j = 0; j < samples; j++) {
ArraySampler.shuffle(rng, array);
for (int i = 0; i < length; i++) {
observed[i / width][array[i] / width]++;
}
}
final double p = new ChiSquareTest().chiSquareTest(observed);
Assertions.assertFalse(p < 1e-3, () -> "p-value too small: " + p);
}
}

0 comments on commit ec2b0b3

Please sign in to comment.