Skip to content

Commit

Permalink
Merge pull request #1 from apache/master
Browse files Browse the repository at this point in the history
Pull from Apache
  • Loading branch information
masyamandev authored Jan 26, 2020
2 parents fde8c86 + d61b83b commit 901fc77
Show file tree
Hide file tree
Showing 40 changed files with 6,162 additions and 1 deletion.
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ site-content
.settings
.classpath
.project
.checkstyle

# Mac files
.DS_Store
.DS_Store
9 changes: 9 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -438,6 +438,9 @@
<contributor>
<name>Vamsi Kavuri</name>
</contributor>
<contributor>
<name>Claude Warren</name>
</contributor>
</contributors>

<dependencies>
Expand All @@ -459,6 +462,12 @@
<version>3.9</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
<version>1.14</version>
<optional>true</optional>
</dependency>
</dependencies>

<distributionManagement>
Expand Down
3 changes: 3 additions & 0 deletions src/changes/changes.xml
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,9 @@
<action issue="COLLECTIONS-744" dev="ggregory" type="add" due-to="Gary Gregory">
Add org.apache.commons.collections4.EnumerationUtils.asIterable(Enumeration).
</action>
<action issue="COLLECTIONS-728" dev="ggregory" type="add" due-to="Claude Warren">
BloomFilter contribution.
</action>
</release>
<release version="4.4" date="2019-07-05" description="Maintenance release.">
<action issue="COLLECTIONS-710" dev="ggregory" type="fix" due-to="Yu Shi, Gary Gregory">
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,287 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.collections4.bloomfilter;

import java.util.BitSet;
import java.util.PrimitiveIterator.OfInt;

import org.apache.commons.collections4.bloomfilter.hasher.HashFunctionIdentity;
import org.apache.commons.collections4.bloomfilter.hasher.Hasher;
import org.apache.commons.collections4.bloomfilter.hasher.Shape;
import org.apache.commons.collections4.bloomfilter.hasher.StaticHasher;

/**
* An abstract Bloom filter providing default implementations for most Bloom filter
* functions. Specific implementations are encouraged to override the methods that can be
* more efficiently implemented.
* <p>
* This abstract class provides additional functionality not declared in the interface.
* Specifically:
* <ul>
* <li>orCardinality</li>
* <li>jaccardSimilarity</li>
* <li>jaccardDistance</li>
* <li>cosineSimilarity</li>
* <li>cosineDistance</li>
* <li>estimateSize</li>
* <li>estimateUnionSize</li>
* <li>estimateIntersectionSize</li>
* <li>isFull</li>
* </ul>
*
* @since 4.5
*/
public abstract class AbstractBloomFilter implements BloomFilter {

/**
* The shape used by this BloomFilter
*/
private final Shape shape;

/**
* Gets an array of little-endian long values representing the on bits of this filter.
* bits 0-63 are in the first long.
*
* @return the LongBuffer representation of this filter.
*/
@Override
public abstract long[] getBits();

/**
* Creates a StaticHasher that contains the indexes of the bits that are on in this
* filter.
*
* @return a StaticHasher for that produces this Bloom filter.
*/
@Override
public abstract StaticHasher getHasher();

/**
* Construct a Bloom filter with the specified shape.
*
* @param shape The shape.
*/
protected AbstractBloomFilter(Shape shape) {
this.shape = shape;
}

/**
* Verify the other Bloom filter has the same shape as this Bloom filter.
*
* @param other the other filter to check.
* @throws IllegalArgumentException if the shapes are not the same.
*/
protected void verifyShape(BloomFilter other) {
verifyShape(other.getShape());
}

/**
* Verify the specified shape has the same shape as this Bloom filter.
*
* @param shape the other shape to check.
* @throws IllegalArgumentException if the shapes are not the same.
*/
protected void verifyShape(Shape shape) {
if (!this.shape.equals(shape)) {
throw new IllegalArgumentException(String.format("Shape %s is not the same as %s", shape, this.shape));
}
}

/**
* Verifies that the hasher has the same name as the shape.
*
* @param hasher the Hasher to check
*/
protected void verifyHasher(Hasher hasher) {
if (shape.getHashFunctionIdentity().getSignature() != hasher.getHashFunctionIdentity().getSignature()) {
throw new IllegalArgumentException(
String.format("Hasher (%s) is not the hasher for shape (%s)",
HashFunctionIdentity.asCommonString(hasher.getHashFunctionIdentity()),
shape.toString()));
}
}

/**
* Gets the shape of this filter.
*
* @return The shape of this filter.
*/
@Override
public final Shape getShape() {
return shape;
}

/**
* Merge the other Bloom filter into this one.
*
* @param other the other Bloom filter.
*/
@Override
abstract public void merge(BloomFilter other);

/**
* Merge the decomposed Bloom filter defined by the hasher into this Bloom
* filter. The hasher provides an iterator of bit indexes to enable.
*
* @param hasher the hasher to provide the indexes.
* @throws IllegalArgumentException if the shape argument does not match the shape of
* this filter, or if the hasher is not the specified one
*/
@Override
abstract public void merge(Hasher hasher);

/**
* Gets the cardinality of this Bloom filter.
*
* @return the cardinality (number of enabled bits) in this filter.
*/
@Override
public int cardinality() {
return BitSet.valueOf(getBits()).cardinality();
}

/**
* Performs a logical "AND" with the other Bloom filter and returns the cardinality of
* the result.
*
* @param other the other Bloom filter.
* @return the cardinality of the result of {@code ( this AND other )}.
*/
@Override
public int andCardinality(BloomFilter other) {
verifyShape(other);
long[] mine = getBits();
long[] theirs = other.getBits();
int limit = Integer.min(mine.length, theirs.length);
long[] result = new long[limit];
for (int i = 0; i < limit; i++) {
result[i] = mine[i] & theirs[i];
}
return BitSet.valueOf(result).cardinality();
}

@Override
public int orCardinality(BloomFilter other) {
verifyShape(other);
long[] mine = getBits();
long[] theirs = other.getBits();
long[] remainder = null;
long[] result = null;
if (mine.length > theirs.length) {
result = new long[mine.length];
remainder = mine;
} else {
result = new long[theirs.length];
remainder = theirs;

}
int limit = Integer.min(mine.length, theirs.length);
for (int i = 0; i < limit; i++) {
result[i] = mine[i] | theirs[i];
}
if (limit<result.length)
{
System.arraycopy(remainder, limit, result, limit, result.length-limit);
}
return BitSet.valueOf(result).cardinality();
}

/**
* Performs a logical "XOR" with the other Bloom filter and returns the cardinality of
* the result.
*
* @param other the other Bloom filter.
* @return the cardinality of the result of {@code( this XOR other )}
*/
@Override
public int xorCardinality(BloomFilter other) {
verifyShape(other);
long[] mine = getBits();
long[] theirs = other.getBits();
long[] remainder = null;
long[] result = null;
if (mine.length > theirs.length) {
result = new long[mine.length];
remainder = mine;
} else {
result = new long[theirs.length];
remainder = theirs;

}
int limit = Integer.min(mine.length, theirs.length);
for (int i = 0; i < limit; i++) {
result[i] = mine[i] ^ theirs[i];
}
if (limit<result.length)
{
System.arraycopy(remainder, limit, result, limit, result.length-limit);
}
return BitSet.valueOf(result).cardinality();
}

/**
* Performs a contains check. Effectively this AND other == other.
*
* @param other the Other Bloom filter.
* @return true if this filter matches the other.
*/
@Override
public boolean contains(BloomFilter other) {
verifyShape(other);
return other.cardinality() == andCardinality(other);
}

/**
* Performs a contains check against a decomposed Bloom filter. The shape must match
* the shape of this filter. The hasher provides bit indexes to check for. Effectively
* decomposed AND this == decomposed.
*
* @param hasher The hasher containing the bits to check.
* @return true if this filter contains the other.
* @throws IllegalArgumentException if the shape argument does not match the shape of
* this filter, or if the hasher is not the specified one
*/
@Override
public boolean contains(Hasher hasher) {
verifyHasher( hasher );
long[] buff = getBits();

OfInt iter = hasher.getBits(shape);
while (iter.hasNext()) {
int idx = iter.nextInt();
int buffIdx = idx / Long.SIZE;
int pwr = Math.floorMod(idx, Long.SIZE);
long buffOffset = 1L << pwr;
if ((buff[buffIdx] & buffOffset) == 0) {
return false;
}
}
return true;
}

/**
* Determines if the bloom filter is "full". Full is defined as having no unset
* bits.
*
* @return true if the filter is full.
*/
public final boolean isFull() {
return cardinality() == getShape().getNumberOfBits();
}

}
Loading

0 comments on commit 901fc77

Please sign in to comment.