-
Notifications
You must be signed in to change notification settings - Fork 617
Implement BloomFilter class #4524
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
milaGGL
merged 15 commits into
mila/BloomFilter
from
mila/BloomFilter-implement-BloomFilter-class
Jan 20, 2023
Merged
Changes from 14 commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
de70b05
Implement BloomFilter class
milaGGL 44299ec
add golden test
milaGGL 3e3d2b0
Remove BigInteger
milaGGL 0a3c6f9
resolve comments
milaGGL be7071c
removed UnsignedLong class
milaGGL ffe0a1d
make methods private
milaGGL b50b49a
add javadocs
milaGGL 1099d17
resolve comments
milaGGL 2f5d335
format
milaGGL 3c81c7d
resolve comments
milaGGL 5305332
Merge branch 'mila/BloomFilter' into mila/BloomFilter-implement-Bloom…
milaGGL 5f66d92
resolve comments
milaGGL be8ebed
format
milaGGL 7eb326f
fix format
milaGGL e0c6fc0
resolve comments
milaGGL File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
167 changes: 167 additions & 0 deletions
167
firebase-firestore/src/main/java/com/google/firebase/firestore/remote/BloomFilter.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,167 @@ | ||
// Copyright 2023 Google LLC | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package com.google.firebase.firestore.remote; | ||
|
||
import android.util.Base64; | ||
import androidx.annotation.NonNull; | ||
import androidx.annotation.VisibleForTesting; | ||
import java.nio.charset.StandardCharsets; | ||
import java.security.MessageDigest; | ||
import java.security.NoSuchAlgorithmException; | ||
|
||
public class BloomFilter { | ||
private final int bitCount; | ||
private final byte[] bitmap; | ||
private final int hashCount; | ||
private final MessageDigest md5HashMessageDigest; | ||
|
||
public BloomFilter(@NonNull byte[] bitmap, int padding, int hashCount) { | ||
if (bitmap == null) { | ||
throw new NullPointerException("Bitmap cannot be null."); | ||
} | ||
if (padding < 0 || padding >= 8) { | ||
throw new IllegalArgumentException("Invalid padding: " + padding); | ||
} | ||
if (hashCount < 0) { | ||
throw new IllegalArgumentException("Invalid hash count: " + hashCount); | ||
} | ||
if (bitmap.length > 0 && hashCount == 0) { | ||
// Only empty bloom filter can have 0 hash count. | ||
throw new IllegalArgumentException("Invalid hash count: " + hashCount); | ||
} | ||
if (bitmap.length == 0) { | ||
// Empty bloom filter should have 0 padding. | ||
if (padding != 0) { | ||
throw new IllegalArgumentException( | ||
"Expected padding of 0 when bitmap length is 0, but got " + padding); | ||
} | ||
} | ||
|
||
this.bitmap = bitmap; | ||
this.hashCount = hashCount; | ||
this.bitCount = bitmap.length * 8 - padding; | ||
this.md5HashMessageDigest = createMd5HashMessageDigest(); | ||
} | ||
|
||
@VisibleForTesting | ||
int getBitCount() { | ||
return this.bitCount; | ||
} | ||
|
||
/** | ||
* Check whether the given string is a possible member of the bloom filter. It might return false | ||
* positive result, ie, the given string is not a member of the bloom filter, but the method | ||
* returned true. | ||
* | ||
* @param value the string to be tested for membership. | ||
* @return true if the given string might be contained in the bloom filter, or false if the given | ||
* string is definitely not contained in the bloom filter. | ||
*/ | ||
public boolean mightContain(@NonNull String value) { | ||
// Empty bitmap should return false on membership check. | ||
if (this.bitCount == 0) { | ||
return false; | ||
} | ||
|
||
byte[] hashedValue = md5HashDigest(value); | ||
if (hashedValue.length != 16) { | ||
throw new RuntimeException( | ||
"Invalid md5 hash array length: " + hashedValue.length + " (expected 16)"); | ||
} | ||
|
||
long hash1 = getLongLittleEndian(hashedValue, 0); | ||
long hash2 = getLongLittleEndian(hashedValue, 8); | ||
|
||
for (int i = 0; i < this.hashCount; i++) { | ||
int index = this.getBitIndex(hash1, hash2, i); | ||
if (!this.isBitSet(index)) { | ||
return false; | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
/** Hash a string using md5 hashing algorithm, and return an array of 16 bytes. */ | ||
@NonNull | ||
private byte[] md5HashDigest(@NonNull String value) { | ||
return md5HashMessageDigest.digest(value.getBytes(StandardCharsets.UTF_8)); | ||
} | ||
|
||
@NonNull | ||
private static MessageDigest createMd5HashMessageDigest() { | ||
try { | ||
return MessageDigest.getInstance("MD5"); | ||
} catch (NoSuchAlgorithmException e) { | ||
throw new RuntimeException("Missing MD5 MessageDigest provider: ", e); | ||
} | ||
} | ||
|
||
/** Interpret 8 bytes into a long, using little endian 2’s complement. */ | ||
private static long getLongLittleEndian(@NonNull byte[] bytes, int offset) { | ||
long result = 0; | ||
for (int i = 0; i < 8; i++) { | ||
result |= (bytes[offset + i] & 0xFFL) << (i * 8); | ||
} | ||
return result; | ||
} | ||
|
||
/** | ||
* Calculate the ith hash value based on the hashed 64 bit unsigned integers, and calculate its | ||
* corresponding bit index in the bitmap to be checked. | ||
*/ | ||
private int getBitIndex(long hash1, long hash2, int hashIndex) { | ||
// Calculate hashed value h(i) = h1 + (i * h2). | ||
// Even though we are interpreting hash1 and hash2 as unsigned, the addition and multiplication | ||
// operators still perform the correct operation and give the desired overflow behavior. | ||
long combinedHash = hash1 + (hash2 * hashIndex); | ||
long modulo = unsignedRemainder(combinedHash, this.bitCount); | ||
return (int) modulo; | ||
} | ||
|
||
/** | ||
* Calculate modulo, where the dividend and divisor are treated as unsigned 64-bit longs. | ||
* | ||
* <p>The implementation is taken from <a | ||
* href="https://github.com/google/guava/blob/553037486901cc60820ab7dcb38a25b6f34eba43/android/guava/src/com/google/common/primitives/UnsignedLongs.java">Guava</a>, | ||
* simplified to our needs. | ||
* | ||
* <p> | ||
*/ | ||
private static long unsignedRemainder(long dividend, long divisor) { | ||
long quotient = ((dividend >>> 1) / divisor) << 1; | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
long remainder = dividend - quotient * divisor; | ||
return remainder - (remainder >= divisor ? divisor : 0); | ||
} | ||
|
||
/** Return whether the bit at the given index in the bitmap is set to 1. */ | ||
private boolean isBitSet(int index) { | ||
// To retrieve bit n, calculate: (bitmap[n / 8] & (0x01 << (n % 8))). | ||
byte byteAtIndex = this.bitmap[index / 8]; | ||
int offset = index % 8; | ||
return (byteAtIndex & (0x01 << offset)) != 0; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "BloomFilter{" | ||
+ "hashCount=" | ||
+ hashCount | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
+ ", size=" | ||
+ bitCount | ||
+ ", bitmap=\"" | ||
milaGGL marked this conversation as resolved.
Show resolved
Hide resolved
|
||
+ Base64.encodeToString(bitmap, Base64.NO_WRAP) | ||
+ "\"}"; | ||
} | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.