-
Notifications
You must be signed in to change notification settings - Fork 617
Implement BloomFilter class #4524
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
milaGGL
merged 15 commits into
mila/BloomFilter
from
mila/BloomFilter-implement-BloomFilter-class
Jan 20, 2023
Merged
Changes from 3 commits
Commits
Show all changes
15 commits
Select commit
Hold shift + click to select a range
de70b05
Implement BloomFilter class
milaGGL 44299ec
add golden test
milaGGL 3e3d2b0
Remove BigInteger
milaGGL 0a3c6f9
resolve comments
milaGGL be7071c
removed UnsignedLong class
milaGGL ffe0a1d
make methods private
milaGGL b50b49a
add javadocs
milaGGL 1099d17
resolve comments
milaGGL 2f5d335
format
milaGGL 3c81c7d
resolve comments
milaGGL 5305332
Merge branch 'mila/BloomFilter' into mila/BloomFilter-implement-Bloom…
milaGGL 5f66d92
resolve comments
milaGGL be8ebed
format
milaGGL 7eb326f
fix format
milaGGL e0c6fc0
resolve comments
milaGGL File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
134 changes: 134 additions & 0 deletions
134
firebase-firestore/src/main/java/com/google/firebase/firestore/remote/BloomFilter.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
// Copyright 2022 Google LLC | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package com.google.firebase.firestore.remote; | ||
|
||
import androidx.annotation.NonNull; | ||
import com.google.firebase.firestore.util.Logger; | ||
import java.security.MessageDigest; | ||
import java.security.NoSuchAlgorithmException; | ||
import java.util.Arrays; | ||
|
||
public class BloomFilter { | ||
private static final String TAG = "BloomFilter"; | ||
|
||
private final int size; | ||
private final byte[] bitmap; | ||
private final int hashCount; | ||
|
||
/**
 * Creates a bloom filter backed by the given bitmap.
 *
 * <p>The filter's size in bits is {@code bitmap.length * 8 - padding}.
 *
 * @param bitmap the bytes backing the filter; must not be null. The array is stored as-is (it
 *     is not copied).
 * @param padding the number of bits to subtract from the bitmap's bit length; must be in the
 *     range {@code [0, 8)}, and must be 0 when {@code bitmap} is empty.
 * @param hashCount the number of bit positions probed per membership check; must be positive
 *     for a non-empty bitmap and non-negative for an empty one.
 * @throws NullPointerException if {@code bitmap} is null.
 * @throws IllegalArgumentException if {@code padding} or {@code hashCount} violates the
 *     constraints above.
 */
public BloomFilter(@NonNull byte[] bitmap, int padding, int hashCount) {
  // Fail with a descriptive message instead of an anonymous NPE from bitmap.length below.
  if (bitmap == null) {
    throw new NullPointerException("Bitmap cannot be null.");
  }
  if (padding < 0 || padding >= 8) {
    throw new IllegalArgumentException("Invalid padding: " + padding);
  }
  if (hashCount < 0) {
    throw new IllegalArgumentException("Invalid hash count: " + hashCount);
  }
  if (bitmap.length > 0 && hashCount == 0) {
    // Only empty bloom filter can have 0 hash count.
    throw new IllegalArgumentException("Invalid hash count: " + hashCount);
  }
  if (bitmap.length == 0 && padding != 0) {
    // Empty bloom filter should have 0 padding.
    throw new IllegalArgumentException("Invalid padding when bitmap length is 0: " + padding);
  }

  this.bitmap = bitmap;
  this.hashCount = hashCount;
  this.size = bitmap.length * 8 - padding;
}
|
||
@NonNull | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
public int getSize() { | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return this.size; | ||
} | ||
|
||
public boolean isEmpty() { | ||
return this.size == 0; | ||
} | ||
|
||
public boolean mightContain(@NonNull String value) { | ||
// Empty bitmap or empty value should always return false on membership check. | ||
if (this.isEmpty() || value.isEmpty()) { | ||
return false; | ||
} | ||
|
||
byte[] md5HashedValue = this.MD5Hash(value); | ||
if (md5HashedValue == null || md5HashedValue.length != 16) { | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return false; | ||
} | ||
|
||
long hash1 = this.getLongLittleEndian(md5HashedValue, 0); | ||
long hash2 = this.getLongLittleEndian(md5HashedValue, 8); | ||
|
||
for (int i = 0; i < this.hashCount; i++) { | ||
int index = this.getBitIndex(hash1, hash2, i); | ||
if (!this.isBitSet(index)) { | ||
return false; | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
public static byte[] MD5Hash(String value) { | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
try { | ||
MessageDigest digest = MessageDigest.getInstance("MD5"); | ||
digest.update(value.getBytes()); | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return digest.digest(); | ||
} catch (NoSuchAlgorithmException e) { | ||
Logger.warn(TAG, "Could not create hashing algorithm: MD5.", e); | ||
return null; | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
|
||
// Interpret 8 bytes into a long, using little endian 2’s complement. | ||
public static long getLongLittleEndian(byte[] bytes, int offset) { | ||
long result = 0; | ||
for (int i = 0; i < 8 && i < bytes.length; i++) { | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
result |= (bytes[offset + i] & 0xFFL) << (i * 8); | ||
} | ||
return result; | ||
} | ||
|
||
// Calculate the ith hash value based on the hashed 64bit integers, | ||
// and calculate its corresponding bit index in the bitmap to be checked. | ||
private int getBitIndex(long num1, long num2, int index) { | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Calculate hashed value h(i) = h1 + (i * h2). | ||
Long hashValue2 = num1 + num2 * index; | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return (int) Long.remainderUnsigned(hashValue2, this.size); | ||
} | ||
|
||
// Return whether the bit on the given index in the bitmap is set to 1. | ||
private boolean isBitSet(int index) { | ||
// To retrieve bit n, calculate: (bitmap[n / 8] & (0x01 << (n % 8))). | ||
byte byteAtIndex = this.bitmap[(index / 8)]; | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
int offset = index % 8; | ||
return (byteAtIndex & (0x01 << offset)) != 0; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return "BloomFilter{" | ||
+ "bitmap=" | ||
+ Arrays.toString(bitmap) | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
+ ", hashCount=" | ||
+ hashCount | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
+ ", size=" | ||
+ size | ||
+ '}'; | ||
} | ||
} |
169 changes: 169 additions & 0 deletions
169
firebase-firestore/src/test/java/com/google/firebase/firestore/remote/BloomFilterTest.java
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,169 @@ | ||
// Copyright 2022 Google LLC | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); | ||
// you may not use this file except in compliance with the License. | ||
// You may obtain a copy of the License at | ||
// | ||
// http://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, | ||
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
// See the License for the specific language governing permissions and | ||
// limitations under the License. | ||
|
||
package com.google.firebase.firestore.remote; | ||
|
||
import static com.google.common.truth.Truth.assertThat; | ||
import static org.junit.Assert.assertEquals; | ||
import static org.junit.Assert.assertFalse; | ||
import static org.junit.Assert.assertThrows; | ||
import static org.junit.Assert.assertTrue; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.File; | ||
import java.io.FileReader; | ||
import java.util.Base64; | ||
import java.util.HashMap; | ||
import java.util.stream.Stream; | ||
import org.json.JSONObject; | ||
import org.junit.Test; | ||
import org.junit.runner.RunWith; | ||
import org.robolectric.RobolectricTestRunner; | ||
import org.robolectric.annotation.Config; | ||
|
||
@RunWith(RobolectricTestRunner.class) | ||
@Config(manifest = Config.NONE) | ||
public class BloomFilterTest { | ||
|
||
@Test | ||
public void testEmptyBloomFilter() { | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
BloomFilter bloomFilter = new BloomFilter(new byte[0], 0, 0); | ||
assertEquals(bloomFilter.getSize(), 0); | ||
} | ||
|
||
@Test | ||
public void testEmptyBloomFilterThrowException() { | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
IllegalArgumentException paddingException = | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
assertThrows(IllegalArgumentException.class, () -> new BloomFilter(new byte[0], 1, 0)); | ||
assertThat(paddingException) | ||
.hasMessageThat() | ||
.contains("Invalid padding when bitmap length is 0: 1"); | ||
IllegalArgumentException hashCountException = | ||
assertThrows(IllegalArgumentException.class, () -> new BloomFilter(new byte[0], 0, -1)); | ||
assertThat(hashCountException).hasMessageThat().contains("Invalid hash count: -1"); | ||
} | ||
|
||
@Test | ||
public void testNonEmptyBloomFilter() { | ||
BloomFilter bloomFilter1 = new BloomFilter(new byte[1], 0, 1); | ||
assertEquals(bloomFilter1.getSize(), 8); | ||
BloomFilter bloomFilter2 = new BloomFilter(new byte[1], 7, 1); | ||
assertEquals(bloomFilter2.getSize(), 1); | ||
} | ||
|
||
@Test | ||
public void testNonEmptyBloomFilterThrowException() { | ||
IllegalArgumentException negativePaddingException = | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
assertThrows(IllegalArgumentException.class, () -> new BloomFilter(new byte[1], -1, 1)); | ||
assertThat(negativePaddingException).hasMessageThat().contains("Invalid padding: -1"); | ||
IllegalArgumentException overflowPaddingException = | ||
assertThrows(IllegalArgumentException.class, () -> new BloomFilter(new byte[1], 8, 1)); | ||
assertThat(overflowPaddingException).hasMessageThat().contains("Invalid padding: 8"); | ||
|
||
IllegalArgumentException negativeHashCountException = | ||
assertThrows(IllegalArgumentException.class, () -> new BloomFilter(new byte[1], 1, -1)); | ||
assertThat(negativeHashCountException).hasMessageThat().contains("Invalid hash count: -1"); | ||
IllegalArgumentException zeroHashCountException = | ||
assertThrows(IllegalArgumentException.class, () -> new BloomFilter(new byte[1], 1, 0)); | ||
assertThat(zeroHashCountException).hasMessageThat().contains("Invalid hash count: 0"); | ||
} | ||
|
||
@Test | ||
public void testBloomFilterProcessNonStandardCharacters() { | ||
// A non-empty BloomFilter object with 1 insertion : "ÀÒ∑" | ||
BloomFilter bloomFilter = new BloomFilter(new byte[] {(byte) 237, 5}, 5, 8); | ||
assertTrue(bloomFilter.mightContain("ÀÒ∑")); | ||
assertFalse(bloomFilter.mightContain("Ò∑À")); | ||
} | ||
|
||
@Test | ||
public void testEmptyBloomFilterMightContainAlwaysReturnFalse() { | ||
BloomFilter bloomFilter = new BloomFilter(new byte[0], 0, 0); | ||
assertFalse(bloomFilter.mightContain("abc")); | ||
} | ||
|
||
@Test | ||
public void testBloomFilterMightContainOnEmptyStringAlwaysReturnFalse() { | ||
BloomFilter emptyBloomFilter = new BloomFilter(new byte[0], 0, 0); | ||
BloomFilter nonEmptyBloomFilter = | ||
new BloomFilter(new byte[] {(byte) 255, (byte) 255, (byte) 255}, 1, 16); | ||
|
||
assertFalse(emptyBloomFilter.mightContain("")); | ||
assertFalse(nonEmptyBloomFilter.mightContain("")); | ||
} | ||
|
||
/** | ||
* Golden tests are generated by backend based on inserting n number of document paths into a | ||
* bloom filter. | ||
* | ||
* <p>Full document path is generated by concatenating documentPrefix and number n, eg, | ||
* projects/project-1/databases/database-1/documents/coll/doc12. | ||
* | ||
* <p>The test result is generated by checking the membership of documents from documentPrefix+0 | ||
* to documentPrefix+2n. The membership results from 0 to n is expected to be true, and the | ||
* membership results from n to 2n is expected to be false with some false positive results. | ||
*/ | ||
@Test | ||
@SuppressWarnings("DefaultCharset") | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
public void testBloomFilterGoldenTest() throws Exception { | ||
String documentPrefix = "projects/project-1/databases/database-1/documents/coll/doc"; | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
// Import the golden test files for bloom filter | ||
HashMap<String, JSONObject> parsedSpecFiles = new HashMap<>(); | ||
File jsonDir = new File("src/test/resources/bloom_filter_golden_test_data"); | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
File[] jsonFiles = jsonDir.listFiles(); | ||
for (File f : jsonFiles) { | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (!f.toString().endsWith(".json")) { | ||
continue; | ||
} | ||
|
||
// Read the files into a map. | ||
StringBuilder builder = new StringBuilder(); | ||
BufferedReader reader = new BufferedReader(new FileReader(f)); | ||
Stream<String> lines = reader.lines(); | ||
lines.forEach(builder::append); | ||
String json = builder.toString(); | ||
JSONObject fileJSON = new JSONObject(json); | ||
parsedSpecFiles.put(f.getName(), fileJSON); | ||
} | ||
|
||
// Loop and test the files | ||
for (String fileName : parsedSpecFiles.keySet()) { | ||
if (fileName.contains("membership_test_result")) { | ||
continue; | ||
} | ||
|
||
// Read test data and instantiate a BloomFilter object | ||
JSONObject fileJSON = parsedSpecFiles.get(fileName); | ||
JSONObject bits = fileJSON.getJSONObject("bits"); | ||
String bitmap = bits.getString("bitmap"); | ||
int padding = bits.getInt("padding"); | ||
int hashCount = fileJSON.getInt("hashCount"); | ||
BloomFilter bloomFilter = | ||
new BloomFilter(Base64.getDecoder().decode(bitmap), padding, hashCount); | ||
|
||
// Find corresponding membership test result. | ||
JSONObject resultJSON = | ||
parsedSpecFiles.get(fileName.replace("bloom_filter_proto", "membership_test_result")); | ||
String membershipTestResults = resultJSON.getString("membershipTestResults"); | ||
|
||
// Run and compare mightContain result with the expectation. | ||
for (int i = 0; i < membershipTestResults.length(); i++) { | ||
boolean expectedMembershipResult = membershipTestResults.charAt(i) == '1'; | ||
boolean mightContain = bloomFilter.mightContain(documentPrefix + i); | ||
assertEquals(mightContain, expectedMembershipResult); | ||
dconeybe marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
} | ||
} | ||
} |
1 change: 1 addition & 0 deletions
1
...oom_filter_golden_test_data/Validation_BloomFilterTest_MD5_1_0001_bloom_filter_proto.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{ "bits": { "bitmap": "RswZ", "padding": 1 }, "hashCount": 16 } |
1 change: 1 addition & 0 deletions
1
...filter_golden_test_data/Validation_BloomFilterTest_MD5_1_0001_membership_test_result.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"membershipTestResults" : "10"} |
1 change: 1 addition & 0 deletions
1
...bloom_filter_golden_test_data/Validation_BloomFilterTest_MD5_1_01_bloom_filter_proto.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"bits":{"bitmap":"mwE=","padding":5},"hashCount":8} |
1 change: 1 addition & 0 deletions
1
...m_filter_golden_test_data/Validation_BloomFilterTest_MD5_1_01_membership_test_result.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"membershipTestResults" : "10"} |
1 change: 1 addition & 0 deletions
1
.../bloom_filter_golden_test_data/Validation_BloomFilterTest_MD5_1_1_bloom_filter_proto.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"bits":{"bitmap":"","padding":0},"hashCount":0} |
1 change: 1 addition & 0 deletions
1
...om_filter_golden_test_data/Validation_BloomFilterTest_MD5_1_1_membership_test_result.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"membershipTestResults" : "00"} |
7 changes: 7 additions & 0 deletions
7
...filter_golden_test_data/Validation_BloomFilterTest_MD5_50000_0001_bloom_filter_proto.json
Large diffs are not rendered by default.
Oops, something went wrong.
1 change: 1 addition & 0 deletions
1
...er_golden_test_data/Validation_BloomFilterTest_MD5_50000_0001_membership_test_result.json
Large diffs are not rendered by default.
Oops, something went wrong.
1 change: 1 addition & 0 deletions
1
...m_filter_golden_test_data/Validation_BloomFilterTest_MD5_50000_01_bloom_filter_proto.json
Large diffs are not rendered by default.
Oops, something went wrong.
1 change: 1 addition & 0 deletions
1
...lter_golden_test_data/Validation_BloomFilterTest_MD5_50000_01_membership_test_result.json
Large diffs are not rendered by default.
Oops, something went wrong.
1 change: 1 addition & 0 deletions
1
...om_filter_golden_test_data/Validation_BloomFilterTest_MD5_50000_1_bloom_filter_proto.json
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
{"bits":{"bitmap":"","padding":0},"hashCount":0} |
1 change: 1 addition & 0 deletions
1
...ilter_golden_test_data/Validation_BloomFilterTest_MD5_50000_1_membership_test_result.json
Large diffs are not rendered by default.
Oops, something went wrong.
1 change: 1 addition & 0 deletions
1
..._filter_golden_test_data/Validation_BloomFilterTest_MD5_5000_0001_bloom_filter_proto.json
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.