Skip to content

Implement BloomFilter class #4524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jan 20, 2023
Original file line number Diff line number Diff line change
Expand Up @@ -14,20 +14,18 @@

package com.google.firebase.firestore.remote;

import android.util.Base64;
import androidx.annotation.NonNull;
import com.google.firebase.firestore.util.Logger;
import androidx.annotation.VisibleForTesting;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;

public class BloomFilter {
private static final String TAG = "BloomFilter";

private final int size;
private final byte[] bitmap;
private final int hashCount;

public BloomFilter(@NonNull byte[] bitmap, @NonNull int padding, @NonNull int hashCount) {
public BloomFilter(@NonNull byte[] bitmap, int padding, int hashCount) {
if (padding < 0 || padding >= 8) {
throw new IllegalArgumentException("Invalid padding: " + padding);
}
Expand All @@ -52,28 +50,34 @@ public BloomFilter(@NonNull byte[] bitmap, @NonNull int padding, @NonNull int ha
this.size = bitmap.length * 8 - padding;
}

@NonNull
public int getSize() {
return this.size;
}

public boolean isEmpty() {
/** Returns whether the bloom filter is empty, i.e. contains no bits. */
@VisibleForTesting
boolean isEmpty() {
return this.size == 0;
}

/**
* Checks whether the document path is a possible member of the bloom filter. The result may be a
* false positive, i.e., the method may return true even though the document path is not a member
* of the bloom filter.
*
* @param value a string representation of the document path.
* @return true if the document path might be contained in the bloom filter.
*/
public boolean mightContain(@NonNull String value) {
// Empty bitmap or empty value should always return false on membership check.
if (this.isEmpty() || value.isEmpty()) {
return false;
}

byte[] md5HashedValue = this.MD5Hash(value);
if (md5HashedValue == null || md5HashedValue.length != 16) {
return false;
byte[] md5HashedValue = md5Hash(value);
if (md5HashedValue.length != 16) {
throw new RuntimeException(
"Invalid md5HashedValue.length: " + md5HashedValue.length + " (expected 16)");
}

long hash1 = this.getLongLittleEndian(md5HashedValue, 0);
long hash2 = this.getLongLittleEndian(md5HashedValue, 8);
long hash1 = getLongLittleEndian(md5HashedValue, 0);
long hash2 = getLongLittleEndian(md5HashedValue, 8);

for (int i = 0; i < this.hashCount; i++) {
int index = this.getBitIndex(hash1, hash2, i);
Expand All @@ -84,35 +88,46 @@ public boolean mightContain(@NonNull String value) {
return true;
}

public static byte[] MD5Hash(String value) {
/** Hashes a string using the MD5 hashing algorithm and returns the digest as 16 bytes. */
@NonNull
private static byte[] md5Hash(@NonNull String value) {
MessageDigest digest;
try {
MessageDigest digest = MessageDigest.getInstance("MD5");
digest.update(value.getBytes());
return digest.digest();
digest = MessageDigest.getInstance("MD5");
} catch (NoSuchAlgorithmException e) {
Logger.warn(TAG, "Could not create hashing algorithm: MD5.", e);
return null;
throw new RuntimeException("Missing MD5 MessageDigest provider.", e);
}
return digest.digest(value.getBytes());
}

// Interpret 8 bytes into a long, using little endian 2’s complement.
public static long getLongLittleEndian(byte[] bytes, int offset) {
/** Interpret 8 bytes into a long, using little endian 2’s complement. */
private static long getLongLittleEndian(@NonNull byte[] bytes, int offset) {
long result = 0;
for (int i = 0; i < 8 && i < bytes.length; i++) {
result |= (bytes[offset + i] & 0xFFL) << (i * 8);
}
return result;
}

// Calculate the ith hash value based on the hashed 64bit integers,
// and calculate its corresponding bit index in the bitmap to be checked.
private int getBitIndex(long num1, long num2, int index) {
/**
* Calculates the i-th hash value from the two hashed 64-bit integers and maps it to the
* corresponding bit index in the bitmap to be checked.
*/
private int getBitIndex(long hash1, long hash2, int index) {
// Calculate hashed value h(i) = h1 + (i * h2).
Long hashValue2 = num1 + num2 * index;
return (int) Long.remainderUnsigned(hashValue2, this.size);
long combinedHash = hash1 + (hash2 * index);
long mod = unsignedRemainder(combinedHash, this.size);
return (int) mod;
}

/**
 * Calculates {@code dividend % divisor}, treating both operands as unsigned 64-bit values.
 *
 * <p>Only signed {@code long} arithmetic is used; unsigned comparisons are done by flipping the
 * sign bit of both sides, which maps unsigned order onto signed order.
 *
 * @param dividend the value to be divided, interpreted as unsigned
 * @param divisor the value to divide by, interpreted as unsigned
 * @return the unsigned remainder, in the range {@code [0, divisor)}
 * @throws ArithmeticException if {@code divisor} is zero
 */
private static long unsignedRemainder(long dividend, long divisor) {
  if (divisor < 0) {
    // The divisor is >= 2^63 when read as unsigned, so the quotient can only be 0 or 1.
    boolean dividendIsSmaller = (dividend ^ Long.MIN_VALUE) < (divisor ^ Long.MIN_VALUE);
    return dividendIsSmaller ? dividend : dividend - divisor;
  }
  if (dividend >= 0) {
    // Both operands fit in the signed range, so the signed remainder is already correct.
    return dividend % divisor;
  }
  // Halve the dividend (unsigned shift) so the division can be done with signed arithmetic, then
  // double the quotient again. The approximation is at most one divisor too small.
  long quotient = ((dividend >>> 1) / divisor) << 1;
  long remainder = dividend - quotient * divisor;
  // The intermediate remainder lies in [0, 2 * divisor) and may exceed Long.MAX_VALUE, so it has
  // to be compared with the divisor as an unsigned value.
  boolean needsCorrection = (remainder ^ Long.MIN_VALUE) >= (divisor ^ Long.MIN_VALUE);
  return needsCorrection ? remainder - divisor : remainder;
}

// Return whether the bit on the given index in the bitmap is set to 1.
/** Return whether the bit on the given index in the bitmap is set to 1. */
private boolean isBitSet(int index) {
// To retrieve bit n, calculate: (bitmap[n / 8] & (0x01 << (n % 8))).
byte byteAtIndex = this.bitmap[(index / 8)];
Expand All @@ -123,12 +138,12 @@ private boolean isBitSet(int index) {
@Override
public String toString() {
return "BloomFilter{"
+ "bitmap="
+ Arrays.toString(bitmap)
+ ", hashCount="
+ hashCount
+ ", size="
+ size
+ "bitmap="
+ Base64.encodeToString(bitmap, Base64.NO_WRAP)
+ '}';
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public class BloomFilterTest {
@Test
public void testEmptyBloomFilter() {
BloomFilter bloomFilter = new BloomFilter(new byte[0], 0, 0);
assertEquals(bloomFilter.getSize(), 0);
assertTrue(bloomFilter.isEmpty());
}

@Test
Expand All @@ -57,9 +57,9 @@ public void testEmptyBloomFilterThrowException() {
@Test
public void testNonEmptyBloomFilter() {
BloomFilter bloomFilter1 = new BloomFilter(new byte[1], 0, 1);
assertEquals(bloomFilter1.getSize(), 8);
assertFalse(bloomFilter1.isEmpty());
BloomFilter bloomFilter2 = new BloomFilter(new byte[1], 7, 1);
assertEquals(bloomFilter2.getSize(), 1);
assertFalse(bloomFilter2.isEmpty());
}

@Test
Expand Down Expand Up @@ -115,27 +115,27 @@ public void testBloomFilterMightContainOnEmptyStringAlwaysReturnFalse() {
* membership results from n to 2n is expected to be false with some false positive results.
*/
@Test
@SuppressWarnings("DefaultCharset")
public void testBloomFilterGoldenTest() throws Exception {
String documentPrefix = "projects/project-1/databases/database-1/documents/coll/doc";

// Import the golden test files for bloom filter
HashMap<String, JSONObject> parsedSpecFiles = new HashMap<>();
File jsonDir = new File("src/test/resources/bloom_filter_golden_test_data");
File[] jsonFiles = jsonDir.listFiles();
for (File f : jsonFiles) {
if (!f.toString().endsWith(".json")) {
assert jsonFiles != null;
for (File file : jsonFiles) {
if (!file.toString().endsWith(".json")) {
continue;
}

// Read the files into a map.
StringBuilder builder = new StringBuilder();
BufferedReader reader = new BufferedReader(new FileReader(f));
BufferedReader reader = new BufferedReader(new FileReader(file));
Stream<String> lines = reader.lines();
lines.forEach(builder::append);
String json = builder.toString();
JSONObject fileJSON = new JSONObject(json);
parsedSpecFiles.put(f.getName(), fileJSON);
parsedSpecFiles.put(file.getName(), fileJSON);
}

// Loop and test the files
Expand All @@ -146,6 +146,7 @@ public void testBloomFilterGoldenTest() throws Exception {

// Read test data and instantiate a BloomFilter object
JSONObject fileJSON = parsedSpecFiles.get(fileName);
assert fileJSON != null;
JSONObject bits = fileJSON.getJSONObject("bits");
String bitmap = bits.getString("bitmap");
int padding = bits.getInt("padding");
Expand All @@ -156,13 +157,21 @@ public void testBloomFilterGoldenTest() throws Exception {
// Find corresponding membership test result.
JSONObject resultJSON =
parsedSpecFiles.get(fileName.replace("bloom_filter_proto", "membership_test_result"));
assert resultJSON != null;
String membershipTestResults = resultJSON.getString("membershipTestResults");

// Run and compare mightContain result with the expectation.
for (int i = 0; i < membershipTestResults.length(); i++) {
boolean expectedMembershipResult = membershipTestResults.charAt(i) == '1';
boolean mightContain = bloomFilter.mightContain(documentPrefix + i);
assertEquals(mightContain, expectedMembershipResult);
assertEquals(
"MightContain result doesn't match the expectation. File: "
+ fileName
+ ". Document: "
+ documentPrefix
+ i,
mightContain,
expectedMembershipResult);
}
}
}
Expand Down