Skip to content

Implement BloomFilter class #4524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jan 20, 2023
Original file line number Diff line number Diff line change
@@ -0,0 +1,167 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.firebase.firestore.remote;

import android.util.Base64;
import androidx.annotation.NonNull;
import androidx.annotation.VisibleForTesting;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

/**
 * A read-only bloom filter backed by a byte array bitmap.
 *
 * <p>Membership is tested by MD5-hashing the candidate string, splitting the 16-byte digest into
 * two little-endian 64-bit values {@code h1} and {@code h2}, and probing the bits at positions
 * {@code (h1 + i * h2) mod bitCount} for {@code i} in {@code [0, hashCount)}.
 *
 * <p>NOTE(review): the bitmap array passed to the constructor is stored without copying, and a
 * single {@link MessageDigest} instance is reused by every {@link #mightContain} call; confirm
 * that callers neither mutate the array afterwards nor call {@link #mightContain} concurrently.
 */
public class BloomFilter {
  private final int bitCount;
  private final byte[] bitmap;
  private final int hashCount;
  private final MessageDigest md5HashMessageDigest;

  /**
   * Creates a bloom filter over the given bitmap.
   *
   * @param bitmap the raw filter bits; bit {@code n} lives in {@code bitmap[n / 8]} at position
   *     {@code n % 8} counted from the least significant bit.
   * @param padding the number of bits in the bitmap that do not belong to the filter; must be in
   *     {@code [0, 7]}, and must be 0 when the bitmap is empty.
   * @param hashCount the number of bit positions probed per membership check; must be
   *     non-negative, and may be 0 only when the bitmap is empty.
   * @throws NullPointerException if {@code bitmap} is null.
   * @throws IllegalArgumentException if {@code padding} or {@code hashCount} violates the
   *     constraints above.
   * @throws RuntimeException if no MD5 {@link MessageDigest} provider is available.
   */
  public BloomFilter(@NonNull byte[] bitmap, int padding, int hashCount) {
    if (bitmap == null) {
      throw new NullPointerException("Bitmap cannot be null.");
    }
    if (padding < 0 || padding > 7) {
      throw new IllegalArgumentException("Invalid padding: " + padding);
    }

    final boolean emptyBitmap = bitmap.length == 0;

    // A negative hash count is never valid; a zero hash count is valid only for an empty filter.
    if (hashCount < 0 || (hashCount == 0 && !emptyBitmap)) {
      throw new IllegalArgumentException("Invalid hash count: " + hashCount);
    }
    // With no bytes there is nothing to pad.
    if (emptyBitmap && padding != 0) {
      throw new IllegalArgumentException(
          "Expected padding of 0 when bitmap length is 0, but got " + padding);
    }

    this.bitmap = bitmap;
    this.hashCount = hashCount;
    this.bitCount = 8 * bitmap.length - padding;
    this.md5HashMessageDigest = newMd5Digest();
  }

  /** Returns the number of usable bits in the filter, i.e. bitmap bits minus padding. */
  @VisibleForTesting
  int getBitCount() {
    return bitCount;
  }

  /**
   * Tests whether the given string may be a member of this bloom filter. False positives are
   * possible: the method can return true for a string that was never added.
   *
   * @param value the string to be tested for membership.
   * @return true if the string might be contained in the filter, or false if it is definitely
   *     not contained. A filter with zero bits always returns false.
   * @throws RuntimeException if the MD5 digest is not 16 bytes long.
   */
  public boolean mightContain(@NonNull String value) {
    // A filter with no bits cannot contain anything.
    if (bitCount == 0) {
      return false;
    }

    final byte[] digest = md5(value);
    if (digest.length != 16) {
      throw new RuntimeException(
          "Invalid md5 hash array length: " + digest.length + " (expected 16)");
    }

    final long firstHash = readLittleEndianLong(digest, 0);
    final long secondHash = readLittleEndianLong(digest, 8);

    // The i-th probe uses h(i) = h1 + i * h2. The value is built up by repeated addition, which
    // yields the same result as the multiplication because long arithmetic wraps modulo 2^64.
    // Although both hashes are treated as unsigned, plain addition gives the desired overflow
    // behavior.
    long combinedHash = firstHash;
    for (int probe = 0; probe < hashCount; probe++) {
      final int bitIndex = (int) unsignedRemainder(combinedHash, bitCount);
      if (!testBit(bitIndex)) {
        return false;
      }
      combinedHash += secondHash;
    }
    return true;
  }

  /** Returns the MD5 digest of the UTF-8 encoding of the given string. */
  @NonNull
  private byte[] md5(@NonNull String value) {
    final byte[] utf8 = value.getBytes(StandardCharsets.UTF_8);
    return md5HashMessageDigest.digest(utf8);
  }

  /**
   * Obtains an MD5 {@link MessageDigest}, converting the checked lookup failure into a {@link
   * RuntimeException}.
   */
  @NonNull
  private static MessageDigest newMd5Digest() {
    final MessageDigest digest;
    try {
      digest = MessageDigest.getInstance("MD5");
    } catch (NoSuchAlgorithmException e) {
      throw new RuntimeException("Missing MD5 MessageDigest provider: ", e);
    }
    return digest;
  }

  /**
   * Reads the 8 bytes starting at {@code offset} as a little-endian, two's complement long: the
   * byte at {@code offset} becomes the least significant byte of the result.
   */
  private static long readLittleEndianLong(@NonNull byte[] bytes, int offset) {
    long value = 0;
    // Walk from the most significant byte down, shifting earlier bytes up as we go.
    for (int i = 7; i >= 0; i--) {
      value = (value << 8) | (bytes[offset + i] & 0xFFL);
    }
    return value;
  }

  /**
   * Computes {@code dividend mod divisor} with the dividend interpreted as an unsigned 64-bit
   * value.
   *
   * <p>The implementation is taken from <a
   * href="https://github.com/google/guava/blob/553037486901cc60820ab7dcb38a25b6f34eba43/android/guava/src/com/google/common/primitives/UnsignedLongs.java">Guava</a>,
   * simplified to our needs. The only caller in this class passes the filter's bit count as the
   * divisor.
   */
  private static long unsignedRemainder(long dividend, long divisor) {
    // Halve the dividend with an unsigned shift so the signed division sees a non-negative value,
    // then double the quotient to get an estimate for the full dividend.
    final long estimatedQuotient = ((dividend >>> 1) / divisor) << 1;
    final long candidate = dividend - estimatedQuotient * divisor;
    // The estimate can leave one extra multiple of the divisor in the remainder.
    if (candidate >= divisor) {
      return candidate - divisor;
    }
    return candidate;
  }

  /** Returns whether the bit at the given index in the bitmap is set to 1. */
  private boolean testBit(int index) {
    // Bit n is stored in byte n / 8, at position n % 8 counted from the least significant bit.
    final byte container = bitmap[index / 8];
    final int mask = 0x01 << (index % 8);
    return (container & mask) != 0;
  }

  /** Renders the hash count, the bit count and the Base64-encoded (unwrapped) bitmap. */
  @Override
  public String toString() {
    final StringBuilder text = new StringBuilder("BloomFilter{");
    text.append("hashCount=").append(hashCount);
    text.append(", size=").append(bitCount);
    text.append(", bitmap=\"").append(Base64.encodeToString(bitmap, Base64.NO_WRAP));
    text.append("\"}");
    return text.toString();
  }
}
Loading