Skip to content

Implement BloomFilter class #4524

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 15 commits into from
Jan 20, 2023
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.firebase.firestore.remote;

import android.util.Base64;
import androidx.annotation.NonNull;
import androidx.annotation.VisibleForTesting;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

/**
 * A read-only bloom filter backed by a caller-supplied bitmap.
 *
 * <p>Membership is tested by hashing the value with MD5, splitting the 16-byte digest into two
 * little-endian 64-bit values {@code h1} and {@code h2}, and probing {@code hashCount} bit
 * positions {@code (h1 + i * h2) mod size}, where the arithmetic is treated as unsigned.
 *
 * <p>Each instance owns its own {@link MessageDigest}. Because a {@code MessageDigest} is a
 * stateful object, a single {@code BloomFilter} instance should not be queried from several
 * threads at once without external synchronization; distinct instances do not interfere with each
 * other.
 */
public class BloomFilter {
  /** Number of usable bits in {@link #bitmap}, i.e. {@code bitmap.length * 8 - padding}. */
  private final int size;

  /** Raw filter bits; bit {@code n} lives in byte {@code n / 8} at bit position {@code n % 8}. */
  private final byte[] bitmap;

  /** Number of bit positions probed per membership check. */
  private final int hashCount;

  /**
   * Per-instance MD5 digest. This is deliberately not static: a shared digest would be mutated by
   * every filter, so concurrent lookups on different filters could corrupt each other's hashes.
   */
  private final MessageDigest md5HashMessageDigest;

  /**
   * Creates a bloom filter over the given bitmap. The array is stored as-is (not copied).
   *
   * @param bitmap the filter bits; must not be null.
   * @param padding the number of unused bits in the last byte of {@code bitmap}; must be in the
   *     range [0, 7], and must be 0 when {@code bitmap} is empty.
   * @param hashCount the number of hash probes per lookup; must be positive for a non-empty
   *     bitmap and non-negative for an empty one.
   * @throws NullPointerException if {@code bitmap} is null.
   * @throws IllegalArgumentException if {@code padding} or {@code hashCount} is out of range, or
   *     if the bitmap is so large that its bit count does not fit in an {@code int}.
   * @throws RuntimeException if no MD5 {@link MessageDigest} provider is available.
   */
  public BloomFilter(@NonNull byte[] bitmap, int padding, int hashCount) {
    if (bitmap == null) {
      throw new NullPointerException("Bitmap cannot be null.");
    }
    if (padding < 0 || padding >= 8) {
      throw new IllegalArgumentException("Invalid padding: " + padding);
    }
    // bitmap.length * 8 must fit in an int; otherwise the size would silently wrap negative.
    if (bitmap.length > Integer.MAX_VALUE / 8) {
      throw new IllegalArgumentException("Bitmap is too large: " + bitmap.length + " bytes");
    }

    if (bitmap.length > 0) {
      // Only empty bloom filter can have 0 hash count.
      if (hashCount <= 0) {
        throw new IllegalArgumentException("Invalid hash count: " + hashCount);
      }
    } else {
      if (hashCount < 0) {
        throw new IllegalArgumentException("Invalid hash count: " + hashCount);
      }

      // Empty bloom filter should have 0 padding.
      if (padding != 0) {
        throw new IllegalArgumentException("Invalid padding when bitmap length is 0: " + padding);
      }
    }

    this.bitmap = bitmap;
    this.hashCount = hashCount;
    this.size = bitmap.length * 8 - padding;
    this.md5HashMessageDigest = createMd5HashMessageDigest();
  }

  /** Returns whether the bloom filter contains no usable bits. */
  private boolean isEmpty() {
    return this.size == 0;
  }

  /** Returns the number of bits in the bloom filter. */
  @VisibleForTesting
  int getSize() {
    return this.size;
  }

  /**
   * Checks whether the given string is a possible member of the bloom filter. The result may be a
   * false positive, i.e. the method may return true for a string that is not a member, but a
   * return value of false means at least one probed bit is unset.
   *
   * @param value the string to test for membership.
   * @return true if the given string might be contained in the bloom filter; always false when the
   *     filter is empty or {@code value} is the empty string.
   */
  public boolean mightContain(@NonNull String value) {
    // Empty bitmap or empty value should always return false on membership check.
    if (this.isEmpty() || value.isEmpty()) {
      return false;
    }

    byte[] hashedValue = md5HashDigest(value);
    if (hashedValue.length != 16) {
      throw new RuntimeException(
          "Invalid md5HashedValue.length: " + hashedValue.length + " (expected 16)");
    }

    long hash1 = getLongLittleEndian(hashedValue, 0);
    long hash2 = getLongLittleEndian(hashedValue, 8);

    for (int i = 0; i < this.hashCount; i++) {
      int index = this.getBitIndex(hash1, hash2, i);
      if (!this.isBitSet(index)) {
        return false;
      }
    }
    return true;
  }

  /**
   * Hashes the UTF-8 encoding of a string with this instance's MD5 digest. The digest is left
   * reset and ready for reuse after {@link MessageDigest#digest(byte[])} returns.
   */
  @NonNull
  private byte[] md5HashDigest(@NonNull String value) {
    return md5HashMessageDigest.digest(value.getBytes(StandardCharsets.UTF_8));
  }

  /**
   * Creates a fresh MD5 {@link MessageDigest}.
   *
   * @throws RuntimeException wrapping the {@link NoSuchAlgorithmException} if MD5 is unavailable.
   */
  @NonNull
  private static MessageDigest createMd5HashMessageDigest() {
    try {
      return MessageDigest.getInstance("MD5");
    } catch (NoSuchAlgorithmException e) {
      throw new RuntimeException("Missing MD5 MessageDigest provider.", e);
    }
  }

  /** Interprets 8 bytes starting at {@code offset} as a little-endian 2's complement long. */
  private static long getLongLittleEndian(@NonNull byte[] bytes, int offset) {
    long result = 0;
    for (int i = 0; i < 8; i++) {
      // Mask to an unsigned byte before shifting so sign extension cannot pollute higher bits.
      result |= (bytes[offset + i] & 0xFFL) << (i * 8);
    }
    return result;
  }

  /**
   * Calculates the ith hash value based on the hashed 64-bit integers, and maps it to the
   * corresponding bit index in the bitmap to be checked.
   */
  private int getBitIndex(long hash1, long hash2, int index) {
    // Calculate hashed value h(i) = h1 + (i * h2); overflow wraps, which is the intended
    // unsigned 64-bit arithmetic.
    long combinedHash = hash1 + (hash2 * index);
    long mod = unsignedRemainder(combinedHash, this.size);
    // The remainder is in [0, size), and size is an int, so the narrowing cast is lossless.
    return (int) mod;
  }

  /**
   * Calculates {@code dividend mod divisor}, treating the dividend as an unsigned 64-bit long.
   *
   * <p>The implementation is taken from <a
   * href="https://github.com/google/guava/blob/553037486901cc60820ab7dcb38a25b6f34eba43/android/guava/src/com/google/common/primitives/UnsignedLongs.java">Guava</a>,
   * simplified to our needs: the divisor must be positive. The only caller passes the filter size
   * after the empty-filter check, so that precondition holds.
   */
  private static long unsignedRemainder(long dividend, long divisor) {
    // Halve the dividend (unsigned) so a signed division is safe, then double the quotient. The
    // resulting estimate is at most one too small, which the final correction accounts for.
    long quotient = ((dividend >>> 1) / divisor) << 1;
    long remainder = dividend - quotient * divisor;
    return remainder - (remainder >= divisor ? divisor : 0);
  }

  /** Returns whether the bit at the given index in the bitmap is set to 1. */
  private boolean isBitSet(int index) {
    // To retrieve bit n, calculate: (bitmap[n / 8] & (0x01 << (n % 8))).
    byte byteAtIndex = this.bitmap[(index / 8)];
    int offset = index % 8;
    return (byteAtIndex & (0x01 << offset)) != 0;
  }

  @Override
  public String toString() {
    return "BloomFilter{"
        + "hashCount="
        + hashCount
        + ", size="
        + size
        + ", bitmap=\""
        + Base64.encodeToString(bitmap, Base64.NO_WRAP)
        + "\"}";
  }
}
Loading