Skip to content

Commit b306135

Browse files
authored
Implement BloomFilter class (#4524)
1 parent fd907e5 commit b306135

File tree

26 files changed

+482
-0
lines changed

26 files changed

+482
-0
lines changed
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
// Copyright 2023 Google LLC
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package com.google.firebase.firestore.remote;
16+
17+
import android.util.Base64;
18+
import androidx.annotation.NonNull;
19+
import androidx.annotation.VisibleForTesting;
20+
import java.nio.charset.StandardCharsets;
21+
import java.security.MessageDigest;
22+
import java.security.NoSuchAlgorithmException;
23+
24+
public class BloomFilter {
25+
private final int bitCount;
26+
private final byte[] bitmap;
27+
private final int hashCount;
28+
private final MessageDigest md5HashMessageDigest;
29+
30+
public BloomFilter(@NonNull byte[] bitmap, int padding, int hashCount) {
31+
if (bitmap == null) {
32+
throw new NullPointerException("Bitmap cannot be null.");
33+
}
34+
if (padding < 0 || padding >= 8) {
35+
throw new IllegalArgumentException("Invalid padding: " + padding);
36+
}
37+
if (hashCount < 0) {
38+
throw new IllegalArgumentException("Invalid hash count: " + hashCount);
39+
}
40+
if (bitmap.length > 0 && hashCount == 0) {
41+
// Only empty bloom filter can have 0 hash count.
42+
throw new IllegalArgumentException("Invalid hash count: " + hashCount);
43+
}
44+
if (bitmap.length == 0) {
45+
// Empty bloom filter should have 0 padding.
46+
if (padding != 0) {
47+
throw new IllegalArgumentException(
48+
"Expected padding of 0 when bitmap length is 0, but got " + padding);
49+
}
50+
}
51+
52+
this.bitmap = bitmap;
53+
this.hashCount = hashCount;
54+
this.bitCount = bitmap.length * 8 - padding;
55+
this.md5HashMessageDigest = createMd5HashMessageDigest();
56+
}
57+
58+
@VisibleForTesting
59+
int getBitCount() {
60+
return this.bitCount;
61+
}
62+
63+
/**
64+
* Check whether the given string is a possible member of the bloom filter. It might return false
65+
* positive result, ie, the given string is not a member of the bloom filter, but the method
66+
* returned true.
67+
*
68+
* @param value the string to be tested for membership.
69+
* @return true if the given string might be contained in the bloom filter, or false if the given
70+
* string is definitely not contained in the bloom filter.
71+
*/
72+
public boolean mightContain(@NonNull String value) {
73+
// Empty bitmap should return false on membership check.
74+
if (this.bitCount == 0) {
75+
return false;
76+
}
77+
78+
byte[] hashedValue = md5HashDigest(value);
79+
if (hashedValue.length != 16) {
80+
throw new RuntimeException(
81+
"Invalid md5 hash array length: " + hashedValue.length + " (expected 16)");
82+
}
83+
84+
long hash1 = getLongLittleEndian(hashedValue, 0);
85+
long hash2 = getLongLittleEndian(hashedValue, 8);
86+
87+
for (int i = 0; i < this.hashCount; i++) {
88+
int index = this.getBitIndex(hash1, hash2, i);
89+
if (!this.isBitSet(index)) {
90+
return false;
91+
}
92+
}
93+
return true;
94+
}
95+
96+
/** Hash a string using md5 hashing algorithm, and return an array of 16 bytes. */
97+
@NonNull
98+
private byte[] md5HashDigest(@NonNull String value) {
99+
return md5HashMessageDigest.digest(value.getBytes(StandardCharsets.UTF_8));
100+
}
101+
102+
@NonNull
103+
private static MessageDigest createMd5HashMessageDigest() {
104+
try {
105+
return MessageDigest.getInstance("MD5");
106+
} catch (NoSuchAlgorithmException e) {
107+
throw new RuntimeException("Missing MD5 MessageDigest provider: ", e);
108+
}
109+
}
110+
111+
/** Interpret 8 bytes into a long, using little endian 2’s complement. */
112+
private static long getLongLittleEndian(@NonNull byte[] bytes, int offset) {
113+
long result = 0;
114+
for (int i = 0; i < 8; i++) {
115+
result |= (bytes[offset + i] & 0xFFL) << (i * 8);
116+
}
117+
return result;
118+
}
119+
120+
/**
121+
* Calculate the ith hash value based on the hashed 64 bit unsigned integers, and calculate its
122+
* corresponding bit index in the bitmap to be checked.
123+
*/
124+
private int getBitIndex(long hash1, long hash2, int hashIndex) {
125+
// Calculate hashed value h(i) = h1 + (i * h2).
126+
// Even though we are interpreting hash1 and hash2 as unsigned, the addition and multiplication
127+
// operators still perform the correct operation and give the desired overflow behavior.
128+
long combinedHash = hash1 + (hash2 * hashIndex);
129+
long modulo = unsignedRemainder(combinedHash, this.bitCount);
130+
return (int) modulo;
131+
}
132+
133+
/**
134+
* Calculate modulo, where the dividend and divisor are treated as unsigned 64-bit longs.
135+
*
136+
* <p>The implementation is taken from <a
137+
* href="https://github.com/google/guava/blob/553037486901cc60820ab7dcb38a25b6f34eba43/android/guava/src/com/google/common/primitives/UnsignedLongs.java">Guava</a>,
138+
* simplified to our needs.
139+
*
140+
* <p>
141+
*/
142+
private static long unsignedRemainder(long dividend, long divisor) {
143+
long quotient = ((dividend >>> 1) / divisor) << 1;
144+
long remainder = dividend - quotient * divisor;
145+
return remainder - (remainder >= divisor ? divisor : 0);
146+
}
147+
148+
/** Return whether the bit at the given index in the bitmap is set to 1. */
149+
private boolean isBitSet(int index) {
150+
// To retrieve bit n, calculate: (bitmap[n / 8] & (0x01 << (n % 8))).
151+
byte byteAtIndex = this.bitmap[index / 8];
152+
int offset = index % 8;
153+
return (byteAtIndex & (0x01 << offset)) != 0;
154+
}
155+
156+
@Override
157+
public String toString() {
158+
return "BloomFilter{"
159+
+ "hashCount="
160+
+ hashCount
161+
+ ", size="
162+
+ bitCount
163+
+ ", bitmap=\""
164+
+ Base64.encodeToString(bitmap, Base64.NO_WRAP)
165+
+ "\"}";
166+
}
167+
}

0 commit comments

Comments
 (0)