|
| 1 | +/** |
| 2 | + * @license |
| 3 | + * Copyright 2022 Google LLC |
| 4 | + * |
| 5 | + * Licensed under the Apache License, Version 2.0 (the "License"); |
| 6 | + * you may not use this file except in compliance with the License. |
| 7 | + * You may obtain a copy of the License at |
| 8 | + * |
| 9 | + * http://www.apache.org/licenses/LICENSE-2.0 |
| 10 | + * |
| 11 | + * Unless required by applicable law or agreed to in writing, software |
| 12 | + * distributed under the License is distributed on an "AS IS" BASIS, |
| 13 | + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 14 | + * See the License for the specific language governing permissions and |
| 15 | + * limitations under the License. |
| 16 | + */ |
| 17 | +import { Md5, Integer } from '@firebase/webchannel-wrapper'; |
| 18 | + |
| 19 | +import { newTextEncoder } from '../platform/serializer'; |
| 20 | +import { debugAssert } from '../util/assert'; |
| 21 | + |
/**
 * Upper bound used to detect when a computed hash no longer fits in 64
 * unsigned bits: two 32-bit words with every bit set, and a sign word of 0.
 */
const MAX_64_BIT_UNSIGNED_INTEGER = new Integer(
  [0xffffffff, 0xffffffff],
  /* sign= */ 0
);
| 23 | + |
/**
 * Hashes a string with MD5.
 *
 * The string is first turned into bytes by the platform text encoder, then
 * fed to a fresh `Md5` instance.
 *
 * @param value - The string to hash.
 * @returns The result of `Md5.digest()` copied into a new `Uint8Array`.
 */
function getMd5HashValue(value: string): Uint8Array {
  const encoded = newTextEncoder().encode(value);
  const hasher = new Md5();
  hasher.update(encoded);
  const digest = hasher.digest();
  return new Uint8Array(digest);
}
| 31 | + |
/**
 * Interprets the first 16 bytes of `bytes` as two 64-bit unsigned integers
 * stored in little-endian byte order.
 *
 * Each 64-bit value is assembled from two 32-bit little-endian words, passed
 * to `Integer` low word first with a sign word of 0.
 *
 * @param bytes - The bytes to decode; at least 16 bytes must be available.
 *   A shorter input makes `DataView.getUint32` throw a `RangeError`.
 * @returns A tuple `[first, second]` built from bytes 0-7 and bytes 8-15.
 */
function get64BitUints(bytes: Uint8Array): [Integer, Integer] {
  // A Uint8Array may be a window into a larger ArrayBuffer (e.g. the result of
  // `subarray()`), so the view must start at the array's own offset rather
  // than at the beginning of the backing buffer.
  const dataView = new DataView(
    bytes.buffer,
    bytes.byteOffset,
    bytes.byteLength
  );
  const chunk1 = dataView.getUint32(0, /* littleEndian= */ true);
  const chunk2 = dataView.getUint32(4, /* littleEndian= */ true);
  const chunk3 = dataView.getUint32(8, /* littleEndian= */ true);
  const chunk4 = dataView.getUint32(12, /* littleEndian= */ true);
  const integer1 = new Integer([chunk1, chunk2], 0);
  const integer2 = new Integer([chunk3, chunk4], 0);
  return [integer1, integer2];
}
| 44 | + |
/**
 * A read-only bloom filter backed by a byte array.
 *
 * Membership is tested by hashing the value with MD5, splitting the digest
 * into two 64-bit integers, and probing `hashCount` bit positions derived
 * from them.
 */
export class BloomFilter {
  /** Number of usable bits: `bitmap.length * 8 - padding`. */
  readonly size: number;
  /** `size` as an `Integer`, cached so each probe does not rebuild it. */
  private readonly sizeInInteger: Integer;

  /**
   * @param bitmap - The filter's bits, eight per byte.
   * @param padding - Count of unused bits in the bitmap; asserted to be in
   *   `[0, 8)` and to be 0 when the bitmap is empty.
   * @param hashCount - Number of bit positions probed per lookup; asserted to
   *   be positive for a non-empty bitmap and non-negative for an empty one.
   */
  constructor(
    private readonly bitmap: Uint8Array,
    padding: number,
    private readonly hashCount: number
  ) {
    debugAssert(padding >= 0 && padding < 8, `Invalid padding: ${padding}`);

    const bitmapIsEmpty = bitmap.length === 0;
    if (bitmapIsEmpty) {
      // A hash count of 0 is only acceptable for an empty filter.
      debugAssert(hashCount >= 0, `Invalid hash count: ${hashCount}`);

      // An empty filter has no bits that could be padded.
      debugAssert(
        padding === 0,
        `Invalid padding when bitmap length is 0: ${padding}`
      );
    } else {
      debugAssert(hashCount > 0, `Invalid hash count: ${hashCount}`);
    }

    const usableBits = bitmap.length * 8 - padding;
    this.size = usableBits;
    this.sizeInInteger = Integer.fromNumber(usableBits);
  }

  /**
   * Computes the bit position for the `index`-th probe.
   *
   * The probe hash is `num1 + index * num2`; when that exceeds the 64-bit
   * unsigned maximum only its two lowest 32-bit words are kept. The result is
   * then reduced modulo the filter size.
   */
  private getBitIndex(num1: Integer, num2: Integer, index: number): number {
    const scaled = num2.multiply(Integer.fromNumber(index));
    const sum = num1.add(scaled);
    const overflowed = sum.compare(MAX_64_BIT_UNSIGNED_INTEGER) === 1;
    const wrapped = overflowed
      ? new Integer([sum.getBits(0), sum.getBits(1)], 0)
      : sum;
    return wrapped.modulo(this.sizeInInteger).toNumber();
  }

  /** Reports whether bit number `index` of the bitmap is 1. */
  private isBitSet(index: number): boolean {
    // Bit n lives in byte floor(n / 8), at position n % 8 counting from the
    // least significant bit.
    const byteIndex = Math.floor(index / 8);
    const mask = 0x01 << index % 8;
    return (this.bitmap[byteIndex] & mask) !== 0;
  }

  /**
   * Tests `value` against the filter.
   *
   * @param value - The string to look up.
   * @returns `false` when the filter has no bits, when `value` is the empty
   *   string, or as soon as one probed bit is clear; `true` when all
   *   `hashCount` probed bits are set.
   */
  mightContain(value: string): boolean {
    if (value === '' || this.size === 0) {
      return false;
    }

    const [firstHash, secondHash] = get64BitUints(getMd5HashValue(value));
    for (let probe = 0; probe < this.hashCount; probe++) {
      const bitIndex = this.getBitIndex(firstHash, secondHash, probe);
      if (!this.isBitSet(bitIndex)) {
        return false;
      }
    }
    return true;
  }
}
0 commit comments