Skip to content

Commit 28e6691

Browse files
committed
removed test_probability
1 parent 8b1bec0 commit 28e6691

File tree

1 file changed

+0
-40
lines changed

1 file changed

+0
-40
lines changed

data_structures/hashing/bloom_filter.py

-40
Original file line number | Diff line number | Diff line change
@@ -50,8 +50,6 @@
5050
0.140625
5151
"""
5252
from hashlib import md5, sha256
53-
from random import choices
54-
from string import ascii_lowercase
5553

5654
HASH_FUNCTIONS = (sha256, md5)
5755

@@ -95,41 +93,3 @@ def estimated_error_rate(self):
9593
n_ones = bin(self.bitarray).count("1")
9694
k = len(HASH_FUNCTIONS)
9795
return (n_ones / self.size) ** k
98-
99-
100-
def random_string(size: int) -> str:
101-
return "".join(choices(ascii_lowercase + " ", k=size))
102-
103-
104-
def test_probability(filter_bits: int = 64, added_elements: int = 20) -> None:
105-
b = Bloom(size=filter_bits)
106-
107-
k = len(HASH_FUNCTIONS)
108-
estimated_error_rate_beforehand = (
109-
1 - (1 - 1 / filter_bits) ** (k * added_elements)
110-
) ** k
111-
112-
not_added = {random_string(10) for i in range(1000)}
113-
for _ in range(added_elements):
114-
b.add(not_added.pop())
115-
116-
n_ones = bin(b.bitarray).count("1")
117-
estimated_error_rate = (n_ones / filter_bits) ** k
118-
119-
errors = 0
120-
for string in not_added:
121-
if b.exists(string):
122-
errors += 1
123-
error_rate = errors / len(not_added)
124-
125-
print(f"error_rate = {errors}/{len(not_added)} = {error_rate}")
126-
print(f"{estimated_error_rate=}")
127-
print(f"{estimated_error_rate_beforehand=}")
128-
129-
assert (
130-
abs(estimated_error_rate - error_rate) <= 0.05
131-
) # 5% absolute margin calculated experimentally
132-
133-
134-
if __name__ == "__main__":
135-
test_probability()

0 commit comments

Comments
 (0)