|
50 | 50 | 0.140625
|
51 | 51 | """
|
52 | 52 | from hashlib import md5, sha256
|
53 |
| -from random import choices |
54 |
| -from string import ascii_lowercase |
55 | 53 |
|
56 | 54 | HASH_FUNCTIONS = (sha256, md5)
|
57 | 55 |
|
@@ -95,41 +93,3 @@ def estimated_error_rate(self):
|
95 | 93 | n_ones = bin(self.bitarray).count("1")
|
96 | 94 | k = len(HASH_FUNCTIONS)
|
97 | 95 | return (n_ones / self.size) ** k
|
98 |
| - |
99 |
| - |
100 |
| -def random_string(size: int) -> str: |
101 |
| - return "".join(choices(ascii_lowercase + " ", k=size)) |
102 |
| - |
103 |
| - |
104 |
| -def test_probability(filter_bits: int = 64, added_elements: int = 20) -> None: |
105 |
| - b = Bloom(size=filter_bits) |
106 |
| - |
107 |
| - k = len(HASH_FUNCTIONS) |
108 |
| - estimated_error_rate_beforehand = ( |
109 |
| - 1 - (1 - 1 / filter_bits) ** (k * added_elements) |
110 |
| - ) ** k |
111 |
| - |
112 |
| - not_added = {random_string(10) for i in range(1000)} |
113 |
| - for _ in range(added_elements): |
114 |
| - b.add(not_added.pop()) |
115 |
| - |
116 |
| - n_ones = bin(b.bitarray).count("1") |
117 |
| - estimated_error_rate = (n_ones / filter_bits) ** k |
118 |
| - |
119 |
| - errors = 0 |
120 |
| - for string in not_added: |
121 |
| - if b.exists(string): |
122 |
| - errors += 1 |
123 |
| - error_rate = errors / len(not_added) |
124 |
| - |
125 |
| - print(f"error_rate = {errors}/{len(not_added)} = {error_rate}") |
126 |
| - print(f"{estimated_error_rate=}") |
127 |
| - print(f"{estimated_error_rate_beforehand=}") |
128 |
| - |
129 |
| - assert ( |
130 |
| - abs(estimated_error_rate - error_rate) <= 0.05 |
131 |
| - ) # 5% absolute margin calculated experimentally |
132 |
| - |
133 |
| - |
134 |
| -if __name__ == "__main__": |
135 |
| - test_probability() |
0 commit comments