1
1
"""
2
2
See https://en.wikipedia.org/wiki/Bloom_filter
3
+
4
+ >>> b = Bloom()
5
+ >>> b.add("Titanic")
6
+ >>> b.add("Avatar")
7
+ >>> b.exists("Titanic")
8
+ True
9
+ >>> b.exists("Avatar")
10
+ True
11
+ >>> b.exists("The Goodfather")
12
+ False
13
+ >>> b.exists("Interstellar")
14
+ False
15
+ >>> b.exists("Parasite")
16
+ False
17
+ >>> b.exists("Pulp Fiction")
18
+ False
3
19
"""
4
20
from hashlib import md5 , sha256
5
21
from random import choices
@@ -17,26 +33,27 @@ def __init__(self, size: int = 8) -> None:
17
33
def add(self, value: str) -> None:
    """Record ``value`` in the filter.

    The bits of ``self.hash_(value)`` are OR-ed into ``self.bitstring``.
    Bits that are already set stay set, so the filter only ever gains bits.

    Args:
        value: The string to add.
    """
    self.bitstring |= self.hash_(value)
27
44
28
45
def exists(self, value: str) -> bool:
    """Report whether ``value`` may have been added to the filter.

    Args:
        value: The string to look up.

    Returns:
        True when every bit set in ``self.hash_(value)`` is also set in
        ``self.bitstring``. Those bits may have been set by other values,
        so True only means "possibly present". False means at least one
        of the hash bits is missing from the filter.
    """
    h = self.hash_(value)
    # Masking the filter with the hash keeps only the hash's own bits;
    # the value can be present only if none of them were lost.
    return (h & self.bitstring) == h
41
58
42
59
def format_bin (self , value : int ) -> str :
@@ -52,20 +69,6 @@ def hash_(self, value: str) -> int:
52
69
return res
53
70
54
71
55
def test_movies() -> None:
    """Check membership answers of a default ``Bloom()`` holding two titles."""
    b = Bloom()
    b.add("Titanic")
    b.add("Avatar")

    # Values that were added must always be reported as present.
    assert b.exists("Titanic")
    assert b.exists("Avatar")

    # A bool is always ``in (True, False)``, so that form can never fail.
    # Assert the concrete answer instead: the module doctest pins each of
    # these never-added titles to False for this filter.
    for title in ("The Goodfather", "Interstellar", "Parasite", "Pulp Fiction"):
        assert not b.exists(title)
67
-
68
-
69
72
def random_string(size: int) -> str:
    """Build a random string of ``size`` characters.

    Characters are drawn with replacement from the lowercase ASCII letters
    plus the space character.
    """
    alphabet = ascii_lowercase + " "
    picked = choices(alphabet, k=size)
    return "".join(picked)
71
74
@@ -101,5 +104,4 @@ def test_probability(filter_bits: int = 64, added_elements: int = 20) -> None:
101
104
102
105
103
106
# Script entry point: run the probability check when executed directly.
if __name__ == "__main__":
    test_probability()
0 commit comments