forked from TheAlgorithms/Python
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhash_table.py
97 lines (75 loc) · 3.13 KB
/
hash_table.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
"""
Hash Table is a data structure which stores data in an associative manner.
In a hash table, data is stored in an array format, where each data value has its own unique index value.
Access of data becomes very fast if we know the index of the desired data.
Thus, it becomes a data structure in which insertion and search operations are very fast
irrespective of the size of the data.
Hash Table uses an array as a storage medium and uses hash technique to generate an index
where an element is to be inserted or is to be located from.
Hashing is a technique to convert a range of key values into a range of indexes of an array.
We're going to use modulo operator to get a range of key values.
"""
#!/usr/bin/env python3
from .number_theory.prime_numbers import next_prime
class HashTable:
"""
Basic Hash Table example with open addressing and linear probing
"""
def __init__(self, size_table, charge_factor=None, lim_charge=None):
self.size_table = size_table
self.values = [None] * self.size_table
self.lim_charge = 0.75 if lim_charge is None else lim_charge
self.charge_factor = 1 if charge_factor is None else charge_factor
self.__aux_list = []
self._keys = {}
def keys(self):
return self._keys
def balanced_factor(self):
return sum(1 for slot in self.values if slot is not None) / (
self.size_table * self.charge_factor
)
def hash_function(self, key):
return key % self.size_table
def _step_by_step(self, step_ord):
print(f"step {step_ord}")
print([i for i in range(len(self.values))])
print(self.values)
def bulk_insert(self, values):
i = 1
self.__aux_list = values
for value in values:
self.insert_data(value)
self._step_by_step(i)
i += 1
def _set_value(self, key, data):
self.values[key] = data
self._keys[key] = data
def _collision_resolution(self, key, data=None):
new_key = self.hash_function(key + 1)
while self.values[new_key] is not None and self.values[new_key] != key:
if self.values.count(None) > 0:
new_key = self.hash_function(new_key + 1)
else:
new_key = None
break
return new_key
def rehashing(self):
survivor_values = [value for value in self.values if value is not None]
self.size_table = next_prime(self.size_table, factor=2)
self._keys.clear()
self.values = [None] * self.size_table # hell's pointers D: don't DRY ;/
for value in survivor_values:
self.insert_data(value)
def insert_data(self, data):
key = self.hash_function(data)
if self.values[key] is None:
self._set_value(key, data)
elif self.values[key] == data:
pass
else:
collision_resolution = self._collision_resolution(key, data)
if collision_resolution is not None:
self._set_value(collision_resolution, data)
else:
self.rehashing()
self.insert_data(data)