Skip to content

Commit d9153d3

Browse files
committed
introduce get_state - a tool to investigate the state of a hashmap
1 parent d0cfa03 commit d9153d3

File tree

2 files changed

+38 -0 lines changed

pandas/_libs/hashtable_class_helper.pxi.in

+30
Original file line number | Diff line number | Diff line change
@@ -420,6 +420,15 @@ cdef class {{name}}HashTable(HashTable):
420420
sizeof(Py_ssize_t)) # vals
421421
return overhead + for_flags + for_pairs
422422

423+
def get_state(self):
424+
"""
Return information about the state of the hashtable.

The result is a dict with the keys 'n_buckets', 'size', 'n_occupied'
and 'upper_bound', each read from the same-named field of self.table.
"""
425+
return {
426+
'n_buckets' : self.table.n_buckets,
427+
'size' : self.table.size,
428+
'n_occupied' : self.table.n_occupied,
429+
'upper_bound' : self.table.upper_bound,
430+
}
431+
423432
cpdef get_item(self, {{dtype}}_t val):
424433
cdef:
425434
khiter_t k
@@ -747,6 +756,15 @@ cdef class StringHashTable(HashTable):
747756
sizeof(Py_ssize_t)) # vals
748757
return overhead + for_flags + for_pairs
749758

759+
def get_state(self):
760+
"""
Return information about the state of the hashtable.

The result is a dict with the keys 'n_buckets', 'size', 'n_occupied'
and 'upper_bound', each read from the same-named field of self.table.
"""
761+
return {
762+
'n_buckets' : self.table.n_buckets,
763+
'size' : self.table.size,
764+
'n_occupied' : self.table.n_occupied,
765+
'upper_bound' : self.table.upper_bound,
766+
}
767+
750768
cpdef get_item(self, str val):
751769
cdef:
752770
khiter_t k
@@ -1072,6 +1090,18 @@ cdef class PyObjectHashTable(HashTable):
10721090
sizeof(Py_ssize_t)) # vals
10731091
return overhead + for_flags + for_pairs
10741092

1093+
def get_state(self):
1094+
"""
1095+
Return information about the current state of the hashtable:
1096+
a dict holding the n_buckets, size, n_occupied and upper_bound
fields of self.table.
1097+
"""
1098+
return {
1099+
'n_buckets' : self.table.n_buckets,
1100+
'size' : self.table.size,
1101+
'n_occupied' : self.table.n_occupied,
1102+
'upper_bound' : self.table.upper_bound,
1103+
}
1104+
10751105
cpdef get_item(self, object val):
10761106
cdef:
10771107
khiter_t k

pandas/tests/libs/test_hashtable.py

+8
Original file line number | Diff line number | Diff line change
@@ -155,6 +155,14 @@ def test_tracemalloc_for_empty(self, table_type, dtype):
155155
del table
156156
assert get_allocated_khash_memory() == 0
157157

158+
def test_get_state(self, table_type, dtype):
# Check get_state() on a table that has just been constructed and has
# had nothing inserted into it.
159+
table = table_type(1000)
160+
state = table.get_state()
# No entries were added, so both counters are expected to be zero.
161+
assert state["size"] == 0
162+
assert state["n_occupied"] == 0
# Only the presence of these keys is checked; their values are not asserted.
163+
assert "n_buckets" in state
164+
assert "upper_bound" in state
165+
158166

159167
def test_get_labels_groupby_for_Int64(writable):
160168
table = ht.Int64HashTable()

0 commit comments

Comments
 (0)