Skip to content

Commit d3de0d8

Browse files
committed
fixing sizeof for StringHashTable
1 parent aab7beb commit d3de0d8

File tree

2 files changed

+18
-4
lines changed

2 files changed

+18
-4
lines changed

pandas/_libs/hashtable_class_helper.pxi.in

+5-4
Original file line numberDiff line numberDiff line change
@@ -671,10 +671,11 @@ cdef class StringHashTable(HashTable):
671671
self.table = NULL
672672

673673
def sizeof(self, deep=False):
    """
    Return the size of my table in bytes.

    The result is the sum of three parts: a fixed overhead, the flags
    array and the key/value arrays, each computed from the table's
    current ``n_buckets``.

    Parameters
    ----------
    deep : bool, default False
        Accepted for interface compatibility; not read by this method.

    Returns
    -------
    int
        Number of bytes accounted for.
    """
    # Fixed part: four uint32_t values plus three pointers.
    # NOTE(review): presumably the counters and array pointers of the
    # khash struct itself -- confirm against the khash header.
    overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*)
    # One uint32_t word per 32 buckets (n_buckets >> 5), never fewer than
    # one word.
    for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t)
    # One key pointer and one value slot per bucket.
    for_pairs = self.table.n_buckets * (sizeof(char *) +      # keys
                                        sizeof(Py_ssize_t))   # vals
    return overhead + for_flags + for_pairs
678679

679680
cpdef get_item(self, str val):
680681
cdef:

pandas/tests/libs/test_hashtable.py

+13
Original file line numberDiff line numberDiff line change
@@ -140,6 +140,19 @@ def test_tracemalloc_works(self, table_type, dtype):
140140
assert get_allocated_khash_memory() == 0
141141

142142

143+
def test_tracemalloc_works_for_StringHashTable():
    """Check that ``StringHashTable.sizeof()`` equals the traced khash memory.

    Fills a table with 1000 distinct string keys while tracemalloc is
    active, compares the traced allocation with ``sizeof()``, then checks
    that deleting the table brings the traced allocation back to zero.
    """
    N = 1000
    # Use the builtins directly: ``np.unicode`` and ``np.object`` were plain
    # aliases of ``str`` and ``object`` and no longer exist in current NumPy.
    keys = np.arange(N).astype(str).astype(object)
    with activated_tracemalloc():
        table = ht.StringHashTable()
        table.map_locations(keys)
        used = get_allocated_khash_memory()
        my_size = table.sizeof()
        assert used == my_size
        # Dropping the only reference must release every traced allocation.
        del table
        assert get_allocated_khash_memory() == 0
154+
155+
143156
@pytest.mark.parametrize(
144157
"table_type, dtype",
145158
[

0 commit comments

Comments
 (0)