Skip to content

Commit 1fc5efd

Browse files
authored
ENH: adding support for Py3.6+ memory tracing for khash-maps (#38048)
1 parent e9c91f1 commit 1fc5efd

File tree

8 files changed

+188
-35
lines changed

8 files changed

+188
-35
lines changed

pandas/_libs/hashtable.pyx

+5-1
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,14 @@ cnp.import_array()
1313

1414

1515
from pandas._libs cimport util
16-
from pandas._libs.khash cimport kh_str_t, khiter_t
16+
from pandas._libs.khash cimport KHASH_TRACE_DOMAIN, kh_str_t, khiter_t
1717
from pandas._libs.missing cimport checknull
1818

1919

20+
def get_hashtable_trace_domain():
    """Return the tracemalloc domain id exposed by khash as KHASH_TRACE_DOMAIN."""
    domain = KHASH_TRACE_DOMAIN
    return domain
22+
23+
2024
cdef int64_t NPY_NAT = util.get_nat()
2125
SIZE_HINT_LIMIT = (1 << 20) + 7
2226

pandas/_libs/hashtable_class_helper.pxi.in

+15-10
Original file line numberDiff line numberDiff line change
@@ -344,9 +344,11 @@ cdef class {{name}}HashTable(HashTable):
344344

345345
def sizeof(self, deep=False):
    """
    Return the size of the hash table in bytes.

    The result is the sum of a fixed overhead term, the flags array and
    the per-bucket key/value storage. ``deep`` is accepted but is not
    used by this method.
    """
    # fixed overhead term: four uint32 values plus three pointers
    fixed_bytes = 3 * sizeof(uint32_t*) + 4 * sizeof(uint32_t)
    # flags: (n_buckets >> 5) uint32 words, but never fewer than one
    flag_bytes = sizeof(uint32_t) * max(1, self.table.n_buckets >> 5)
    # every bucket holds one key and one Py_ssize_t value
    per_bucket = sizeof({{dtype}}_t) + sizeof(Py_ssize_t)
    return fixed_bytes + flag_bytes + self.table.n_buckets * per_bucket
350352

351353
cpdef get_item(self, {{dtype}}_t val):
352354
cdef:
@@ -669,10 +671,11 @@ cdef class StringHashTable(HashTable):
669671
self.table = NULL
670672

671673
def sizeof(self, deep=False):
    """
    Return the size of the hash table in bytes.

    The result is the sum of a fixed overhead term, the flags array and
    the per-bucket key/value storage. Only the ``char *`` key pointers
    are counted, not the strings they point to. ``deep`` is accepted but
    is not used by this method.
    """
    # fixed overhead term: four uint32 values plus three pointers
    overhead = 4 * sizeof(uint32_t) + 3 * sizeof(uint32_t*)
    # flags: (n_buckets >> 5) uint32 words, but never fewer than one
    for_flags = max(1, self.table.n_buckets >> 5) * sizeof(uint32_t)
    # every bucket holds one char* key and one Py_ssize_t value
    for_pairs = self.table.n_buckets * (sizeof(char *) +   # keys
                                        sizeof(Py_ssize_t))  # vals
    return overhead + for_flags + for_pairs
676679

677680
cpdef get_item(self, str val):
678681
cdef:
@@ -994,9 +997,11 @@ cdef class PyObjectHashTable(HashTable):
994997

995998
def sizeof(self, deep=False):
    """
    Return the size of the hash table in bytes.

    The result is the sum of a fixed overhead term, the flags array and
    the per-bucket key/value storage. Only the ``PyObject *`` key
    pointers are counted, not the objects themselves. ``deep`` is
    accepted but is not used by this method.
    """
    # fixed overhead term: four uint32 values plus three pointers
    fixed_bytes = 3 * sizeof(uint32_t*) + 4 * sizeof(uint32_t)
    # flags: (n_buckets >> 5) uint32 words, but never fewer than one
    flag_bytes = sizeof(uint32_t) * max(1, self.table.n_buckets >> 5)
    # every bucket holds one PyObject* key and one Py_ssize_t value
    per_bucket = sizeof(PyObject *) + sizeof(Py_ssize_t)
    return fixed_bytes + flag_bytes + self.table.n_buckets * per_bucket
10001005

10011006
cpdef get_item(self, object val):
10021007
cdef:

pandas/_libs/khash.pxd

+2
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ from numpy cimport (
1414

1515

1616
cdef extern from "khash_python.h":
17+
const int KHASH_TRACE_DOMAIN
18+
1719
ctypedef uint32_t khint_t
1820
ctypedef khint_t khiter_t
1921

pandas/_libs/src/klib/khash.h

+31-13
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,24 @@ int main() {
115115
#include "../inline_helper.h"
116116

117117

118+
// hooks for memory allocator, C-runtime allocator used per default
119+
#ifndef KHASH_MALLOC
120+
#define KHASH_MALLOC malloc
121+
#endif
122+
123+
#ifndef KHASH_REALLOC
124+
#define KHASH_REALLOC realloc
125+
#endif
126+
127+
#ifndef KHASH_CALLOC
128+
#define KHASH_CALLOC calloc
129+
#endif
130+
131+
#ifndef KHASH_FREE
132+
#define KHASH_FREE free
133+
#endif
134+
135+
118136
#if UINT_MAX == 0xffffffffu
119137
typedef unsigned int khint32_t;
120138
#elif ULONG_MAX == 0xffffffffu
@@ -138,7 +156,7 @@ typedef unsigned char khint8_t;
138156
#endif
139157

140158
typedef double khfloat64_t;
141-
typedef double khfloat32_t;
159+
typedef float khfloat32_t;
142160

143161
typedef khint32_t khint_t;
144162
typedef khint_t khiter_t;
@@ -265,14 +283,14 @@ static const double __ac_HASH_UPPER = 0.77;
265283
khval_t *vals; \
266284
} kh_##name##_t; \
267285
SCOPE kh_##name##_t *kh_init_##name(void) { \
268-
return (kh_##name##_t*)calloc(1, sizeof(kh_##name##_t)); \
286+
return (kh_##name##_t*)KHASH_CALLOC(1, sizeof(kh_##name##_t)); \
269287
} \
270288
SCOPE void kh_destroy_##name(kh_##name##_t *h) \
271289
{ \
272290
if (h) { \
273-
free(h->keys); free(h->flags); \
274-
free(h->vals); \
275-
free(h); \
291+
KHASH_FREE(h->keys); KHASH_FREE(h->flags); \
292+
KHASH_FREE(h->vals); \
293+
KHASH_FREE(h); \
276294
} \
277295
} \
278296
SCOPE void kh_clear_##name(kh_##name##_t *h) \
@@ -305,11 +323,11 @@ static const double __ac_HASH_UPPER = 0.77;
305323
if (new_n_buckets < 4) new_n_buckets = 4; \
306324
if (h->size >= (khint_t)(new_n_buckets * __ac_HASH_UPPER + 0.5)) j = 0; /* requested size is too small */ \
307325
else { /* hash table size to be changed (shrink or expand); rehash */ \
308-
new_flags = (khint32_t*)malloc(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
326+
new_flags = (khint32_t*)KHASH_MALLOC(__ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
309327
memset(new_flags, 0xff, __ac_fsize(new_n_buckets) * sizeof(khint32_t)); \
310328
if (h->n_buckets < new_n_buckets) { /* expand */ \
311-
h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
312-
if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
329+
h->keys = (khkey_t*)KHASH_REALLOC(h->keys, new_n_buckets * sizeof(khkey_t)); \
330+
if (kh_is_map) h->vals = (khval_t*)KHASH_REALLOC(h->vals, new_n_buckets * sizeof(khval_t)); \
313331
} /* otherwise shrink */ \
314332
} \
315333
} \
@@ -342,10 +360,10 @@ static const double __ac_HASH_UPPER = 0.77;
342360
} \
343361
} \
344362
if (h->n_buckets > new_n_buckets) { /* shrink the hash table */ \
345-
h->keys = (khkey_t*)realloc(h->keys, new_n_buckets * sizeof(khkey_t)); \
346-
if (kh_is_map) h->vals = (khval_t*)realloc(h->vals, new_n_buckets * sizeof(khval_t)); \
363+
h->keys = (khkey_t*)KHASH_REALLOC(h->keys, new_n_buckets * sizeof(khkey_t)); \
364+
if (kh_is_map) h->vals = (khval_t*)KHASH_REALLOC(h->vals, new_n_buckets * sizeof(khval_t)); \
347365
} \
348-
free(h->flags); /* free the working space */ \
366+
KHASH_FREE(h->flags); /* free the working space */ \
349367
h->flags = new_flags; \
350368
h->n_buckets = new_n_buckets; \
351369
h->n_occupied = h->size; \
@@ -691,8 +709,8 @@ KHASH_MAP_INIT_INT64(int64, size_t)
691709
KHASH_MAP_INIT_UINT64(uint64, size_t)
692710
KHASH_MAP_INIT_INT16(int16, size_t)
693711
KHASH_MAP_INIT_UINT16(uint16, size_t)
694-
KHASH_MAP_INIT_INT16(int8, size_t)
695-
KHASH_MAP_INIT_UINT16(uint8, size_t)
712+
KHASH_MAP_INIT_INT8(int8, size_t)
713+
KHASH_MAP_INIT_UINT8(uint8, size_t)
696714

697715

698716
#endif /* __AC_KHASH_H */

pandas/_libs/src/klib/khash_python.h

+55-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,59 @@
11
#include <string.h>
22
#include <Python.h>
33

4+
// khash should report usage to tracemalloc
5+
#if PY_VERSION_HEX >= 0x03060000
6+
#include <pymem.h>
7+
#if PY_VERSION_HEX < 0x03070000
8+
#define PyTraceMalloc_Track _PyTraceMalloc_Track
9+
#define PyTraceMalloc_Untrack _PyTraceMalloc_Untrack
10+
#endif
11+
#else
12+
#define PyTraceMalloc_Track(...)
13+
#define PyTraceMalloc_Untrack(...)
14+
#endif
15+
16+
17+
static const int KHASH_TRACE_DOMAIN = 424242;
18+
void *traced_malloc(size_t size){
19+
void * ptr = malloc(size);
20+
if(ptr!=NULL){
21+
PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, size);
22+
}
23+
return ptr;
24+
}
25+
26+
void *traced_calloc(size_t num, size_t size){
27+
void * ptr = calloc(num, size);
28+
if(ptr!=NULL){
29+
PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, num*size);
30+
}
31+
return ptr;
32+
}
33+
34+
void *traced_realloc(void* old_ptr, size_t size){
35+
void * ptr = realloc(old_ptr, size);
36+
if(ptr!=NULL){
37+
if(old_ptr != ptr){
38+
PyTraceMalloc_Untrack(KHASH_TRACE_DOMAIN, (uintptr_t)old_ptr);
39+
}
40+
PyTraceMalloc_Track(KHASH_TRACE_DOMAIN, (uintptr_t)ptr, size);
41+
}
42+
return ptr;
43+
}
44+
45+
void traced_free(void* ptr){
46+
if(ptr!=NULL){
47+
PyTraceMalloc_Untrack(KHASH_TRACE_DOMAIN, (uintptr_t)ptr);
48+
}
49+
free(ptr);
50+
}
51+
52+
53+
#define KHASH_MALLOC traced_malloc
54+
#define KHASH_REALLOC traced_realloc
55+
#define KHASH_CALLOC traced_calloc
56+
#define KHASH_FREE traced_free
457
#include "khash.h"
558

659
// Previously we were using the built in cpython hash function for doubles
@@ -128,7 +181,7 @@ typedef struct {
128181
typedef kh_str_starts_t* p_kh_str_starts_t;
129182

130183
// Allocate a zero-initialised kh_str_starts_t through the KHASH_CALLOC hook
// and attach a freshly created string table to it.
// Returns NULL when the wrapper struct itself cannot be allocated.
p_kh_str_starts_t PANDAS_INLINE kh_init_str_starts(void) {
    kh_str_starts_t *result =
        (kh_str_starts_t *)KHASH_CALLOC(1, sizeof(kh_str_starts_t));
    if (result == NULL) {
        // Allocation failed: report it instead of dereferencing NULL below.
        return NULL;
    }
    result->table = kh_init_str();
    return result;
}
@@ -151,7 +204,7 @@ khint_t PANDAS_INLINE kh_get_str_starts_item(const kh_str_starts_t* table, const
151204

152205
// Destroy the wrapped string table and release the wrapper struct through the
// KHASH_FREE hook. A NULL `table` is accepted and ignored.
void PANDAS_INLINE kh_destroy_str_starts(kh_str_starts_t* table) {
    if (table == NULL) {
        // Nothing to release; avoids dereferencing NULL below.
        return;
    }
    kh_destroy_str(table->table);
    KHASH_FREE(table);
}
156209

157210
void PANDAS_INLINE kh_resize_str_starts(kh_str_starts_t* table, khint_t val) {

pandas/core/frame.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -2793,7 +2793,7 @@ def memory_usage(self, index=True, deep=False) -> Series:
27932793
many repeated values.
27942794
27952795
>>> df['object'].astype('category').memory_usage(deep=True)
2796-
5216
2796+
5244
27972797
"""
27982798
result = self._constructor_sliced(
27992799
[c.memory_usage(index=False, deep=deep) for col, c in self.items()],

pandas/tests/base/test_misc.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def test_memory_usage(index_or_series_obj):
7777
if isinstance(obj, Index):
7878
expected = 0
7979
else:
80-
expected = 80 if IS64 else 48
80+
expected = 108 if IS64 else 64
8181
assert res_deep == res == expected
8282
elif is_object or is_categorical:
8383
# only deep will pick them up

pandas/tests/libs/test_hashtable.py

+78-7
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from contextlib import contextmanager
2+
import tracemalloc
3+
14
import numpy as np
25
import pytest
36

@@ -6,9 +9,27 @@
69
import pandas._testing as tm
710

811

12+
@contextmanager
def activated_tracemalloc():
    """
    Context manager that guarantees tracemalloc is tracing inside the block.

    Tracing is started on entry only when it is not already active, and it
    is stopped on exit only when this context manager started it, so a
    tracing session that was running before entry is left untouched.
    """
    started_here = not tracemalloc.is_tracing()
    if started_here:
        tracemalloc.start()
    try:
        yield
    finally:
        # Stopping unconditionally would also end an outer tracing session.
        if started_here:
            tracemalloc.stop()
19+
20+
21+
def get_allocated_khash_memory():
    """
    Return the summed size of all traces recorded in the khash domain.

    Takes a tracemalloc snapshot and keeps only the traces whose domain is
    the one reported by ``ht.get_hashtable_trace_domain()``.
    """
    snapshot = tracemalloc.take_snapshot()
    khash_only = tracemalloc.DomainFilter(True, ht.get_hashtable_trace_domain())
    khash_traces = snapshot.filter_traces((khash_only,)).traces
    return sum(trace.size for trace in khash_traces)
27+
28+
929
@pytest.mark.parametrize(
1030
"table_type, dtype",
1131
[
32+
(ht.PyObjectHashTable, np.object_),
1233
(ht.Int64HashTable, np.int64),
1334
(ht.UInt64HashTable, np.uint64),
1435
(ht.Float64HashTable, np.float64),
@@ -53,13 +74,15 @@ def test_get_set_contains_len(self, table_type, dtype):
5374
assert str(index + 2) in str(excinfo.value)
5475

5576
def test_map(self, table_type, dtype):
    """Check that ``map`` stores every key together with its paired value."""
    if table_type is ht.PyObjectHashTable:
        # PyObjectHashTable has no map-method; report a skip instead of
        # letting this case pass without asserting anything.
        pytest.skip("PyObjectHashTable has no map-method")
    N = 77
    table = table_type()
    keys = np.arange(N).astype(dtype)
    vals = np.arange(N).astype(np.int64) + N
    table.map(keys, vals)
    for i, key in enumerate(keys):
        assert table.get_item(key) == i + N
6386

6487
def test_map_locations(self, table_type, dtype):
6588
N = 8
@@ -101,6 +124,53 @@ def test_unique(self, table_type, dtype):
101124
unique = table.unique(keys)
102125
tm.assert_numpy_array_equal(unique, expected)
103126

127+
def test_tracemalloc_works(self, table_type, dtype):
    """sizeof() of a filled table must equal the khash memory tracemalloc sees."""
    # 8-bit dtypes have only 256 distinct values
    n_keys = 256 if dtype in (np.int8, np.uint8) else 30000
    keys = np.arange(n_keys).astype(dtype)
    with activated_tracemalloc():
        filled = table_type()
        filled.map_locations(keys)
        traced = get_allocated_khash_memory()
        reported = filled.sizeof()
        assert traced == reported
        # once the table is gone no khash allocation may remain tracked
        del filled
        assert get_allocated_khash_memory() == 0
141+
142+
def test_tracemalloc_for_empty(self, table_type, dtype):
    """sizeof() of a fresh table must equal the khash memory tracemalloc sees."""
    with activated_tracemalloc():
        fresh = table_type()
        traced = get_allocated_khash_memory()
        reported = fresh.sizeof()
        assert traced == reported
        # once the table is gone no khash allocation may remain tracked
        del fresh
        assert get_allocated_khash_memory() == 0
150+
151+
152+
def test_tracemalloc_works_for_StringHashTable():
    """sizeof() of a filled StringHashTable must match tracemalloc's khash total."""
    N = 1000
    # Plain ``str`` replaces the Python-2 era alias ``np.compat.unicode``;
    # the keys are still Python str objects held in an object array.
    keys = np.arange(N).astype(str).astype(np.object_)
    with activated_tracemalloc():
        table = ht.StringHashTable()
        table.map_locations(keys)
        used = get_allocated_khash_memory()
        my_size = table.sizeof()
        assert used == my_size
        # once the table is gone no khash allocation may remain tracked
        del table
        assert get_allocated_khash_memory() == 0
163+
164+
165+
def test_tracemalloc_for_empty_StringHashTable():
    """sizeof() of a fresh StringHashTable must match tracemalloc's khash total."""
    with activated_tracemalloc():
        fresh = ht.StringHashTable()
        traced = get_allocated_khash_memory()
        reported = fresh.sizeof()
        assert traced == reported
        # once the table is gone no khash allocation may remain tracked
        del fresh
        assert get_allocated_khash_memory() == 0
173+
104174

105175
@pytest.mark.parametrize(
106176
"table_type, dtype",
@@ -157,6 +227,7 @@ def get_ht_function(fun_name, type_suffix):
157227
@pytest.mark.parametrize(
158228
"dtype, type_suffix",
159229
[
230+
(np.object_, "object"),
160231
(np.int64, "int64"),
161232
(np.uint64, "uint64"),
162233
(np.float64, "float64"),

0 commit comments

Comments
 (0)