Skip to content

Commit 832d777

Browse files
committed
maybe fixed perf?
1 parent 0b49072 commit 832d777

File tree

2 files changed

+6
-6
lines changed

2 files changed

+6
-6
lines changed

pandas/core/algorithms.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1, size_hint=None):
131131
(hash_klass, vec_klass), vals = _get_data_algo(vals, _hashtables)
132132

133133
table = hash_klass(size_hint or len(vals))
134-
uniques = vec_klass()
134+
uniques = vec_klass(len(vals))
135135
labels = table.get_labels(vals, uniques, 0, na_sentinel)
136136

137137
labels = com._ensure_platform_int(labels)

pandas/hashtable.pyx

+5 -5
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
# cython: profile=True
1+
# cython: profile=False
22

33
from cpython cimport PyObject, Py_INCREF, PyList_Check, PyTuple_Check
44

@@ -98,14 +98,14 @@ cdef class Int64Vector(Vector):
9898
cdef:
9999
int64_t *data
100100

101-
def __cinit__(self):
101+
def __cinit__(self, int64_t m = -1):
102102
self.n = 0
103-
self.m = _INIT_VEC_CAP
104-
self.ao = np.empty(_INIT_VEC_CAP, dtype=np.int64)
103+
self.m = _INIT_VEC_CAP if m == -1 else m
104+
self.ao = np.empty(self.m, dtype=np.int64)
105105
self.data = <int64_t*> self.ao.data
106106

107107
cdef resize(self):
108-
self.m = max(self.m * 2, _INIT_VEC_CAP)
108+
self.m = max(self.m * 4, _INIT_VEC_CAP)
109109
self.ao.resize(self.m)
110110
self.data = <int64_t*> self.ao.data
111111

0 commit comments

Comments
 (0)