Skip to content

Commit e197f7c

Browse files
committed
COMPAT/PERF move uniques to method scope
1 parent 3663b90 commit e197f7c

File tree

1 file changed

+11
-9
lines changed

1 file changed

+11
-9
lines changed

pandas/_libs/hashtable.pyx

+11-9
Original file line numberDiff line numberDiff line change
@@ -46,12 +46,10 @@ include "hashtable_func_helper.pxi"
4646

4747
cdef class Factorizer:
4848
cdef public PyObjectHashTable table
49-
cdef public ObjectVector uniques
5049
cdef public Py_ssize_t count
5150

5251
def __init__(self, size_hint):
5352
self.table = PyObjectHashTable(size_hint)
54-
self.uniques = ObjectVector()
5553
self.count = 0
5654

5755
def get_count(self):
@@ -64,19 +62,22 @@ cdef class Factorizer:
6462
>>> factorize(np.array([1,2,np.nan], dtype='O'), na_sentinel=20)
6563
array([ 0, 1, 20])
6664
"""
67-
labels = self.table.get_labels(values, self.uniques,
65+
uniques = ObjectVector()
66+
labels = self.table.get_labels(values, uniques,
6867
self.count, na_sentinel, check_null)
6968
mask = (labels == na_sentinel)
69+
if len(labels) == 0:
70+
return labels
7071
# sort on
7172
if sort:
7273
if labels.dtype != np.intp:
7374
labels = labels.astype(np.intp)
74-
sorter = self.uniques.to_array().argsort()
75+
sorter = uniques.to_array().argsort()
7576
reverse_indexer = np.empty(len(sorter), dtype=np.intp)
7677
reverse_indexer.put(sorter, np.arange(len(sorter)))
7778
labels = reverse_indexer.take(labels, mode='clip')
7879
labels[mask] = na_sentinel
79-
self.count = len(self.uniques)
80+
self.count = len(uniques)
8081
return labels
8182

8283
def unique(self, ndarray[object] values):
@@ -86,35 +87,36 @@ cdef class Factorizer:
8687

8788
cdef class Int64Factorizer:
    """
    Assign integer labels to int64 values using an ``Int64HashTable``.

    Attributes
    ----------
    table : Int64HashTable
        Hash table used by ``factorize`` to produce labels.
    count : Py_ssize_t
        Value passed to ``table.get_labels`` as the starting count; updated
        at the end of every non-empty ``factorize`` call.
    """
    cdef public Int64HashTable table
    cdef public Py_ssize_t count

    def __init__(self, size_hint):
        """Create the backing hash table with ``size_hint`` and reset count."""
        self.table = Int64HashTable(size_hint)
        self.count = 0

    def get_count(self):
        """Return the current value of ``count``."""
        return self.count

    def factorize(self, int64_t[:] values, sort=False,
                  na_sentinel=-1, check_null=True):
        """
        Compute labels for ``values``.

        Parameters
        ----------
        values : int64_t[:]
            Values to label.
        sort : bool, default False
            If True, remap the labels so that they follow the sorted order
            of the uniques collected during this call.
        na_sentinel : int, default -1
            Forwarded unchanged to ``table.get_labels``.
        check_null : bool, default True
            Forwarded unchanged to ``table.get_labels``.

        Returns
        -------
        The label array produced by ``table.get_labels`` (cast to ``np.intp``
        and remapped when ``sort`` is True).
        """
        # The uniques vector is scoped to this call; it is no longer an
        # attribute of the class, so it must be passed as the local here
        # (previously this line still referenced the removed self.uniques).
        uniques = Int64Vector()
        labels = self.table.get_labels(values, uniques,
                                       self.count, na_sentinel,
                                       check_null)

        # Nothing to sort or count for empty input.
        if len(labels) == 0:
            return labels

        # sort on
        if sort:
            if labels.dtype != np.intp:
                labels = labels.astype(np.intp)

            # reverse_indexer[i] is the rank of unique i in sorted order, so
            # taking it at each label converts labels to sorted-order labels.
            sorter = uniques.to_array().argsort()
            reverse_indexer = np.empty(len(sorter), dtype=np.intp)
            reverse_indexer.put(sorter, np.arange(len(sorter)))

            labels = reverse_indexer.take(labels)

        # NOTE(review): uniques is now per-call while self.table persists
        # across calls; confirm that resetting count to len(uniques) is the
        # intended behaviour when factorize is called repeatedly.
        self.count = len(uniques)
        return labels
119121

120122

0 commit comments

Comments
 (0)