Skip to content

Commit 80e0414

Browse files
committed
COMPAT: add refcheck kwarg and percolate out to app-level for PyPy
1 parent e221dd8 commit 80e0414

File tree

4 files changed

+11
-13
lines changed

4 files changed

+11
-13
lines changed

pandas/_libs/hashtable.pyx

+3 -2
Original file line number | Diff line number | Diff line change
@@ -65,13 +65,14 @@ cdef class Factorizer:
65 65   array([ 0, 1, 20])
66 66   """
67 67   labels = self.table.get_labels(values, self.uniques,
68    - self.count, na_sentinel, check_null)
   68 + self.count, na_sentinel,
   69 + check_null, refcheck=False)
69 70   mask = (labels == na_sentinel)
70 71   # sort on
71 72   if sort:
72 73   if labels.dtype != np.intp:
73 74   labels = labels.astype(np.intp)
74    - sorter = self.uniques.to_array().argsort()
   75 + sorter = self.uniques.to_array(refcheck=False).argsort()
75 76   reverse_indexer = np.empty(len(sorter), dtype=np.intp)
76 77   reverse_indexer.put(sorter, np.arange(len(sorter)))
77 78   labels = reverse_indexer.take(labels, mode='clip')

pandas/_libs/hashtable_class_helper.pxi.in

+5 -8
Original file line number | Diff line number | Diff line change
@@ -573,7 +573,7 @@ cdef class StringHashTable(HashTable):
573 573   # uniques
574 574   uniques = ObjectVector()
575 575   for i in range(count):
576     - uniques.append(values[uindexer[i]])
    576 + uniques.append(values[uindexer[i]], refcheck=False)
577 577   return uniques.to_array(refcheck=False)
578 578
579 579   def factorize(self, ndarray[object] values):
@@ -657,7 +657,6 @@ cdef class StringHashTable(HashTable):
657 657   char **vecs
658 658   khiter_t k
659 659
660     - # refcheck ignored, for compatibility only
661 660
662 661   # these by-definition *must* be strings
663 662   labels = np.zeros(n, dtype=np.int64)
@@ -697,7 +696,7 @@ cdef class StringHashTable(HashTable):
697 696
698 697   # uniques
699 698   for i in range(count):
700     - uniques.append(values[uindexer[i]])
    699 + uniques.append(values[uindexer[i]], refcheck=refcheck)
701 700
702 701   return np.asarray(labels)
703 702

@@ -811,10 +810,10 @@ cdef class PyObjectHashTable(HashTable):
811 810   k = kh_get_pymap(self.table, <PyObject*>val)
812 811   if k == self.table.n_buckets:
813 812   kh_put_pymap(self.table, <PyObject*>val, &ret)
814     - uniques.append(val)
    813 + uniques.append(val, refcheck=False)
815 814   elif not seen_na:
816 815   seen_na = 1
817     - uniques.append(nan)
    816 + uniques.append(nan, refcheck=False)
818 817
819 818   return uniques.to_array(refcheck=False)
820 819

@@ -829,8 +828,6 @@ cdef class PyObjectHashTable(HashTable):
829 828   object val
830 829   khiter_t k
831 830
832     - # refcheck ignored, for compatibility only
833     -
834 831   labels = np.empty(n, dtype=np.int64)
835 832
836 833   for i in range(n):
@@ -848,7 +845,7 @@ cdef class PyObjectHashTable(HashTable):
848 845   else:
849 846   k = kh_put_pymap(self.table, <PyObject*>val, &ret)
850 847   self.table.vals[k] = count
851     - uniques.append(val)
    848 + uniques.append(val, refcheck=refcheck)
852 849   labels[i] = count
853 850   count += 1
854 851

pandas/core/reshape/merge.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1407,7 +1407,7 @@ def _factorize_keys(lk, rk, sort=True):
1407 1407   count = rizer.get_count()
1408 1408
1409 1409   if sort:
1410      - uniques = rizer.uniques.to_array()
     1410 + uniques = rizer.uniques.to_array(refcheck=False)
1411 1411   llab, rlab = _sort_labels(uniques, llab, rlab)
1412 1412
1413 1413   # NA group

pandas/tests/test_algos.py

+2 -2
Original file line number | Diff line number | Diff line change
@@ -1069,8 +1069,8 @@ def _test_vector_resize(htable, uniques, dtype, nvals):
1069 1069   # get_labels appends to the vector
1070 1070   htable.get_labels(vals[:nvals], uniques, 0, -1)
1071 1071   # to_array resizes the vector
1072      - uniques.to_array()
1073      - htable.get_labels(vals, uniques, 0, -1)
     1072 + uniques.to_array(refcheck=False)
     1073 + htable.get_labels(vals, uniques, 0, -1, refcheck=False)
1074 1074
1075 1075   test_cases = [
1076 1076   (hashtable.PyObjectHashTable, hashtable.ObjectVector, 'object'),

0 commit comments

Comments
 (0)