Skip to content

Commit f1abf14

Browse files
committed
CLN: Remove unused code in Factorizer classes (pandas-dev#49547)
1 parent dd39bb4 commit f1abf14

File tree

2 files changed

+5
-32
lines changed

2 files changed

+5
-32
lines changed

pandas/_libs/hashtable.pyx

+2-19
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ cdef class ObjectFactorizer(Factorizer):
9090
self.uniques = ObjectVector()
9191

9292
def factorize(
93-
self, ndarray[object] values, sort=False, na_sentinel=-1, na_value=None
93+
self, ndarray[object] values, na_sentinel=-1, na_value=None
9494
) -> np.ndarray:
9595
"""
9696

@@ -115,14 +115,6 @@ cdef class ObjectFactorizer(Factorizer):
115115
self.uniques = uniques
116116
labels = self.table.get_labels(values, self.uniques,
117117
self.count, na_sentinel, na_value)
118-
mask = (labels == na_sentinel)
119-
# sort on
120-
if sort:
121-
sorter = self.uniques.to_array().argsort()
122-
reverse_indexer = np.empty(len(sorter), dtype=np.intp)
123-
reverse_indexer.put(sorter, np.arange(len(sorter)))
124-
labels = reverse_indexer.take(labels, mode='clip')
125-
labels[mask] = na_sentinel
126118
self.count = len(self.uniques)
127119
return labels
128120

@@ -136,7 +128,7 @@ cdef class Int64Factorizer(Factorizer):
136128
self.table = Int64HashTable(size_hint)
137129
self.uniques = Int64Vector()
138130

139-
def factorize(self, const int64_t[:] values, sort=False,
131+
def factorize(self, const int64_t[:] values,
140132
na_sentinel=-1, na_value=None) -> np.ndarray:
141133
"""
142134
Returns
@@ -161,14 +153,5 @@ cdef class Int64Factorizer(Factorizer):
161153
labels = self.table.get_labels(values, self.uniques,
162154
self.count, na_sentinel,
163155
na_value=na_value)
164-
165-
# sort on
166-
if sort:
167-
sorter = self.uniques.to_array().argsort()
168-
reverse_indexer = np.empty(len(sorter), dtype=np.intp)
169-
reverse_indexer.put(sorter, np.arange(len(sorter)))
170-
171-
labels = reverse_indexer.take(labels)
172-
173156
self.count = len(self.uniques)
174157
return labels

pandas/tests/test_algos.py

+3-13
Original file line number | Diff line number | Diff line change
@@ -230,21 +230,11 @@ def test_factorize_nan(self):
230230
key = np.array([1, 2, 1, np.nan], dtype="O")
231231
rizer = ht.ObjectFactorizer(len(key))
232232
for na_sentinel in (-1, 20):
233-
ids = rizer.factorize(key, sort=True, na_sentinel=na_sentinel)
234-
expected = np.array([0, 1, 0, na_sentinel], dtype="int32")
233+
ids = rizer.factorize(key, na_sentinel=na_sentinel)
234+
expected = np.array([0, 1, 0, na_sentinel], dtype=np.intp)
235235
assert len(set(key)) == len(set(expected))
236236
tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)
237-
238-
# nan still maps to na_sentinel when sort=False
239-
key = np.array([0, np.nan, 1], dtype="O")
240-
na_sentinel = -1
241-
242-
# TODO(wesm): unused?
243-
ids = rizer.factorize(key, sort=False, na_sentinel=na_sentinel) # noqa
244-
245-
expected = np.array([2, -1, 0], dtype="int32")
246-
assert len(set(key)) == len(set(expected))
247-
tm.assert_numpy_array_equal(pd.isna(key), expected == na_sentinel)
237+
tm.assert_numpy_array_equal(ids, expected)
248238

249239
@pytest.mark.parametrize(
250240
"data, expected_codes, expected_uniques",

0 commit comments

Comments
 (0)