Skip to content

Commit e73de82

Browse files
topper-123 authored and proost committed
PERF: speed-up when scalar not found in Categorical's categories (pandas-dev#29750)
1 parent 49425e2 commit e73de82

File tree

5 files changed

+7
-6
lines changed

5 files changed

+7
-6
lines changed

doc/source/whatsnew/v1.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -357,6 +357,7 @@ Performance improvements
357357
- Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`)
358358
- Performance improvement in :meth:`DataFrame.select_dtypes` by using vectorization instead of iterating over a loop (:issue:`28317`)
359359
- Performance improvement in :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` (:issue:`28795`)
360+
- Performance improvement when comparing a :class:`Categorical` with a scalar and the scalar is not found in the categories (:issue:`29750`)
360361

361362
.. _whatsnew_1000.bug_fixes:
362363

pandas/core/arrays/categorical.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -133,9 +133,9 @@ def func(self, other):
133133
return ret
134134
else:
135135
if opname == "__eq__":
136-
return np.repeat(False, len(self))
136+
return np.zeros(len(self), dtype=bool)
137137
elif opname == "__ne__":
138-
return np.repeat(True, len(self))
138+
return np.ones(len(self), dtype=bool)
139139
else:
140140
raise TypeError(
141141
f"Cannot compare a Categorical for op {opname} with a "

pandas/tests/arrays/categorical/test_operators.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ def test_comparisons(self):
4848
tm.assert_numpy_array_equal(result, expected)
4949

5050
result = self.factor == "d"
51-
expected = np.repeat(False, len(self.factor))
51+
expected = np.zeros(len(self.factor), dtype=bool)
5252
tm.assert_numpy_array_equal(result, expected)
5353

5454
# comparisons with categoricals

pandas/tests/indexes/interval/test_interval.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -105,11 +105,11 @@ def test_with_nans(self, closed):
105105
assert index.hasnans is False
106106

107107
result = index.isna()
108-
expected = np.repeat(False, len(index))
108+
expected = np.zeros(len(index), dtype=bool)
109109
tm.assert_numpy_array_equal(result, expected)
110110

111111
result = index.notna()
112-
expected = np.repeat(True, len(index))
112+
expected = np.ones(len(index), dtype=bool)
113113
tm.assert_numpy_array_equal(result, expected)
114114

115115
index = self.create_index_with_nan(closed=closed)

pandas/tests/indexes/test_base.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -730,7 +730,7 @@ def test_nanosecond_index_access(self):
730730
assert first_value == x[Timestamp(expected_ts)]
731731

732732
def test_booleanindex(self, index):
733-
bool_index = np.repeat(True, len(index)).astype(bool)
733+
bool_index = np.ones(len(index), dtype=bool)
734734
bool_index[5:30:2] = False
735735

736736
sub_index = index[bool_index]

0 commit comments

Comments
 (0)