From a16416b25554faeb834f7e196b7d652bd3e88583 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 20 Nov 2019 18:13:49 +0000 Subject: [PATCH 1/2] PERF: scalar not found in categorical's categories --- doc/source/whatsnew/v1.0.0.rst | 1 + pandas/core/arrays/categorical.py | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index a3d17b2b32353..00305360bbacc 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -343,6 +343,7 @@ Performance improvements - Performance improvement in :meth:`DataFrame.replace` when provided a list of values to replace (:issue:`28099`) - Performance improvement in :meth:`DataFrame.select_dtypes` by using vectorization instead of iterating over a loop (:issue:`28317`) - Performance improvement in :meth:`Categorical.searchsorted` and :meth:`CategoricalIndex.searchsorted` (:issue:`28795`) +- Performance improvement when comparing a :class:`Categorical` with a scalar and the scalar is not found in the categories (:issue:`29750`) .. 
_whatsnew_1000.bug_fixes: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c6e2a7b7a6e00..89b9c01883de8 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -133,9 +133,9 @@ def f(self, other): return ret else: if opname == "__eq__": - return np.repeat(False, len(self)) + return np.zeros(len(self), dtype=bool) elif opname == "__ne__": - return np.repeat(True, len(self)) + return np.ones(len(self), dtype=bool) else: msg = ( "Cannot compare a Categorical for op {op} with a " From 3d692608076eaf77bb2c8882ee07ffa673105078 Mon Sep 17 00:00:00 2001 From: tp Date: Wed, 20 Nov 2019 18:28:14 +0000 Subject: [PATCH 2/2] replace np.repeat for np.ones/.zeros in tests --- pandas/tests/arrays/categorical/test_operators.py | 2 +- pandas/tests/indexes/interval/test_interval.py | 4 ++-- pandas/tests/indexes/test_base.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index 22c1d5373372a..d62c4f4cf936e 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -48,7 +48,7 @@ def test_comparisons(self): tm.assert_numpy_array_equal(result, expected) result = self.factor == "d" - expected = np.repeat(False, len(self.factor)) + expected = np.zeros(len(self.factor), dtype=bool) tm.assert_numpy_array_equal(result, expected) # comparisons with categoricals diff --git a/pandas/tests/indexes/interval/test_interval.py b/pandas/tests/indexes/interval/test_interval.py index 73eacd8c4856e..f3c8c5cb6efa1 100644 --- a/pandas/tests/indexes/interval/test_interval.py +++ b/pandas/tests/indexes/interval/test_interval.py @@ -105,11 +105,11 @@ def test_with_nans(self, closed): assert index.hasnans is False result = index.isna() - expected = np.repeat(False, len(index)) + expected = np.zeros(len(index), dtype=bool) 
tm.assert_numpy_array_equal(result, expected) result = index.notna() - expected = np.repeat(True, len(index)) + expected = np.ones(len(index), dtype=bool) tm.assert_numpy_array_equal(result, expected) index = self.create_index_with_nan(closed=closed) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index 5bfa13c0865f1..facc025409f08 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -730,7 +730,7 @@ def test_nanosecond_index_access(self): assert first_value == x[Timestamp(expected_ts)] def test_booleanindex(self, index): - bool_index = np.repeat(True, len(index)).astype(bool) + bool_index = np.ones(len(index), dtype=bool) bool_index[5:30:2] = False sub_index = index[bool_index]