Skip to content

Commit 7878db0

Browse files
author
tp
committed
Add __contains__ to CategoricalIndex
1 parent 0c65c57 commit 7878db0

File tree

3 files changed

+27
-12
lines changed

3 files changed

+27
-12
lines changed

asv_bench/benchmarks/categoricals.py

+14
Original file line numberDiff line numberDiff line change
@@ -193,3 +193,17 @@ def time_categorical_series_is_monotonic_increasing(self):
193193

194194
def time_categorical_series_is_monotonic_decreasing(self):
    """Benchmark reading ``is_monotonic_decreasing`` from ``self.s``."""
    # The attribute access itself is the timed work; the value is discarded.
    self.s.is_monotonic_decreasing
196+
197+
198+
class Contains(object):
    """Benchmark the ``in`` operator against a categorical index."""

    goal_time = 0.2

    def setup(self):
        # Build the index once, then keep its first category as the probe
        # value used by the timed membership check.
        size, n_categories = 10**5, 100
        index = tm.makeCategoricalIndex(size, n_categories)
        self.ci = index
        self.cat = index.categories[0]

    def time_contains(self):
        # The membership test is the timed work; its result is discarded.
        self.cat in self.ci

doc/source/whatsnew/v0.24.0.txt

+3-1
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,9 @@ Performance Improvements
6464
~~~~~~~~~~~~~~~~~~~~~~~~
6565

6666
- Improved performance of :func:`Series.describe` in case of numeric dtypes (:issue:`21274`)
67-
-
67+
- Improved performance of membership checks in :class:`CategoricalIndex`
68+
(i.e. ``x in ci``-style checks are much faster). :meth:`CategoricalIndex.contains`
69+
is likewise much faster (:issue:`21369`)
6870

6971
.. _whatsnew_0240.docs:
7072

pandas/core/indexes/category.py

+10-11
Original file line numberDiff line numberDiff line change
@@ -324,20 +324,19 @@ def _reverse_indexer(self):
324324
@Appender(_index_shared_docs['__contains__'] % _index_doc_kwargs)
325325
def __contains__(self, key):
    # NOTE: no docstring here on purpose; the Appender decorator above
    # supplies the documentation for this method.

    # Unhashable keys raise TypeError here, before any lookup is attempted.
    hash(key)

    if isna(key):
        # A missing-value key is contained iff any entry of the index is NA.
        return self.isna().any()

    categories = self.categories
    if categories._defer_to_indexing:  # e.g. Interval values
        try:
            loc = categories.get_loc(key)
        except KeyError:
            # The key matches no category, so it cannot be in the index.
            # Membership checks must answer False rather than raise.
            return False
        # The key is present only if its category code is actually used.
        return np.isin(self.codes, loc).any()

    if key in categories:
        # A category may exist without being used by any entry, so look up
        # the category's code in the engine rather than trusting categories.
        return categories.get_loc(key) in self._engine
    return False
332336

333337
@Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
334338
def contains(self, key):
    # Same check as the ``in`` operator: delegate to it so both entry
    # points stay in agreement.
    found = key in self
    return found
341340

342341
def __array__(self, dtype=None):
343342
""" the array interface, return my values """

0 commit comments

Comments
 (0)