Skip to content

Commit 913790d

Browse files
author
tp
committed
Reimplement Categorical._contains
1 parent f5fd77c commit 913790d

File tree

2 files changed

+52
-56
lines changed

2 files changed

+52
-56
lines changed

pandas/core/arrays/categorical.py

Lines changed: 50 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,55 @@ def _maybe_to_categorical(array):
157157
return array
158158

159159

160+
def contains(cat, key, container):
161+
"""
162+
Helper for membership check for ``key`` in ``cat``.
163+
164+
This is a helper function for :meth:`Categorical.__contains__`
165+
and :meth:`CategoricalIndex.__contains__`.
166+
167+
Returns True if ``key`` is in ``cat.categories`` and the
168+
location of ``key`` in ``categories`` is in ``container``.
169+
170+
Parameters
171+
----------
172+
cat : :class:`Categorical` or :class:`CategoricalIndex`
173+
key : a hashable object
174+
The key to check membership for.
175+
container : Container (e.g. list-like or mapping)
176+
The container to check for membership in.
177+
178+
Returns
179+
-------
180+
is_in : bool
181+
True if ``key`` is in ``cat.categories`` and location of
182+
``key`` in ``categories`` is in ``container``, else False.
183+
184+
Notes
185+
-----
186+
This function does not check for NaN values. Do that separately
187+
before calling this method.
188+
"""
189+
# get location of key in categories.
190+
# If a KeyError, the key isn't in categories, so logically
191+
# can't be in container either.
192+
try:
193+
loc = cat.categories.get_loc(key)
194+
except KeyError:
195+
return False
196+
197+
# loc is the location of key in categories, but also the *value*
198+
# for key in container. So, `key` may be in categories,
199+
# but still not in `container`. Example ('b' in categories,
200+
# but not in values):
201+
# 'b' in Categorical(['a'], categories=['a', 'b']) # False
202+
if is_scalar(loc):
203+
return loc in container
204+
else:
205+
# if categories is an IntervalIndex, loc is an array.
206+
return any(loc_ in container for loc_ in loc)
207+
208+
160209
_codes_doc = """The category codes of this categorical.
161210
162211
Level codes are an array of integers which are the positions of the real
@@ -1846,66 +1895,14 @@ def __iter__(self):
18461895
"""Returns an Iterator over the values of this Categorical."""
18471896
return iter(self.get_values().tolist())
18481897

1849-
@staticmethod
1850-
def _contains(key, categories, container):
1851-
"""
1852-
Helper for membership check for ``key``.
1853-
1854-
This helper method is used in :method:`Categorical.__contains__`
1855-
and in :class:`CategoricalIndex.__contains__`.
1856-
1857-
Returns True if ``key`` is in ``categories`` and the
1858-
location of ``key`` in ``categories`` is in ``container``.
1859-
1860-
Parameters
1861-
----------
1862-
key : a hashable object
1863-
The key to check membership for.
1864-
categories : Sequence
1865-
The possible values for ``key``. The location for ``key``
1866-
in ``categories`` is also its value in ``container``
1867-
container : Container (e.g. list-like or mapping)
1868-
The container to check for membership in.
1869-
1870-
Returns
1871-
-------
1872-
is_in : bool
1873-
True if ``key`` is in ``categories`` and location of
1874-
``key`` in ``categories`` is in ``container``, else False.
1875-
1876-
Notes
1877-
-----
1878-
This method does not check for Nan values. Do that separately
1879-
before calling this method.
1880-
"""
1881-
1882-
# is key in categories? Then get its location in categories.
1883-
# If not (i.e. KeyError), its location logically can't be in
1884-
# container either.
1885-
try:
1886-
loc = categories.get_loc(key)
1887-
except KeyError:
1888-
return False
1889-
1890-
# loc is the location of key in categories, but also the value
1891-
# for key in container. So, key may be in categories,
1892-
# but still not in container, this must be checked. Example:
1893-
# 'b' in Categorical(['a'], categories=['a', 'b']) # False
1894-
if is_scalar(loc):
1895-
return loc in container
1896-
else:
1897-
# if categories is an IntervalIndex, loc is an array.
1898-
# Check if any scalar of the array is in the container
1899-
return any(loc_ in container for loc_ in loc)
1900-
19011898
def __contains__(self, key):
19021899
"""Returns True if `key` is in this Categorical."""
19031900
hash(key)
19041901

19051902
if isna(key): # if key is a NaN, check if any NaN is in self.
19061903
return self.isna().any()
19071904

1908-
return self._contains(key, self.categories, container=self._codes)
1905+
return contains(self, key, container=self._codes)
19091906

19101907
def _tidy_repr(self, max_vals=10, footer=True):
19111908
""" a short repr displaying only max_vals and an optional (but default

pandas/core/indexes/category.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
import pandas.core.common as com
2525
import pandas.core.missing as missing
2626
import pandas.core.indexes.base as ibase
27-
from pandas.core.arrays import Categorical
27+
from pandas.core.arrays.categorical import Categorical, contains
2828

2929
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
3030
_index_doc_kwargs.update(dict(target_klass='CategoricalIndex'))
@@ -327,8 +327,7 @@ def __contains__(self, key):
327327
if isna(key): # if key is a NaN, check if any NaN is in self.
328328
return self.hasnans
329329

330-
return Categorical._contains(key, categories=self.categories,
331-
container=self._engine)
330+
return contains(self, key, container=self._engine)
332331

333332
@Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
334333
def contains(self, key):

0 commit comments

Comments
 (0)