Skip to content

Commit c3ff8d7

Browse files
author
tp
committed
Reimplement Categorical._contains
1 parent 07dd41c commit c3ff8d7

File tree

2 files changed

+53
-56
lines changed

2 files changed

+53
-56
lines changed

pandas/core/arrays/categorical.py

+51-53
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,56 @@ def _maybe_to_categorical(array):
158158
return array
159159

160160

161+
def contains(cat, key, container):
162+
"""
163+
Helper for membership check for ``key`` in ``cat``.
164+
165+
This is a helper function for :meth:`Categorical.__contains__`
166+
and :meth:`CategoricalIndex.__contains__`.
167+
168+
Returns True if ``key`` is in ``cat.categories`` and the
169+
location of ``key`` in ``categories`` is in ``container``.
170+
171+
Parameters
172+
----------
173+
cat : :class:`Categorical` or :class:`CategoricalIndex`
174+
key : a hashable object
175+
The key to check membership for.
176+
container : Container (e.g. list-like or mapping)
177+
The container to check for membership in.
178+
179+
Returns
180+
-------
181+
is_in : bool
182+
True if ``key`` is in ``cat.categories`` and location of
183+
``key`` in ``categories`` is in ``container``, else False.
184+
185+
Notes
186+
-----
187+
This function does not check for NaN values. Do that separately
188+
before calling this method.
189+
"""
190+
191+
# is key in categories? Then get its location in categories.
192+
# If not (i.e. KeyError), its location logically can't be in
193+
# container either.
194+
try:
195+
loc = cat.categories.get_loc(key)
196+
except KeyError:
197+
return False
198+
199+
# loc is the location of key in categories, but also the value
200+
# for key in container. So, key may be in categories,
201+
# but still not in container, this must be checked. Example:
202+
# 'b' in Categorical(['a'], categories=['a', 'b']) # False
203+
if is_scalar(loc):
204+
return loc in container
205+
else:
206+
# if categories is an IntervalIndex, loc is an array.
207+
# Check if any scalar of the array is in the container
208+
return any(loc_ in container for loc_ in loc)
209+
210+
161211
_codes_doc = """The category codes of this categorical.
162212
163213
Level codes are an array of integers which are the positions of the real
@@ -1847,66 +1897,14 @@ def __iter__(self):
18471897
"""Returns an Iterator over the values of this Categorical."""
18481898
return iter(self.get_values().tolist())
18491899

1850-
@staticmethod
1851-
def _contains(key, categories, container):
1852-
"""
1853-
Helper for membership check for ``key``.
1854-
1855-
This helper method is used in :method:`Categorical.__contains__`
1856-
and in :class:`CategoricalIndex.__contains__`.
1857-
1858-
Returns True if ``key`` is in ``categories`` and the
1859-
location of ``key`` in ``categories`` is in ``container``.
1860-
1861-
Parameters
1862-
----------
1863-
key : a hashable object
1864-
The key to check membership for.
1865-
categories : Sequence
1866-
The possible values for ``key``. The location for ``key``
1867-
in ``categories`` is also its value in ``container``
1868-
container : Container (e.g. list-like or mapping)
1869-
The container to check for membership in.
1870-
1871-
Returns
1872-
-------
1873-
is_in : bool
1874-
True if ``key`` is in ``categories`` and location of
1875-
``key`` in ``categories`` is in ``container``, else False.
1876-
1877-
Notes
1878-
-----
1879-
This method does not check for Nan values. Do that separately
1880-
before calling this method.
1881-
"""
1882-
1883-
# is key in categories? Then get its location in categories.
1884-
# If not (i.e. KeyError), its location logically can't be in
1885-
# container either.
1886-
try:
1887-
loc = categories.get_loc(key)
1888-
except KeyError:
1889-
return False
1890-
1891-
# loc is the location of key in categories, but also the value
1892-
# for key in container. So, key may be in categories,
1893-
# but still not in container, this must be checked. Example:
1894-
# 'b' in Categorical(['a'], categories=['a', 'b']) # False
1895-
if is_scalar(loc):
1896-
return loc in container
1897-
else:
1898-
# if categories is an IntervalIndex, loc is an array.
1899-
# Check if any scalar of the array is in the container
1900-
return any(loc_ in container for loc_ in loc)
1901-
19021900
def __contains__(self, key):
19031901
"""Returns True if `key` is in this Categorical."""
19041902
hash(key)
19051903

19061904
if isna(key): # if key is a NaN, check if any NaN is in self.
19071905
return self.isna().any()
19081906

1909-
return self._contains(key, self.categories, container=self._codes)
1907+
return contains(self, key, container=self._codes)
19101908

19111909
def _tidy_repr(self, max_vals=10, footer=True):
19121910
""" a short repr displaying only max_vals and an optional (but default

pandas/core/indexes/category.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
import pandas.core.common as com
2525
import pandas.core.missing as missing
2626
import pandas.core.indexes.base as ibase
27-
from pandas.core.arrays import Categorical
27+
from pandas.core.arrays.categorical import Categorical, contains
2828

2929
_index_doc_kwargs = dict(ibase._index_doc_kwargs)
3030
_index_doc_kwargs.update(dict(target_klass='CategoricalIndex'))
@@ -327,8 +327,7 @@ def __contains__(self, key):
327327
if isna(key): # if key is a NaN, check if any NaN is in self.
328328
return self.hasnans
329329

330-
return Categorical._contains(key, categories=self.categories,
331-
container=self._engine)
330+
return contains(self, key, container=self._engine)
332331

333332
@Appender(_index_shared_docs['contains'] % _index_doc_kwargs)
334333
def contains(self, key):

0 commit comments

Comments
 (0)