Skip to content

Commit f113e46

Browse files
authored
BUG: CategoricalIndex.equals casting non-categories to np.nan (#37667)
1 parent 17489ad commit f113e46

File tree

3 files changed

+19
-7
lines changed

3 files changed

+19
-7
lines changed

doc/source/whatsnew/v1.2.0.rst

+1 -1
Original file line number | Diff line number | Diff line change
@@ -380,7 +380,7 @@ Categorical
380380
^^^^^^^^^^^
381381
- :meth:`Categorical.fillna` will always return a copy, will validate a passed fill value regardless of whether there are any NAs to fill, and will disallow a ``NaT`` as a fill value for numeric categories (:issue:`36530`)
382382
- Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`)
383-
-
383+
- Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`37667`)
384384

385385
Datetimelike
386386
^^^^^^^^^^^^

pandas/core/indexes/category.py

+10 -6
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,7 @@
2020
pandas_dtype,
2121
)
2222
from pandas.core.dtypes.dtypes import CategoricalDtype
23-
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, notna
23+
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
2424

2525
from pandas.core import accessor
2626
from pandas.core.arrays.categorical import Categorical, contains
@@ -263,6 +263,7 @@ def _is_dtype_compat(self, other) -> Categorical:
263263
values = other
264264
if not is_list_like(values):
265265
values = [values]
266+
266267
cat = Categorical(other, dtype=self.dtype)
267268
other = CategoricalIndex(cat)
268269
if not other.isin(values).all():
@@ -271,6 +272,12 @@ def _is_dtype_compat(self, other) -> Categorical:
271272
)
272273
other = other._values
273274

275+
if not ((other == values) | (isna(other) & isna(values))).all():
276+
# GH#37667 see test_equals_non_category
277+
raise TypeError(
278+
"categories must match existing categories when appending"
279+
)
280+
274281
return other
275282

276283
def equals(self, other: object) -> bool:
@@ -291,13 +298,10 @@ def equals(self, other: object) -> bool:
291298

292299
try:
293300
other = self._is_dtype_compat(other)
294-
if isinstance(other, type(self)):
295-
other = other._data
296-
return self._data.equals(other)
297301
except (TypeError, ValueError):
298-
pass
302+
return False
299303

300-
return False
304+
return self._data.equals(other)
301305

302306
# --------------------------------------------------------------------
303307
# Rendering Methods

pandas/tests/indexes/categorical/test_category.py

+8
Original file line number | Diff line number | Diff line change
@@ -444,6 +444,14 @@ def test_equals_categorical_unordered(self):
444444
assert not a.equals(c)
445445
assert not b.equals(c)
446446

447+
def test_equals_non_category(self):
448+
# GH#37667 Case where other contains a value not among ci's
449+
# categories ("D") and also contains np.nan
450+
ci = CategoricalIndex(["A", "B", np.nan, np.nan])
451+
other = Index(["A", "B", "D", np.nan])
452+
453+
assert not ci.equals(other)
454+
447455
def test_frame_repr(self):
448456
df = pd.DataFrame({"A": [1, 2, 3]}, index=CategoricalIndex(["a", "b", "c"]))
449457
result = repr(df)

0 commit comments

Comments
 (0)