diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 690e6b8f725ad..73493bbeb0eac 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -380,7 +380,7 @@ Categorical ^^^^^^^^^^^ - :meth:`Categorical.fillna` will always return a copy, will validate a passed fill value regardless of whether there are any NAs to fill, and will disallow a ``NaT`` as a fill value for numeric categories (:issue:`36530`) - Bug in :meth:`Categorical.__setitem__` that incorrectly raised when trying to set a tuple value (:issue:`20439`) -- +- Bug in :meth:`CategoricalIndex.equals` incorrectly casting non-category entries to ``np.nan`` (:issue:`37667`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 525c41bae8b51..085d2f5e832a2 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -20,7 +20,7 @@ pandas_dtype, ) from pandas.core.dtypes.dtypes import CategoricalDtype -from pandas.core.dtypes.missing import is_valid_nat_for_dtype, notna +from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna from pandas.core import accessor from pandas.core.arrays.categorical import Categorical, contains @@ -263,6 +263,7 @@ def _is_dtype_compat(self, other) -> Categorical: values = other if not is_list_like(values): values = [values] + cat = Categorical(other, dtype=self.dtype) other = CategoricalIndex(cat) if not other.isin(values).all(): @@ -271,6 +272,12 @@ def _is_dtype_compat(self, other) -> Categorical: ) other = other._values + if not ((other == values) | (isna(other) & isna(values))).all(): + # GH#37667 see test_equals_non_category + raise TypeError( + "categories must match existing categories when appending" + ) + return other def equals(self, other: object) -> bool: @@ -291,13 +298,10 @@ def equals(self, other: object) -> bool: try: other = self._is_dtype_compat(other) - if isinstance(other, type(self)): - other = other._data - return self._data.equals(other) except (TypeError, ValueError): - pass + return False - return False + return self._data.equals(other) # -------------------------------------------------------------------- # Rendering Methods diff --git a/pandas/tests/indexes/categorical/test_category.py b/pandas/tests/indexes/categorical/test_category.py index cf2430d041d88..324a2535bc465 100644 --- a/pandas/tests/indexes/categorical/test_category.py +++ b/pandas/tests/indexes/categorical/test_category.py @@ -444,6 +444,14 @@ def test_equals_categorical_unordered(self): assert not a.equals(c) assert not b.equals(c) + def test_equals_non_category(self): + # GH#37667 Case where other contains a value not among ci's + # categories ("D") and also contains np.nan + ci = CategoricalIndex(["A", "B", np.nan, np.nan]) + other = Index(["A", "B", "D", np.nan]) + + assert not ci.equals(other) + def test_frame_repr(self): df = pd.DataFrame({"A": [1, 2, 3]}, index=CategoricalIndex(["a", "b", "c"])) result = repr(df)