diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 89a9da4a73b35..6604199930bc5 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -414,7 +414,7 @@ Categorical ^^^^^^^^^^^ - Bug in :func:`DataFrame.at` and :func:`Series.at` that would raise exception if the index was a :class:`CategoricalIndex` (:issue:`20629`) -- +- Fixed bug where comparing an ordered :class:`Categorical` that contained missing values with a scalar sometimes incorrectly resulted in ``True`` for the missing values (:issue:`26504`) - Datetimelike diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 89b86c66d7b05..44bb44457bc25 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -89,18 +89,23 @@ def f(self, other): else: other_codes = other._codes - na_mask = (self._codes == -1) | (other_codes == -1) + mask = (self._codes == -1) | (other_codes == -1) f = getattr(self._codes, op) ret = f(other_codes) - if na_mask.any(): + if mask.any(): # In other series, the leads to False, so do that here too - ret[na_mask] = False + ret[mask] = False return ret if is_scalar(other): if other in self.categories: i = self.categories.get_loc(other) - return getattr(self._codes, op)(i) + ret = getattr(self._codes, op)(i) + + # check for NaN in self + mask = (self._codes == -1) + ret[mask] = False + return ret else: if op == '__eq__': return np.repeat(False, len(self)) diff --git a/pandas/tests/arrays/categorical/test_operators.py b/pandas/tests/arrays/categorical/test_operators.py index dc6e1a5bc36b3..a443408bf9479 100644 --- a/pandas/tests/arrays/categorical/test_operators.py +++ b/pandas/tests/arrays/categorical/test_operators.py @@ -1,4 +1,5 @@ import operator +import warnings import numpy as np import pytest @@ -17,7 +18,6 @@ def test_categories_none_comparisons(self): tm.assert_categorical_equal(factor, self.factor) def test_comparisons(self): - result = self.factor[self.factor == 'a'] expected = 
self.factor[np.asarray(self.factor) == 'a'] tm.assert_categorical_equal(result, expected) @@ -186,6 +186,36 @@ def test_comparison_with_unknown_scalars(self): tm.assert_numpy_array_equal(cat != 4, np.array([True, True, True])) + def test_comparison_of_ordered_categorical_with_nan_to_scalar( + self, compare_operators_no_eq_ne): + # https://github.com/pandas-dev/pandas/issues/26504 + # Comparing an ordered Categorical that contains missing values with a + # scalar from its categories should evaluate to False at the missing + # positions (GH 26504) + + cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + scalar = 2 + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + expected = getattr(np.array(cat), + compare_operators_no_eq_ne)(scalar) + actual = getattr(cat, compare_operators_no_eq_ne)(scalar) + tm.assert_numpy_array_equal(actual, expected) + + def test_comparison_of_ordered_categorical_with_nan_to_listlike( + self, compare_operators_no_eq_ne): + # https://github.com/pandas-dev/pandas/issues/26504 + # Comparing missing values in an ordered Categorical with a listlike + # should evaluate to False + + cat = Categorical([1, 2, 3, None], categories=[1, 2, 3], ordered=True) + other = Categorical([2, 2, 2, 2], categories=[1, 2, 3], ordered=True) + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + expected = getattr(np.array(cat), compare_operators_no_eq_ne)(2) + actual = getattr(cat, compare_operators_no_eq_ne)(other) + tm.assert_numpy_array_equal(actual, expected) + @pytest.mark.parametrize('data,reverse,base', [ (list("abc"), list("cba"), list("bbb")), ([1, 2, 3], [3, 2, 1], [2, 2, 2])]