diff --git a/doc/source/whatsnew/v0.25.1.rst b/doc/source/whatsnew/v0.25.1.rst index 102ecb9cb0f4e..245aa42b0de14 100644 --- a/doc/source/whatsnew/v0.25.1.rst +++ b/doc/source/whatsnew/v0.25.1.rst @@ -25,7 +25,7 @@ Bug fixes Categorical ^^^^^^^^^^^ -- +- Bug in :meth:`Categorical.fillna` would replace all values, not just those that are ``NaN`` (:issue:`26215`) - - diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index df5cd12a479f0..86a22d03440dd 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1864,8 +1864,8 @@ def fillna(self, value=None, method=None, limit=None): raise ValueError("fill value must be in categories") values_codes = _get_codes_for_values(value, self.categories) - indexer = np.where(values_codes != -1) - codes[indexer] = values_codes[values_codes != -1] + indexer = np.where(codes == -1) + codes[indexer] = values_codes[indexer] # If value is not a dict or Series it should be a scalar elif is_hashable(value): diff --git a/pandas/tests/series/test_missing.py b/pandas/tests/series/test_missing.py index 10375719be8d2..f847a39541558 100644 --- a/pandas/tests/series/test_missing.py +++ b/pandas/tests/series/test_missing.py @@ -578,6 +578,28 @@ def test_fillna_categorical(self, fill_value, expected_output): exp = Series(Categorical(expected_output, categories=["a", "b"])) tm.assert_series_equal(s.fillna(fill_value), exp) + @pytest.mark.parametrize( + "fill_value, expected_output", + [ + (Series(["a", "b", "c", "d", "e"]), ["a", "b", "b", "d", "e"]), + (Series(["b", "d", "a", "d", "a"]), ["a", "d", "b", "d", "a"]), + ( + Series( + Categorical( + ["b", "d", "a", "d", "a"], categories=["b", "c", "d", "e", "a"] + ) + ), + ["a", "d", "b", "d", "a"], + ), + ], + ) + def test_fillna_categorical_with_new_categories(self, fill_value, expected_output): + # GH 26215 + data = ["a", np.nan, "b", np.nan, np.nan] + s = Series(Categorical(data, categories=["a", "b", "c", "d", "e"])) + exp = Series(Categorical(expected_output, categories=["a", "b", "c", "d", "e"])) + tm.assert_series_equal(s.fillna(fill_value), exp) + def test_fillna_categorical_raise(self): data = ["a", np.nan, "b", np.nan, np.nan] s = Series(Categorical(data, categories=["a", "b"]))