Skip to content

Commit ca5198a

Browse files
MarcoGorelli authored and TomAugspurger committed
BUG: Ensure that fillna in Categorical only replaces null values (#27932)
* Ensure that :func:`fillna` in :class:`Categorical` only replaces null values
1 parent a818281 commit ca5198a

File tree

3 files changed

+25
-3
lines changed

3 files changed

+25
-3
lines changed

doc/source/whatsnew/v0.25.1.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@ Bug fixes
2525
Categorical
2626
^^^^^^^^^^^
2727

28-
-
28+
- Bug in :meth:`Categorical.fillna` would replace all values, not just those that are ``NaN`` (:issue:`26215`)
2929
-
3030
-
3131

pandas/core/arrays/categorical.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -1840,8 +1840,8 @@ def fillna(self, value=None, method=None, limit=None):
18401840
raise ValueError("fill value must be in categories")
18411841

18421842
values_codes = _get_codes_for_values(value, self.categories)
1843-
indexer = np.where(values_codes != -1)
1844-
codes[indexer] = values_codes[values_codes != -1]
1843+
indexer = np.where(codes == -1)
1844+
codes[indexer] = values_codes[indexer]
18451845

18461846
# If value is not a dict or Series it should be a scalar
18471847
elif is_hashable(value):

pandas/tests/series/test_missing.py

+22
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,28 @@ def test_fillna_categorical(self, fill_value, expected_output):
578578
exp = Series(Categorical(expected_output, categories=["a", "b"]))
579579
tm.assert_series_equal(s.fillna(fill_value), exp)
580580

581+
@pytest.mark.parametrize(
582+
"fill_value, expected_output",
583+
[
584+
(Series(["a", "b", "c", "d", "e"]), ["a", "b", "b", "d", "e"]),
585+
(Series(["b", "d", "a", "d", "a"]), ["a", "d", "b", "d", "a"]),
586+
(
587+
Series(
588+
Categorical(
589+
["b", "d", "a", "d", "a"], categories=["b", "c", "d", "e", "a"]
590+
)
591+
),
592+
["a", "d", "b", "d", "a"],
593+
),
594+
],
595+
)
596+
def test_fillna_categorical_with_new_categories(self, fill_value, expected_output):
597+
# GH 26215
598+
data = ["a", np.nan, "b", np.nan, np.nan]
599+
s = Series(Categorical(data, categories=["a", "b", "c", "d", "e"]))
600+
exp = Series(Categorical(expected_output, categories=["a", "b", "c", "d", "e"]))
601+
tm.assert_series_equal(s.fillna(fill_value), exp)
602+
581603
def test_fillna_categorical_raise(self):
582604
data = ["a", np.nan, "b", np.nan, np.nan]
583605
s = Series(Categorical(data, categories=["a", "b"]))

0 commit comments

Comments
 (0)