Skip to content

Commit 42480cf

Browse files
jbrockmendelKevin D Smith
authored and
Kevin D Smith
committed
REF: Categorical.fillna match patterns in other methods (pandas-dev#36530)
1 parent 7484eb3 commit 42480cf

File tree

5 files changed

+44
-33
lines changed

5 files changed

+44
-33
lines changed

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ Bug fixes
238238

239239
Categorical
240240
^^^^^^^^^^^
241-
241+
- :meth:`Categorical.fillna` will always return a copy, will validate a passed fill value regardless of whether there are any NAs to fill, and will disallow a ``NaT`` as a fill value for numeric categories (:issue:`36530`)
242242
-
243243
-
244244

pandas/core/arrays/categorical.py

+12-29
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@
3737
)
3838
from pandas.core.dtypes.dtypes import CategoricalDtype
3939
from pandas.core.dtypes.generic import ABCIndexClass, ABCSeries
40-
from pandas.core.dtypes.inference import is_hashable
4140
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
4241

4342
from pandas.core import ops
@@ -1630,6 +1629,7 @@ def fillna(self, value=None, method=None, limit=None):
16301629
value, method = validate_fillna_kwargs(
16311630
value, method, validate_scalar_dict_value=False
16321631
)
1632+
value = extract_array(value, extract_numpy=True)
16331633

16341634
if value is None:
16351635
value = np.nan
@@ -1638,10 +1638,8 @@ def fillna(self, value=None, method=None, limit=None):
16381638
"specifying a limit for fillna has not been implemented yet"
16391639
)
16401640

1641-
codes = self._codes
1642-
1643-
# pad / bfill
16441641
if method is not None:
1642+
# pad / bfill
16451643

16461644
# TODO: dispatch when self.categories is EA-dtype
16471645
values = np.asarray(self).reshape(-1, len(self))
@@ -1651,40 +1649,25 @@ def fillna(self, value=None, method=None, limit=None):
16511649
codes = _get_codes_for_values(values, self.categories)
16521650

16531651
else:
1652+
# We copy even if there is nothing to fill
1653+
codes = self._ndarray.copy()
1654+
mask = self.isna()
16541655

1655-
# If value is a dict or a Series (a dict value has already
1656-
# been converted to a Series)
1657-
if isinstance(value, (np.ndarray, Categorical, ABCSeries)):
1656+
if isinstance(value, (np.ndarray, Categorical)):
16581657
# We get ndarray or Categorical if called via Series.fillna,
16591658
# where it will unwrap another aligned Series before getting here
16601659

1661-
mask = ~algorithms.isin(value, self.categories)
1662-
if not isna(value[mask]).all():
1660+
not_categories = ~algorithms.isin(value, self.categories)
1661+
if not isna(value[not_categories]).all():
1662+
# All entries in `value` must either be a category or NA
16631663
raise ValueError("fill value must be in categories")
16641664

16651665
values_codes = _get_codes_for_values(value, self.categories)
1666-
indexer = np.where(codes == -1)
1667-
codes = codes.copy()
1668-
codes[indexer] = values_codes[indexer]
1669-
1670-
# If value is not a dict or Series it should be a scalar
1671-
elif is_hashable(value):
1672-
if not isna(value) and value not in self.categories:
1673-
raise ValueError("fill value must be in categories")
1674-
1675-
mask = codes == -1
1676-
if mask.any():
1677-
codes = codes.copy()
1678-
if isna(value):
1679-
codes[mask] = -1
1680-
else:
1681-
codes[mask] = self._unbox_scalar(value)
1666+
codes[mask] = values_codes[mask]
16821667

16831668
else:
1684-
raise TypeError(
1685-
f"'value' parameter must be a scalar, dict "
1686-
f"or Series, but you passed a {type(value).__name__}"
1687-
)
1669+
new_code = self._validate_fill_value(value)
1670+
codes[mask] = new_code
16881671

16891672
return self._from_backing_data(codes)
16901673

pandas/tests/frame/test_missing.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -362,7 +362,8 @@ def test_na_actions_categorical(self):
362362
res = df.fillna(value={"cats": 3, "vals": "b"})
363363
tm.assert_frame_equal(res, df_exp_fill)
364364

365-
with pytest.raises(ValueError, match=("fill value must be in categories")):
365+
msg = "'fill_value=4' is not present in this Categorical's categories"
366+
with pytest.raises(ValueError, match=msg):
366367
df.fillna(value={"cats": 4, "vals": "c"})
367368

368369
res = df.fillna(method="pad")

pandas/tests/indexes/categorical/test_fillna.py

+27-1
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,32 @@ def test_fillna_categorical(self):
1414
tm.assert_index_equal(idx.fillna(1.0), exp)
1515

1616
# fill by value not in categories raises ValueError
17-
msg = "fill value must be in categories"
17+
msg = "'fill_value=2.0' is not present in this Categorical's categories"
1818
with pytest.raises(ValueError, match=msg):
1919
idx.fillna(2.0)
20+
21+
def test_fillna_copies_with_no_nas(self):
22+
# Nothing to fill, should still get a copy
23+
ci = CategoricalIndex([0, 1, 1])
24+
cat = ci._data
25+
result = ci.fillna(0)
26+
assert result._values._ndarray is not cat._ndarray
27+
assert result._values._ndarray.base is None
28+
29+
# Same check directly on the Categorical object
30+
result = cat.fillna(0)
31+
assert result._ndarray is not cat._ndarray
32+
assert result._ndarray.base is None
33+
34+
def test_fillna_validates_with_no_nas(self):
35+
# We validate the fill value even if fillna is a no-op
36+
ci = CategoricalIndex([2, 3, 3])
37+
cat = ci._data
38+
39+
msg = "'fill_value=False' is not present in this Categorical's categories"
40+
with pytest.raises(ValueError, match=msg):
41+
ci.fillna(False)
42+
43+
# Same check directly on the Categorical
44+
with pytest.raises(ValueError, match=msg):
45+
cat.fillna(False)

pandas/tests/series/methods/test_fillna.py

+2-1
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,8 @@ def test_fillna_categorical_raises(self):
125125
data = ["a", np.nan, "b", np.nan, np.nan]
126126
ser = Series(Categorical(data, categories=["a", "b"]))
127127

128-
with pytest.raises(ValueError, match="fill value must be in categories"):
128+
msg = "'fill_value=d' is not present in this Categorical's categories"
129+
with pytest.raises(ValueError, match=msg):
129130
ser.fillna("d")
130131

131132
with pytest.raises(ValueError, match="fill value must be in categories"):

0 commit comments

Comments
 (0)