Skip to content

Commit 442be89

Browse files
rhshadrach authored and mroeschke committed
BUG: Implement fillna(..., limit=x) for EAs (pandas-dev#58249)
* BUG: Implement fillna(..., limit=x) for EAs
* Comments
* type ignores
* Update doc/source/whatsnew/v3.0.0.rst

---------

Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 754403a commit 442be89

File tree

12 files changed

+105
-15
lines changed

12 files changed

+105
-15
lines changed

pandas/core/arrays/_mixins.py

+7
Original file line numberDiff line numberDiff line change
@@ -330,6 +330,13 @@ def _pad_or_backfill(
330330
@doc(ExtensionArray.fillna)
331331
def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self:
332332
mask = self.isna()
333+
if limit is not None and limit < len(self):
334+
# mypy doesn't like that mask can be an EA which need not have `cumsum`
335+
modify = mask.cumsum() > limit # type: ignore[union-attr]
336+
if modify.any():
337+
# Only copy mask if necessary
338+
mask = mask.copy()
339+
mask[modify] = False
333340
# error: Argument 2 to "check_value_size" has incompatible type
334341
# "ExtensionArray"; expected "ndarray"
335342
value = missing.check_value_size(

pandas/core/arrays/base.py

+12-9
Original file line numberDiff line numberDiff line change
@@ -755,7 +755,8 @@ def isna(self) -> np.ndarray | ExtensionArraySupportsAnyAll:
755755
If returning an ExtensionArray, then
756756
757757
* ``na_values._is_boolean`` should be True
758-
* `na_values` should implement :func:`ExtensionArray._reduce`
758+
* ``na_values`` should implement :func:`ExtensionArray._reduce`
759+
* ``na_values`` should implement :func:`ExtensionArray._accumulate`
759760
* ``na_values.any`` and ``na_values.all`` should be implemented
760761
761762
Examples
@@ -1058,19 +1059,12 @@ def fillna(
10581059
Alternatively, an array-like "value" can be given. It's expected
10591060
that the array-like have the same length as 'self'.
10601061
limit : int, default None
1061-
If method is specified, this is the maximum number of consecutive
1062-
NaN values to forward/backward fill. In other words, if there is
1063-
a gap with more than this number of consecutive NaNs, it will only
1064-
be partially filled. If method is not specified, this is the
1065-
maximum number of entries along the entire axis where NaNs will be
1066-
filled.
1062+
The maximum number of entries where NA values will be filled.
10671063
copy : bool, default True
10681064
Whether to make a copy of the data before filling. If False, then
10691065
the original should be modified and no new memory should be allocated.
10701066
For ExtensionArray subclasses that cannot do this, it is at the
10711067
author's discretion whether to ignore "copy=False" or to raise.
1072-
The base class implementation ignores the keyword in pad/backfill
1073-
cases.
10741068
10751069
Returns
10761070
-------
@@ -1086,6 +1080,15 @@ def fillna(
10861080
Length: 6, dtype: Int64
10871081
"""
10881082
mask = self.isna()
1083+
if limit is not None and limit < len(self):
1084+
# isna can return an ExtensionArray, we're assuming that comparisons
1085+
# are implemented.
1086+
# mypy doesn't like that mask can be an EA which need not have `cumsum`
1087+
modify = mask.cumsum() > limit # type: ignore[union-attr]
1088+
if modify.any():
1089+
# Only copy mask if necessary
1090+
mask = mask.copy()
1091+
mask[modify] = False
10891092
# error: Argument 2 to "check_value_size" has incompatible type
10901093
# "ExtensionArray"; expected "ndarray"
10911094
value = missing.check_value_size(

pandas/core/arrays/interval.py

+3-6
Original file line numberDiff line numberDiff line change
@@ -905,12 +905,7 @@ def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self:
905905
value(s) passed should be either Interval objects or NA/NaN.
906906
limit : int, default None
907907
(Not implemented yet for IntervalArray)
908-
If method is specified, this is the maximum number of consecutive
909-
NaN values to forward/backward fill. In other words, if there is
910-
a gap with more than this number of consecutive NaNs, it will only
911-
be partially filled. If method is not specified, this is the
912-
maximum number of entries along the entire axis where NaNs will be
913-
filled.
908+
The maximum number of entries where NA values will be filled.
914909
copy : bool, default True
915910
Whether to make a copy of the data before filling. If False, then
916911
the original should be modified and no new memory should be allocated.
@@ -923,6 +918,8 @@ def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self:
923918
"""
924919
if copy is False:
925920
raise NotImplementedError
921+
if limit is not None:
922+
raise ValueError("limit must be None")
926923

927924
value_left, value_right = self._validate_scalar(value)
928925

pandas/core/arrays/masked.py

+6
Original file line numberDiff line numberDiff line change
@@ -232,6 +232,12 @@ def _pad_or_backfill(
232232
@doc(ExtensionArray.fillna)
233233
def fillna(self, value, limit: int | None = None, copy: bool = True) -> Self:
234234
mask = self._mask
235+
if limit is not None and limit < len(self):
236+
modify = mask.cumsum() > limit
237+
if modify.any():
238+
# Only copy mask if necessary
239+
mask = mask.copy()
240+
mask[modify] = False
235241

236242
value = missing.check_value_size(value, mask, len(self))
237243

pandas/core/arrays/sparse/array.py

+3
Original file line numberDiff line numberDiff line change
@@ -717,6 +717,7 @@ def fillna(
717717
----------
718718
value : scalar
719719
limit : int, optional
720+
Not supported for SparseArray, must be None.
720721
copy: bool, default True
721722
Ignored for SparseArray.
722723
@@ -736,6 +737,8 @@ def fillna(
736737
When ``self.fill_value`` is not NA, the result dtype will be
737738
``self.dtype``. Again, this preserves the amount of memory used.
738739
"""
740+
if limit is not None:
741+
raise ValueError("limit must be None")
739742
new_values = np.where(isna(self.sp_values), value, self.sp_values)
740743

741744
if self._null_fill_value:

pandas/core/internals/blocks.py

+2
Original file line numberDiff line numberDiff line change
@@ -1863,6 +1863,8 @@ def fillna(
18631863
) -> list[Block]:
18641864
if isinstance(self.dtype, IntervalDtype):
18651865
# Block.fillna handles coercion (test_fillna_interval)
1866+
if limit is not None:
1867+
raise ValueError("limit must be None")
18661868
return super().fillna(
18671869
value=value,
18681870
limit=limit,

pandas/tests/extension/base/methods.py

+14
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,20 @@ def test_factorize_empty(self, data):
299299
tm.assert_numpy_array_equal(codes, expected_codes)
300300
tm.assert_extension_array_equal(uniques, expected_uniques)
301301

302+
def test_fillna_limit_frame(self, data_missing):
303+
# GH#58001
304+
df = pd.DataFrame({"A": data_missing.take([0, 1, 0, 1])})
305+
expected = pd.DataFrame({"A": data_missing.take([1, 1, 0, 1])})
306+
result = df.fillna(value=data_missing[1], limit=1)
307+
tm.assert_frame_equal(result, expected)
308+
309+
def test_fillna_limit_series(self, data_missing):
310+
# GH#58001
311+
ser = pd.Series(data_missing.take([0, 1, 0, 1]))
312+
expected = pd.Series(data_missing.take([1, 1, 0, 1]))
313+
result = ser.fillna(value=data_missing[1], limit=1)
314+
tm.assert_series_equal(result, expected)
315+
302316
def test_fillna_copy_frame(self, data_missing):
303317
arr = data_missing.take([1, 1])
304318
df = pd.DataFrame({"A": arr})

pandas/tests/extension/decimal/test_decimal.py

+16
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,22 @@ def test_fillna_with_none(self, data_missing):
152152
with pytest.raises(TypeError, match=msg):
153153
super().test_fillna_with_none(data_missing)
154154

155+
def test_fillna_limit_frame(self, data_missing):
156+
# GH#58001
157+
msg = "ExtensionArray.fillna added a 'copy' keyword"
158+
with tm.assert_produces_warning(
159+
DeprecationWarning, match=msg, check_stacklevel=False
160+
):
161+
super().test_fillna_limit_frame(data_missing)
162+
163+
def test_fillna_limit_series(self, data_missing):
164+
# GH#58001
165+
msg = "ExtensionArray.fillna added a 'copy' keyword"
166+
with tm.assert_produces_warning(
167+
DeprecationWarning, match=msg, check_stacklevel=False
168+
):
169+
super().test_fillna_limit_series(data_missing)
170+
155171
@pytest.mark.parametrize("dropna", [True, False])
156172
def test_value_counts(self, all_data, dropna):
157173
all_data = all_data[:10]

pandas/tests/extension/json/test_json.py

+10
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,16 @@ def test_fillna_with_none(self, data_missing):
156156
with pytest.raises(AssertionError):
157157
super().test_fillna_with_none(data_missing)
158158

159+
@pytest.mark.xfail(reason="fill value is a dictionary, takes incorrect code path")
160+
def test_fillna_limit_frame(self, data_missing):
161+
# GH#58001
162+
super().test_fillna_limit_frame(data_missing)
163+
164+
@pytest.mark.xfail(reason="fill value is a dictionary, takes incorrect code path")
165+
def test_fillna_limit_series(self, data_missing):
166+
# GH#58001
167+
super().test_fillna_limit_frame(data_missing)
168+
159169
@pytest.mark.parametrize(
160170
"limit_area, input_ilocs, expected_ilocs",
161171
[

pandas/tests/extension/test_interval.py

+10
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,16 @@ class TestIntervalArray(base.ExtensionTests):
8484
def _supports_reduction(self, ser: pd.Series, op_name: str) -> bool:
8585
return op_name in ["min", "max"]
8686

87+
def test_fillna_limit_frame(self, data_missing):
88+
# GH#58001
89+
with pytest.raises(ValueError, match="limit must be None"):
90+
super().test_fillna_limit_frame(data_missing)
91+
92+
def test_fillna_limit_series(self, data_missing):
93+
# GH#58001
94+
with pytest.raises(ValueError, match="limit must be None"):
95+
super().test_fillna_limit_frame(data_missing)
96+
8797
@pytest.mark.xfail(
8898
reason="Raises with incorrect message bc it disallows *all* listlikes "
8999
"instead of just wrong-length listlikes"

pandas/tests/extension/test_numpy.py

+12
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,18 @@ def test_shift_fill_value(self, data):
205205
# np.array shape inference. Shift implementation fails.
206206
super().test_shift_fill_value(data)
207207

208+
@skip_nested
209+
def test_fillna_limit_frame(self, data_missing):
210+
# GH#58001
211+
# The "scalar" for this array isn't a scalar.
212+
super().test_fillna_limit_frame(data_missing)
213+
214+
@skip_nested
215+
def test_fillna_limit_series(self, data_missing):
216+
# GH#58001
217+
# The "scalar" for this array isn't a scalar.
218+
super().test_fillna_limit_series(data_missing)
219+
208220
@skip_nested
209221
def test_fillna_copy_frame(self, data_missing):
210222
# The "scalar" for this array isn't a scalar.

pandas/tests/extension/test_sparse.py

+10
Original file line numberDiff line numberDiff line change
@@ -264,6 +264,16 @@ def test_fillna_frame(self, data_missing):
264264

265265
tm.assert_frame_equal(result, expected)
266266

267+
def test_fillna_limit_frame(self, data_missing):
268+
# GH#58001
269+
with pytest.raises(ValueError, match="limit must be None"):
270+
super().test_fillna_limit_frame(data_missing)
271+
272+
def test_fillna_limit_series(self, data_missing):
273+
# GH#58001
274+
with pytest.raises(ValueError, match="limit must be None"):
275+
super().test_fillna_limit_frame(data_missing)
276+
267277
_combine_le_expected_dtype = "Sparse[bool]"
268278

269279
def test_fillna_copy_frame(self, data_missing):

0 commit comments

Comments
 (0)