Skip to content

Commit 9e87dc7

Browse files
authored
BUG: Add limit_area to EA ffill/bfill (#56616)
1 parent a9eb9f2 commit 9e87dc7

File tree

16 files changed

+266
-80
lines changed

16 files changed

+266
-80
lines changed

doc/source/whatsnew/v2.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ Other enhancements
321321
- :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
322322
- :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
323323
- :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
324-
- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area`` (:issue:`56492`)
324+
- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd party :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`)
325325
- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`)
326326
- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
327327
- Implemented :meth:`Series.dt` methods and attributes for :class:`ArrowDtype` with ``pyarrow.duration`` type (:issue:`52284`)

pandas/core/arrays/_mixins.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,12 @@ def _fill_mask_inplace(
305305
func(self._ndarray.T, limit=limit, mask=mask.T)
306306

307307
def _pad_or_backfill(
308-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
308+
self,
309+
*,
310+
method: FillnaOptions,
311+
limit: int | None = None,
312+
limit_area: Literal["inside", "outside"] | None = None,
313+
copy: bool = True,
309314
) -> Self:
310315
mask = self.isna()
311316
if mask.any():
@@ -315,7 +320,7 @@ def _pad_or_backfill(
315320
npvalues = self._ndarray.T
316321
if copy:
317322
npvalues = npvalues.copy()
318-
func(npvalues, limit=limit, mask=mask.T)
323+
func(npvalues, limit=limit, limit_area=limit_area, mask=mask.T)
319324
npvalues = npvalues.T
320325

321326
if copy:

pandas/core/arrays/arrow/array.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -1025,13 +1025,18 @@ def dropna(self) -> Self:
10251025
return type(self)(pc.drop_null(self._pa_array))
10261026

10271027
def _pad_or_backfill(
1028-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
1028+
self,
1029+
*,
1030+
method: FillnaOptions,
1031+
limit: int | None = None,
1032+
limit_area: Literal["inside", "outside"] | None = None,
1033+
copy: bool = True,
10291034
) -> Self:
10301035
if not self._hasna:
10311036
# TODO(CoW): Not necessary anymore when CoW is the default
10321037
return self.copy()
10331038

1034-
if limit is None:
1039+
if limit is None and limit_area is None:
10351040
method = missing.clean_fill_method(method)
10361041
try:
10371042
if method == "pad":
@@ -1047,7 +1052,9 @@ def _pad_or_backfill(
10471052

10481053
# TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove
10491054
# this method entirely.
1050-
return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
1055+
return super()._pad_or_backfill(
1056+
method=method, limit=limit, limit_area=limit_area, copy=copy
1057+
)
10511058

10521059
@doc(ExtensionArray.fillna)
10531060
def fillna(

pandas/core/arrays/base.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
unique,
7171
)
7272
from pandas.core.array_algos.quantile import quantile_with_mask
73+
from pandas.core.missing import _fill_limit_area_1d
7374
from pandas.core.sorting import (
7475
nargminmax,
7576
nargsort,
@@ -957,7 +958,12 @@ def interpolate(
957958
)
958959

959960
def _pad_or_backfill(
960-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
961+
self,
962+
*,
963+
method: FillnaOptions,
964+
limit: int | None = None,
965+
limit_area: Literal["inside", "outside"] | None = None,
966+
copy: bool = True,
961967
) -> Self:
962968
"""
963969
Pad or backfill values, used by Series/DataFrame ffill and bfill.
@@ -1015,6 +1021,12 @@ def _pad_or_backfill(
10151021
DeprecationWarning,
10161022
stacklevel=find_stack_level(),
10171023
)
1024+
if limit_area is not None:
1025+
raise NotImplementedError(
1026+
f"{type(self).__name__} does not implement limit_area "
1027+
"(added in pandas 2.2). 3rd-party ExtensionArray authors "
1028+
"need to add this argument to _pad_or_backfill."
1029+
)
10181030
return self.fillna(method=method, limit=limit)
10191031

10201032
mask = self.isna()
@@ -1024,6 +1036,8 @@ def _pad_or_backfill(
10241036
meth = missing.clean_fill_method(method)
10251037

10261038
npmask = np.asarray(mask)
1039+
if limit_area is not None and not npmask.all():
1040+
_fill_limit_area_1d(npmask, limit_area)
10271041
if meth == "pad":
10281042
indexer = libalgos.get_fill_indexer(npmask, limit=limit)
10291043
return self.take(indexer, allow_fill=True)

pandas/core/arrays/interval.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -891,11 +891,18 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr
891891
return obj[indexer]
892892

893893
def _pad_or_backfill( # pylint: disable=useless-parent-delegation
894-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
894+
self,
895+
*,
896+
method: FillnaOptions,
897+
limit: int | None = None,
898+
limit_area: Literal["inside", "outside"] | None = None,
899+
copy: bool = True,
895900
) -> Self:
896901
# TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove
897902
# this method entirely.
898-
return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
903+
return super()._pad_or_backfill(
904+
method=method, limit=limit, limit_area=limit_area, copy=copy
905+
)
899906

900907
def fillna(
901908
self, value=None, method=None, limit: int | None = None, copy: bool = True

pandas/core/arrays/masked.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -193,7 +193,12 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:
193193
return self._simple_new(self._data[item], newmask)
194194

195195
def _pad_or_backfill(
196-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
196+
self,
197+
*,
198+
method: FillnaOptions,
199+
limit: int | None = None,
200+
limit_area: Literal["inside", "outside"] | None = None,
201+
copy: bool = True,
197202
) -> Self:
198203
mask = self._mask
199204

@@ -205,7 +210,21 @@ def _pad_or_backfill(
205210
if copy:
206211
npvalues = npvalues.copy()
207212
new_mask = new_mask.copy()
213+
elif limit_area is not None:
214+
mask = mask.copy()
208215
func(npvalues, limit=limit, mask=new_mask)
216+
217+
if limit_area is not None and not mask.all():
218+
mask = mask.T
219+
neg_mask = ~mask
220+
first = neg_mask.argmax()
221+
last = len(neg_mask) - neg_mask[::-1].argmax() - 1
222+
if limit_area == "inside":
223+
new_mask[:first] |= mask[:first]
224+
new_mask[last + 1 :] |= mask[last + 1 :]
225+
elif limit_area == "outside":
226+
new_mask[first + 1 : last] |= mask[first + 1 : last]
227+
209228
if copy:
210229
return self._simple_new(npvalues.T, new_mask.T)
211230
else:

pandas/core/arrays/period.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -810,12 +810,19 @@ def searchsorted(
810810
return m8arr.searchsorted(npvalue, side=side, sorter=sorter)
811811

812812
def _pad_or_backfill(
813-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
813+
self,
814+
*,
815+
method: FillnaOptions,
816+
limit: int | None = None,
817+
limit_area: Literal["inside", "outside"] | None = None,
818+
copy: bool = True,
814819
) -> Self:
815820
# view as dt64 so we get treated as timelike in core.missing,
816821
# similar to dtl._period_dispatch
817822
dta = self.view("M8[ns]")
818-
result = dta._pad_or_backfill(method=method, limit=limit, copy=copy)
823+
result = dta._pad_or_backfill(
824+
method=method, limit=limit, limit_area=limit_area, copy=copy
825+
)
819826
if copy:
820827
return cast("Self", result.view(self.dtype))
821828
else:

pandas/core/arrays/sparse/array.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -718,11 +718,18 @@ def isna(self) -> Self: # type: ignore[override]
718718
return type(self)(mask, fill_value=False, dtype=dtype)
719719

720720
def _pad_or_backfill( # pylint: disable=useless-parent-delegation
721-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
721+
self,
722+
*,
723+
method: FillnaOptions,
724+
limit: int | None = None,
725+
limit_area: Literal["inside", "outside"] | None = None,
726+
copy: bool = True,
722727
) -> Self:
723728
# TODO(3.0): We can remove this method once deprecation for fillna method
724729
# keyword is enforced.
725-
return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
730+
return super()._pad_or_backfill(
731+
method=method, limit=limit, limit_area=limit_area, copy=copy
732+
)
726733

727734
def fillna(
728735
self,

pandas/core/internals/blocks.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from functools import wraps
4+
import inspect
45
import re
56
from typing import (
67
TYPE_CHECKING,
@@ -2256,11 +2257,21 @@ def pad_or_backfill(
22562257
) -> list[Block]:
22572258
values = self.values
22582259

2260+
kwargs: dict[str, Any] = {"method": method, "limit": limit}
2261+
if "limit_area" in inspect.signature(values._pad_or_backfill).parameters:
2262+
kwargs["limit_area"] = limit_area
2263+
elif limit_area is not None:
2264+
raise NotImplementedError(
2265+
f"{type(values).__name__} does not implement limit_area "
2266+
"(added in pandas 2.2). 3rd-party ExtensionArray authors "
2267+
"need to add this argument to _pad_or_backfill."
2268+
)
2269+
22592270
if values.ndim == 2 and axis == 1:
22602271
# NDArrayBackedExtensionArray.fillna assumes axis=0
2261-
new_values = values.T._pad_or_backfill(method=method, limit=limit).T
2272+
new_values = values.T._pad_or_backfill(**kwargs).T
22622273
else:
2263-
new_values = values._pad_or_backfill(method=method, limit=limit)
2274+
new_values = values._pad_or_backfill(**kwargs)
22642275
return [self.make_block_same_class(new_values)]
22652276

22662277

0 commit comments

Comments
 (0)