Skip to content

Commit f504526

Browse files
rhshadrach authored and meeseeksmachine committed
Backport PR pandas-dev#56616: BUG: Add limit_area to EA ffill/bfill
1 parent d43af63 commit f504526

File tree

16 files changed

+266
-80
lines changed

16 files changed

+266
-80
lines changed

doc/source/whatsnew/v2.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,7 @@ Other enhancements
321321
- :meth:`DataFrame.apply` now allows the usage of numba (via ``engine="numba"``) to JIT compile the passed function, allowing for potential speedups (:issue:`54666`)
322322
- :meth:`ExtensionArray._explode` interface method added to allow extension type implementations of the ``explode`` method (:issue:`54833`)
323323
- :meth:`ExtensionArray.duplicated` added to allow extension type implementations of the ``duplicated`` method (:issue:`55255`)
324-
- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area`` (:issue:`56492`)
324+
- :meth:`Series.ffill`, :meth:`Series.bfill`, :meth:`DataFrame.ffill`, and :meth:`DataFrame.bfill` have gained the argument ``limit_area``; 3rd party :class:`.ExtensionArray` authors need to add this argument to the method ``_pad_or_backfill`` (:issue:`56492`)
325325
- Allow passing ``read_only``, ``data_only`` and ``keep_links`` arguments to openpyxl using ``engine_kwargs`` of :func:`read_excel` (:issue:`55027`)
326326
- Implement masked algorithms for :meth:`Series.value_counts` (:issue:`54984`)
327327
- Implemented :meth:`Series.dt` methods and attributes for :class:`ArrowDtype` with ``pyarrow.duration`` type (:issue:`52284`)

pandas/core/arrays/_mixins.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,12 @@ def _fill_mask_inplace(
305305
func(self._ndarray.T, limit=limit, mask=mask.T)
306306

307307
def _pad_or_backfill(
308-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
308+
self,
309+
*,
310+
method: FillnaOptions,
311+
limit: int | None = None,
312+
limit_area: Literal["inside", "outside"] | None = None,
313+
copy: bool = True,
309314
) -> Self:
310315
mask = self.isna()
311316
if mask.any():
@@ -315,7 +320,7 @@ def _pad_or_backfill(
315320
npvalues = self._ndarray.T
316321
if copy:
317322
npvalues = npvalues.copy()
318-
func(npvalues, limit=limit, mask=mask.T)
323+
func(npvalues, limit=limit, limit_area=limit_area, mask=mask.T)
319324
npvalues = npvalues.T
320325

321326
if copy:

pandas/core/arrays/arrow/array.py

+10-3
Original file line numberDiff line numberDiff line change
@@ -1005,13 +1005,18 @@ def dropna(self) -> Self:
10051005
return type(self)(pc.drop_null(self._pa_array))
10061006

10071007
def _pad_or_backfill(
1008-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
1008+
self,
1009+
*,
1010+
method: FillnaOptions,
1011+
limit: int | None = None,
1012+
limit_area: Literal["inside", "outside"] | None = None,
1013+
copy: bool = True,
10091014
) -> Self:
10101015
if not self._hasna:
10111016
# TODO(CoW): Not necessary anymore when CoW is the default
10121017
return self.copy()
10131018

1014-
if limit is None:
1019+
if limit is None and limit_area is None:
10151020
method = missing.clean_fill_method(method)
10161021
try:
10171022
if method == "pad":
@@ -1027,7 +1032,9 @@ def _pad_or_backfill(
10271032

10281033
# TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove
10291034
# this method entirely.
1030-
return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
1035+
return super()._pad_or_backfill(
1036+
method=method, limit=limit, limit_area=limit_area, copy=copy
1037+
)
10311038

10321039
@doc(ExtensionArray.fillna)
10331040
def fillna(

pandas/core/arrays/base.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,7 @@
7070
unique,
7171
)
7272
from pandas.core.array_algos.quantile import quantile_with_mask
73+
from pandas.core.missing import _fill_limit_area_1d
7374
from pandas.core.sorting import (
7475
nargminmax,
7576
nargsort,
@@ -954,7 +955,12 @@ def interpolate(
954955
)
955956

956957
def _pad_or_backfill(
957-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
958+
self,
959+
*,
960+
method: FillnaOptions,
961+
limit: int | None = None,
962+
limit_area: Literal["inside", "outside"] | None = None,
963+
copy: bool = True,
958964
) -> Self:
959965
"""
960966
Pad or backfill values, used by Series/DataFrame ffill and bfill.
@@ -1012,6 +1018,12 @@ def _pad_or_backfill(
10121018
DeprecationWarning,
10131019
stacklevel=find_stack_level(),
10141020
)
1021+
if limit_area is not None:
1022+
raise NotImplementedError(
1023+
f"{type(self).__name__} does not implement limit_area "
1024+
"(added in pandas 2.2). 3rd-party ExtnsionArray authors "
1025+
"need to add this argument to _pad_or_backfill."
1026+
)
10151027
return self.fillna(method=method, limit=limit)
10161028

10171029
mask = self.isna()
@@ -1021,6 +1033,8 @@ def _pad_or_backfill(
10211033
meth = missing.clean_fill_method(method)
10221034

10231035
npmask = np.asarray(mask)
1036+
if limit_area is not None and not npmask.all():
1037+
_fill_limit_area_1d(npmask, limit_area)
10241038
if meth == "pad":
10251039
indexer = libalgos.get_fill_indexer(npmask, limit=limit)
10261040
return self.take(indexer, allow_fill=True)

pandas/core/arrays/interval.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -890,11 +890,18 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr
890890
return obj[indexer]
891891

892892
def _pad_or_backfill( # pylint: disable=useless-parent-delegation
893-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
893+
self,
894+
*,
895+
method: FillnaOptions,
896+
limit: int | None = None,
897+
limit_area: Literal["inside", "outside"] | None = None,
898+
copy: bool = True,
894899
) -> Self:
895900
# TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove
896901
# this method entirely.
897-
return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
902+
return super()._pad_or_backfill(
903+
method=method, limit=limit, limit_area=limit_area, copy=copy
904+
)
898905

899906
def fillna(
900907
self, value=None, method=None, limit: int | None = None, copy: bool = True

pandas/core/arrays/masked.py

+20-1
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,12 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:
192192
return self._simple_new(self._data[item], newmask)
193193

194194
def _pad_or_backfill(
195-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
195+
self,
196+
*,
197+
method: FillnaOptions,
198+
limit: int | None = None,
199+
limit_area: Literal["inside", "outside"] | None = None,
200+
copy: bool = True,
196201
) -> Self:
197202
mask = self._mask
198203

@@ -204,7 +209,21 @@ def _pad_or_backfill(
204209
if copy:
205210
npvalues = npvalues.copy()
206211
new_mask = new_mask.copy()
212+
elif limit_area is not None:
213+
mask = mask.copy()
207214
func(npvalues, limit=limit, mask=new_mask)
215+
216+
if limit_area is not None and not mask.all():
217+
mask = mask.T
218+
neg_mask = ~mask
219+
first = neg_mask.argmax()
220+
last = len(neg_mask) - neg_mask[::-1].argmax() - 1
221+
if limit_area == "inside":
222+
new_mask[:first] |= mask[:first]
223+
new_mask[last + 1 :] |= mask[last + 1 :]
224+
elif limit_area == "outside":
225+
new_mask[first + 1 : last] |= mask[first + 1 : last]
226+
208227
if copy:
209228
return self._simple_new(npvalues.T, new_mask.T)
210229
else:

pandas/core/arrays/period.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -810,12 +810,19 @@ def searchsorted(
810810
return m8arr.searchsorted(npvalue, side=side, sorter=sorter)
811811

812812
def _pad_or_backfill(
813-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
813+
self,
814+
*,
815+
method: FillnaOptions,
816+
limit: int | None = None,
817+
limit_area: Literal["inside", "outside"] | None = None,
818+
copy: bool = True,
814819
) -> Self:
815820
# view as dt64 so we get treated as timelike in core.missing,
816821
# similar to dtl._period_dispatch
817822
dta = self.view("M8[ns]")
818-
result = dta._pad_or_backfill(method=method, limit=limit, copy=copy)
823+
result = dta._pad_or_backfill(
824+
method=method, limit=limit, limit_area=limit_area, copy=copy
825+
)
819826
if copy:
820827
return cast("Self", result.view(self.dtype))
821828
else:

pandas/core/arrays/sparse/array.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -716,11 +716,18 @@ def isna(self) -> Self: # type: ignore[override]
716716
return type(self)(mask, fill_value=False, dtype=dtype)
717717

718718
def _pad_or_backfill( # pylint: disable=useless-parent-delegation
719-
self, *, method: FillnaOptions, limit: int | None = None, copy: bool = True
719+
self,
720+
*,
721+
method: FillnaOptions,
722+
limit: int | None = None,
723+
limit_area: Literal["inside", "outside"] | None = None,
724+
copy: bool = True,
720725
) -> Self:
721726
# TODO(3.0): We can remove this method once deprecation for fillna method
722727
# keyword is enforced.
723-
return super()._pad_or_backfill(method=method, limit=limit, copy=copy)
728+
return super()._pad_or_backfill(
729+
method=method, limit=limit, limit_area=limit_area, copy=copy
730+
)
724731

725732
def fillna(
726733
self,

pandas/core/internals/blocks.py

+13-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
from __future__ import annotations
22

33
from functools import wraps
4+
import inspect
45
import re
56
from typing import (
67
TYPE_CHECKING,
@@ -2256,11 +2257,21 @@ def pad_or_backfill(
22562257
) -> list[Block]:
22572258
values = self.values
22582259

2260+
kwargs: dict[str, Any] = {"method": method, "limit": limit}
2261+
if "limit_area" in inspect.signature(values._pad_or_backfill).parameters:
2262+
kwargs["limit_area"] = limit_area
2263+
elif limit_area is not None:
2264+
raise NotImplementedError(
2265+
f"{type(values).__name__} does not implement limit_area "
2266+
"(added in pandas 2.2). 3rd-party ExtnsionArray authors "
2267+
"need to add this argument to _pad_or_backfill."
2268+
)
2269+
22592270
if values.ndim == 2 and axis == 1:
22602271
# NDArrayBackedExtensionArray.fillna assumes axis=0
2261-
new_values = values.T._pad_or_backfill(method=method, limit=limit).T
2272+
new_values = values.T._pad_or_backfill(**kwargs).T
22622273
else:
2263-
new_values = values._pad_or_backfill(method=method, limit=limit)
2274+
new_values = values._pad_or_backfill(**kwargs)
22642275
return [self.make_block_same_class(new_values)]
22652276

22662277

0 commit comments

Comments
 (0)