Skip to content

Commit 0a9efad

Browse files
authored
REF: implement EA.pad_or_backfill (#54001)
* REF: implement EA.pad_or_backfill * Update doc * returns section in docstring * docstring fixup * troubleshoot docbuild * update doc * Avoid warning * pylint ignores
1 parent 8b78385 commit 0a9efad

File tree

17 files changed

+266
-43
lines changed

17 files changed

+266
-43
lines changed

doc/source/reference/extensions.rst

+1
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ objects.
5454
api.extensions.ExtensionArray.interpolate
5555
api.extensions.ExtensionArray.isin
5656
api.extensions.ExtensionArray.isna
57+
api.extensions.ExtensionArray.pad_or_backfill
5758
api.extensions.ExtensionArray.ravel
5859
api.extensions.ExtensionArray.repeat
5960
api.extensions.ExtensionArray.searchsorted

doc/source/whatsnew/v2.1.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -417,6 +417,7 @@ Other Deprecations
417417
- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
418418
- Deprecated replacing builtin and NumPy functions in ``.agg``, ``.apply``, and ``.transform``; use the corresponding string alias (e.g. ``"sum"`` for ``sum`` or ``np.sum``) instead (:issue:`53425`)
419419
- Deprecated strings ``T``, ``t``, ``L`` and ``l`` denoting units in :func:`to_timedelta` (:issue:`52536`)
420+
- Deprecated the "method" and "limit" keywords in ``ExtensionArray.fillna``; implement and use ``pad_or_backfill`` instead (:issue:`53621`)
420421
- Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`)
421422
- Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`)
422423
- Deprecated the use of non-supported datetime64 and timedelta64 resolutions with :func:`pandas.array`. Supported resolutions are: "s", "ms", "us", "ns" resolutions (:issue:`53058`)

pandas/core/arrays/_mixins.py

+32-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
AxisInt,
1919
Dtype,
2020
F,
21+
FillnaOptions,
2122
PositionalIndexer2D,
2223
PositionalIndexerTuple,
2324
ScalarIndexer,
@@ -295,6 +296,37 @@ def _fill_mask_inplace(
295296
func = missing.get_fill_func(method, ndim=self.ndim)
296297
func(self._ndarray.T, limit=limit, mask=mask.T)
297298

299+
def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill missing entries of the backing ndarray.

    Parameters
    ----------
    method : FillnaOptions
        Fill method; passed to ``missing.get_fill_func`` to select the
        routine that performs the fill.
    limit : int, default None
        Forwarded to the fill routine as ``limit``.
    limit_area : {'inside', 'outside'}, default None
        Accepted for signature compatibility; not used by this implementation.
    copy : bool, default True
        If True, the fill runs on a copy of the transposed backing data and
        a new array is built from it (or ``self.copy()`` is returned when
        nothing is missing). If False, the fill routine is handed the
        transposed backing data directly and ``self`` is returned.

    Returns
    -------
    Same type as self
    """
    na_mask = self.isna()
    if not na_mask.any():
        # Nothing is missing, so there is nothing to fill.
        return self.copy() if copy else self

    # (for now) when self.ndim == 2, we assume axis=0
    fill = missing.get_fill_func(method, ndim=self.ndim)

    backing = self._ndarray.T
    if copy:
        backing = backing.copy()
    fill(backing, limit=limit, mask=na_mask.T)

    if not copy:
        return self
    return self._from_backing_data(backing.T)
329+
298330
@doc(ExtensionArray.fillna)
299331
def fillna(
300332
self, value=None, method=None, limit: int | None = None, copy: bool = True
@@ -312,7 +344,6 @@ def fillna(
312344

313345
if mask.any():
314346
if method is not None:
315-
# TODO: check value is None
316347
# (for now) when self.ndim == 2, we assume axis=0
317348
func = missing.get_fill_func(method, ndim=self.ndim)
318349
npvalues = self._ndarray.T

pandas/core/arrays/arrow/array.py

+18
Original file line numberDiff line numberDiff line change
@@ -899,6 +899,24 @@ def dropna(self) -> Self:
899899
"""
900900
return type(self)(pc.drop_null(self._pa_array))
901901

902+
def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill, short-circuiting when the array has no missing values.

    When ``self._hasna`` is false a copy of ``self`` is returned regardless
    of the ``copy`` argument; otherwise all keywords are forwarded unchanged
    to the parent class implementation.

    Returns
    -------
    Same type as self
    """
    if self._hasna:
        # TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can
        # remove this method entirely.
        return super().pad_or_backfill(
            method=method, limit=limit, limit_area=limit_area, copy=copy
        )

    # TODO(CoW): Not necessary anymore when CoW is the default
    return self.copy()
919+
902920
@doc(ExtensionArray.fillna)
903921
def fillna(
904922
self,

pandas/core/arrays/base.py

+105-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
cast,
1919
overload,
2020
)
21+
import warnings
2122

2223
import numpy as np
2324

@@ -33,6 +34,7 @@
3334
Substitution,
3435
cache_readonly,
3536
)
37+
from pandas.util._exceptions import find_stack_level
3638
from pandas.util._validators import (
3739
validate_bool_kwarg,
3840
validate_fillna_kwargs,
@@ -130,6 +132,7 @@ class ExtensionArray:
130132
interpolate
131133
isin
132134
isna
135+
pad_or_backfill
133136
ravel
134137
repeat
135138
searchsorted
@@ -180,6 +183,7 @@ class ExtensionArray:
180183
methods:
181184
182185
* fillna
186+
* pad_or_backfill
183187
* dropna
184188
* unique
185189
* factorize / _values_for_factorize
@@ -907,6 +911,93 @@ def interpolate(
907911
f"{type(self).__name__} does not implement interpolate"
908912
)
909913

914+
def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill values, used by Series/DataFrame ffill and bfill.

    Parameters
    ----------
    method : {'backfill', 'bfill', 'pad', 'ffill'}
        Method to use for filling holes in reindexed Series:

        * pad / ffill: propagate last valid observation forward to next valid.
        * backfill / bfill: use NEXT valid observation to fill gap.

    limit : int, default None
        This is the maximum number of consecutive
        NaN values to forward/backward fill. In other words, if there is
        a gap with more than this number of consecutive NaNs, it will only
        be partially filled.

    limit_area : {'inside', 'outside'}, default None
        Accepted for signature compatibility with subclasses. The base
        class implementation does not use this keyword.

    copy : bool, default True
        Whether to make a copy of the data before filling. If False, then
        the original should be modified and no new memory should be allocated.
        For ExtensionArray subclasses that cannot do this, it is at the
        author's discretion whether to ignore "copy=False" or to raise.
        The base class implementation ignores the keyword if any NAs are
        present.

    Returns
    -------
    Same type as self

    Examples
    --------
    >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
    >>> arr.pad_or_backfill(method="backfill", limit=1)
    <IntegerArray>
    [<NA>, 2, 2, 3, <NA>, <NA>]
    Length: 6, dtype: Int64
    """

    # If a 3rd-party EA has implemented this functionality in fillna,
    # we warn that they need to implement pad_or_backfill instead.
    if (
        type(self).fillna is not ExtensionArray.fillna
        and type(self).pad_or_backfill is ExtensionArray.pad_or_backfill
    ):
        # Check for pad_or_backfill here allows us to call
        # super().pad_or_backfill without getting this warning
        warnings.warn(
            "ExtensionArray.fillna 'method' keyword is deprecated. "
            "In a future version, arr.pad_or_backfill will be called "
            "instead. 3rd-party ExtensionArray authors need to implement "
            "pad_or_backfill.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        # NB: "limit_area" and "copy" are not forwarded to the legacy fillna.
        return self.fillna(method=method, limit=limit)

    mask = self.isna()

    if not mask.any():
        # Nothing to fill: only here is the "copy" keyword honoured.
        return self.copy() if copy else self

    # NB: the base class does not respect the "copy" keyword
    meth = missing.clean_fill_method(method)

    npmask = np.asarray(mask)
    if meth == "pad":
        indexer = libalgos.get_fill_indexer(npmask, limit=limit)
        return self.take(indexer, allow_fill=True)

    # i.e. meth == "backfill": compute the indexer on the reversed mask,
    # flip it back, and take from the reversed array.
    indexer = libalgos.get_fill_indexer(npmask[::-1], limit=limit)[::-1]
    return self[::-1].take(indexer, allow_fill=True)
1000+
9101001
def fillna(
9111002
self,
9121003
value: object | ArrayLike | None = None,
@@ -921,14 +1012,16 @@ def fillna(
9211012
----------
9221013
value : scalar, array-like
9231014
If a scalar value is passed it is used to fill all missing values.
924-
Alternatively, an array-like 'value' can be given. It's expected
1015+
Alternatively, an array-like "value" can be given. It's expected
9251016
that the array-like have the same length as 'self'.
9261017
method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
9271018
Method to use for filling holes in reindexed Series:
9281019
9291020
* pad / ffill: propagate last valid observation forward to next valid.
9301021
* backfill / bfill: use NEXT valid observation to fill gap.
9311022
1023+
.. deprecated:: 2.1.0
1024+
9321025
limit : int, default None
9331026
If method is specified, this is the maximum number of consecutive
9341027
NaN values to forward/backward fill. In other words, if there is
@@ -937,6 +1030,8 @@ def fillna(
9371030
maximum number of entries along the entire axis where NaNs will be
9381031
filled.
9391032
1033+
.. deprecated:: 2.1.0
1034+
9401035
copy : bool, default True
9411036
Whether to make a copy of the data before filling. If False, then
9421037
the original should be modified and no new memory should be allocated.
@@ -958,6 +1053,15 @@ def fillna(
9581053
[0, 0, 2, 3, 0, 0]
9591054
Length: 6, dtype: Int64
9601055
"""
1056+
if method is not None:
1057+
warnings.warn(
1058+
f"The 'method' keyword in {type(self).__name__}.fillna is "
1059+
"deprecated and will be removed in a future version. "
1060+
"Use pad_or_backfill instead.",
1061+
FutureWarning,
1062+
stacklevel=find_stack_level(),
1063+
)
1064+
9611065
value, method = validate_fillna_kwargs(value, method)
9621066

9631067
mask = self.isna()

pandas/core/arrays/interval.py

+15
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
ArrayLike,
3030
AxisInt,
3131
Dtype,
32+
FillnaOptions,
3233
IntervalClosedType,
3334
NpDtype,
3435
PositionalIndexer,
@@ -889,6 +890,20 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr
889890
indexer = obj.argsort()[-1]
890891
return obj[indexer]
891892

893+
def pad_or_backfill(  # pylint: disable=useless-parent-delegation
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill by delegating to the parent class implementation.

    All keywords are forwarded unchanged and the parent's result is
    returned as-is.

    Returns
    -------
    Same type as self
    """
    # TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove
    #  this method entirely.
    return super().pad_or_backfill(
        method=method,
        limit=limit,
        limit_area=limit_area,
        copy=copy,
    )
906+
892907
def fillna(
893908
self, value=None, method=None, limit: int | None = None, copy: bool = True
894909
) -> Self:

pandas/core/arrays/masked.py

+31
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
AstypeArg,
2525
AxisInt,
2626
DtypeObj,
27+
FillnaOptions,
2728
NpDtype,
2829
PositionalIndexer,
2930
Scalar,
@@ -189,6 +190,36 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:
189190

190191
return self._simple_new(self._data[item], newmask)
191192

193+
def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill the masked entries of this array.

    Parameters
    ----------
    method : FillnaOptions
        Fill method; passed to ``missing.get_fill_func`` to select the
        routine that performs the fill.
    limit : int, default None
        Forwarded to the fill routine as ``limit``.
    limit_area : {'inside', 'outside'}, default None
        Accepted for signature compatibility; not used by this implementation.
    copy : bool, default True
        If True, the fill runs on copies of the transposed data and mask and
        a new array is built from them (or ``self.copy()`` is returned when
        nothing is masked). If False, the fill routine is handed the
        transposed data and mask directly and ``self`` is returned.

    Returns
    -------
    Same type as self
    """
    na_mask = self._mask

    if not na_mask.any():
        # No masked entries, so there is nothing to fill.
        return self.copy() if copy else self

    fill = missing.get_fill_func(method, ndim=self.ndim)

    data_t = self._data.T
    mask_t = na_mask.T
    if copy:
        data_t = data_t.copy()
        mask_t = mask_t.copy()
    fill(data_t, limit=limit, mask=mask_t)

    if not copy:
        return self
    return self._simple_new(data_t.T, mask_t.T)
222+
192223
@doc(ExtensionArray.fillna)
193224
def fillna(
194225
self, value=None, method=None, limit: int | None = None, copy: bool = True

pandas/core/arrays/numpy_.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ def pad_or_backfill(
244244
self,
245245
*,
246246
method: FillnaOptions,
247-
limit: int | None,
247+
limit: int | None = None,
248248
limit_area: Literal["inside", "outside"] | None = None,
249249
copy: bool = True,
250250
) -> Self:

pandas/core/arrays/period.py

+20
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@
7878
from pandas._typing import (
7979
AnyArrayLike,
8080
Dtype,
81+
FillnaOptions,
8182
NpDtype,
8283
NumpySorter,
8384
NumpyValueArrayLike,
@@ -790,6 +791,25 @@ def searchsorted(
790791
m8arr = self._ndarray.view("M8[ns]")
791792
return m8arr.searchsorted(npvalue, side=side, sorter=sorter)
792793

794+
def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill by dispatching to the "M8[ns]" view of this array.

    All keywords are forwarded unchanged to the view's ``pad_or_backfill``.
    With ``copy=True`` the filled result is viewed back as ``self.dtype``
    and returned; with ``copy=False`` ``self`` is returned.

    Returns
    -------
    Same type as self
    """
    # view as dt64 so we get treated as timelike in core.missing,
    #  similar to dtl._period_dispatch
    filled = self.view("M8[ns]").pad_or_backfill(
        method=method, limit=limit, limit_area=limit_area, copy=copy
    )
    if not copy:
        # NOTE(review): presumably relies on the dt64 view sharing memory
        #  with self so the in-place fill is visible here -- confirm.
        return self
    return cast("Self", filled.view(self.dtype))
812+
793813
def fillna(
794814
self, value=None, method=None, limit: int | None = None, copy: bool = True
795815
) -> Self:

0 commit comments

Comments
 (0)