Skip to content

REF: implement EA.pad_or_backfill #54001

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Jul 31, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/reference/extensions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ objects.
api.extensions.ExtensionArray.interpolate
api.extensions.ExtensionArray.isin
api.extensions.ExtensionArray.isna
api.extensions.ExtensionArray.pad_or_backfill
api.extensions.ExtensionArray.ravel
api.extensions.ExtensionArray.repeat
api.extensions.ExtensionArray.searchsorted
Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,7 @@ Other Deprecations
- Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`)
- Deprecated replacing builtin and NumPy functions in ``.agg``, ``.apply``, and ``.transform``; use the corresponding string alias (e.g. ``"sum"`` for ``sum`` or ``np.sum``) instead (:issue:`53425`)
- Deprecated strings ``T``, ``t``, ``L`` and ``l`` denoting units in :func:`to_timedelta` (:issue:`52536`)
- Deprecated the "method" and "limit" keywords in ``ExtensionArray.fillna``; implement and use ``pad_or_backfill`` instead (:issue:`53621`)
- Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`)
- Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`)
- Deprecated the use of non-supported datetime64 and timedelta64 resolutions with :func:`pandas.array`. Supported resolutions are: "s", "ms", "us", "ns" resolutions (:issue:`53058`)
Expand Down
33 changes: 32 additions & 1 deletion pandas/core/arrays/_mixins.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
AxisInt,
Dtype,
F,
FillnaOptions,
PositionalIndexer2D,
PositionalIndexerTuple,
ScalarIndexer,
Expand Down Expand Up @@ -295,6 +296,37 @@ def _fill_mask_inplace(
func = missing.get_fill_func(method, ndim=self.ndim)
func(self._ndarray.T, limit=limit, mask=mask.T)

def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill the missing entries of the backing ndarray.

    Parameters
    ----------
    method : FillnaOptions
        Fill method; forwarded to ``missing.get_fill_func`` to select the
        fill routine.
    limit : int, optional
        Forwarded unchanged to the fill routine.
    limit_area : {"inside", "outside"}, optional
        Accepted as part of the signature; not referenced by this
        implementation.
    copy : bool, default True
        If True, the fill runs on a copy of the transposed backing data and
        a new array built from it is returned. If False, the fill routine is
        applied directly to ``self._ndarray.T`` and ``self`` is returned.

    Returns
    -------
    Self
    """
    na_mask = self.isna()

    if not na_mask.any():
        # Nothing to fill: only the copy semantics matter.
        return self.copy() if copy else self

    # (for now) when self.ndim == 2, we assume axis=0
    fill_func = missing.get_fill_func(method, ndim=self.ndim)

    transposed = self._ndarray.T
    if copy:
        transposed = transposed.copy()
    fill_func(transposed, limit=limit, mask=na_mask.T)

    if not copy:
        # The fill routine was handed self._ndarray.T itself, so hand back self.
        return self
    return self._from_backing_data(transposed.T)

@doc(ExtensionArray.fillna)
def fillna(
self, value=None, method=None, limit: int | None = None, copy: bool = True
Expand All @@ -312,7 +344,6 @@ def fillna(

if mask.any():
if method is not None:
# TODO: check value is None
# (for now) when self.ndim == 2, we assume axis=0
func = missing.get_fill_func(method, ndim=self.ndim)
npvalues = self._ndarray.T
Expand Down
18 changes: 18 additions & 0 deletions pandas/core/arrays/arrow/array.py
Original file line number Diff line number Diff line change
Expand Up @@ -899,6 +899,24 @@ def dropna(self) -> Self:
"""
return type(self)(pc.drop_null(self._pa_array))

def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill missing values, short-circuiting when there are none.

    When ``self._hasna`` is falsy a copy of ``self`` is returned (regardless
    of ``copy``); otherwise every keyword is forwarded to the parent class
    implementation.
    """
    if self._hasna:
        # TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can
        # remove this method entirely.
        return super().pad_or_backfill(
            method=method, limit=limit, limit_area=limit_area, copy=copy
        )

    # TODO(CoW): Not necessary anymore when CoW is the default
    return self.copy()

@doc(ExtensionArray.fillna)
def fillna(
self,
Expand Down
106 changes: 105 additions & 1 deletion pandas/core/arrays/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
cast,
overload,
)
import warnings

import numpy as np

Expand All @@ -33,6 +34,7 @@
Substitution,
cache_readonly,
)
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import (
validate_bool_kwarg,
validate_fillna_kwargs,
Expand Down Expand Up @@ -130,6 +132,7 @@ class ExtensionArray:
interpolate
isin
isna
pad_or_backfill
ravel
repeat
searchsorted
Expand Down Expand Up @@ -180,6 +183,7 @@ class ExtensionArray:
methods:

* fillna
* pad_or_backfill
* dropna
* unique
* factorize / _values_for_factorize
Expand Down Expand Up @@ -907,6 +911,93 @@ def interpolate(
f"{type(self).__name__} does not implement interpolate"
)

def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill values, used by Series/DataFrame ffill and bfill.

    Parameters
    ----------
    method : {'backfill', 'bfill', 'pad', 'ffill'}
        Method to use for filling holes in reindexed Series:

        * pad / ffill: propagate last valid observation forward to next valid.
        * backfill / bfill: use NEXT valid observation to fill gap.

    limit : int, default None
        This is the maximum number of consecutive
        NaN values to forward/backward fill. In other words, if there is
        a gap with more than this number of consecutive NaNs, it will only
        be partially filled.

    limit_area : {'inside', 'outside'}, default None
        Accepted as part of the signature. The base class implementation
        does not use this keyword.

    copy : bool, default True
        Whether to make a copy of the data before filling. If False, then
        the original should be modified and no new memory should be allocated.
        For ExtensionArray subclasses that cannot do this, it is at the
        author's discretion whether to ignore "copy=False" or to raise.
        The base class implementation ignores the keyword if any NAs are
        present.

    Returns
    -------
    Same type as self

    Warns
    -----
    FutureWarning
        If the subclass overrides ``fillna`` but not ``pad_or_backfill``.
        In that case the call is forwarded to ``self.fillna`` with only
        ``method`` and ``limit``.

    Examples
    --------
    >>> arr = pd.array([np.nan, np.nan, 2, 3, np.nan, np.nan])
    >>> arr.pad_or_backfill(method="backfill", limit=1)
    <IntegerArray>
    [<NA>, 2, 2, 3, <NA>, <NA>]
    Length: 6, dtype: Int64
    """

    # If a 3rd-party EA has implemented this functionality in fillna,
    #  we warn that they need to implement pad_or_backfill instead.
    if (
        type(self).fillna is not ExtensionArray.fillna
        and type(self).pad_or_backfill is ExtensionArray.pad_or_backfill
    ):
        # Check for pad_or_backfill here allows us to call
        #  super().pad_or_backfill without getting this warning
        warnings.warn(
            "ExtensionArray.fillna 'method' keyword is deprecated. "
            "In a future version, arr.pad_or_backfill will be called "
            "instead. 3rd-party ExtensionArray authors need to implement "
            "pad_or_backfill.",
            FutureWarning,
            stacklevel=find_stack_level(),
        )
        return self.fillna(method=method, limit=limit)

    mask = self.isna()

    if not mask.any():
        # Nothing to fill: only the copy semantics matter.
        return self.copy() if copy else self

    # NB: the base class does not respect the "copy" keyword
    meth = missing.clean_fill_method(method)

    npmask = np.asarray(mask)
    if meth == "pad":
        indexer = libalgos.get_fill_indexer(npmask, limit=limit)
        return self.take(indexer, allow_fill=True)

    # i.e. meth == "backfill": compute the indexer on the reversed mask,
    #  flip it back into original order, and take from the reversed array
    #  that those positions refer to.
    indexer = libalgos.get_fill_indexer(npmask[::-1], limit=limit)[::-1]
    return self[::-1].take(indexer, allow_fill=True)

def fillna(
self,
value: object | ArrayLike | None = None,
Expand All @@ -921,14 +1012,16 @@ def fillna(
----------
value : scalar, array-like
If a scalar value is passed it is used to fill all missing values.
Alternatively, an array-like 'value' can be given. It's expected
Alternatively, an array-like "value" can be given. It's expected
that the array-like have the same length as 'self'.
method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
Method to use for filling holes in reindexed Series:

* pad / ffill: propagate last valid observation forward to next valid.
* backfill / bfill: use NEXT valid observation to fill gap.

.. deprecated:: 2.1.0

limit : int, default None
If method is specified, this is the maximum number of consecutive
NaN values to forward/backward fill. In other words, if there is
Expand All @@ -937,6 +1030,8 @@ def fillna(
maximum number of entries along the entire axis where NaNs will be
filled.

.. deprecated:: 2.1.0

copy : bool, default True
Whether to make a copy of the data before filling. If False, then
the original should be modified and no new memory should be allocated.
Expand All @@ -958,6 +1053,15 @@ def fillna(
[0, 0, 2, 3, 0, 0]
Length: 6, dtype: Int64
"""
if method is not None:
warnings.warn(
f"The 'method' keyword in {type(self).__name__}.fillna is "
"deprecated and will be removed in a future version. "
"Use pad_or_backfill instead.",
FutureWarning,
stacklevel=find_stack_level(),
)

value, method = validate_fillna_kwargs(value, method)

mask = self.isna()
Expand Down
15 changes: 15 additions & 0 deletions pandas/core/arrays/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
ArrayLike,
AxisInt,
Dtype,
FillnaOptions,
IntervalClosedType,
NpDtype,
PositionalIndexer,
Expand Down Expand Up @@ -889,6 +890,20 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr
indexer = obj.argsort()[-1]
return obj[indexer]

def pad_or_backfill(  # pylint: disable=useless-parent-delegation
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill by delegating to the parent class implementation.

    All keywords are forwarded unchanged.
    """
    # TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove
    #  this method entirely.
    parent_impl = super().pad_or_backfill
    return parent_impl(
        method=method,
        limit=limit,
        limit_area=limit_area,
        copy=copy,
    )

def fillna(
self, value=None, method=None, limit: int | None = None, copy: bool = True
) -> Self:
Expand Down
31 changes: 31 additions & 0 deletions pandas/core/arrays/masked.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
AstypeArg,
AxisInt,
DtypeObj,
FillnaOptions,
NpDtype,
PositionalIndexer,
Scalar,
Expand Down Expand Up @@ -189,6 +190,36 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any:

return self._simple_new(self._data[item], newmask)

def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill the masked entries of this array.

    Parameters
    ----------
    method : FillnaOptions
        Fill method; forwarded to ``missing.get_fill_func`` to select the
        fill routine.
    limit : int, optional
        Forwarded unchanged to the fill routine.
    limit_area : {"inside", "outside"}, optional
        Accepted as part of the signature; not referenced by this
        implementation.
    copy : bool, default True
        If True, the fill runs on copies of the transposed data and mask and
        a new array is built from them. If False, the fill routine receives
        ``self._data.T`` and ``self._mask.T`` directly and ``self`` is
        returned.

    Returns
    -------
    Self
    """
    na_mask = self._mask

    if not na_mask.any():
        # Nothing to fill: only the copy semantics matter.
        return self.copy() if copy else self

    fill_func = missing.get_fill_func(method, ndim=self.ndim)

    if copy:
        data = self._data.T.copy()
        filled_mask = na_mask.T.copy()
    else:
        data = self._data.T
        filled_mask = na_mask.T

    # The fill routine is handed both the values and the mask.
    fill_func(data, limit=limit, mask=filled_mask)

    if not copy:
        return self
    return self._simple_new(data.T, filled_mask.T)

@doc(ExtensionArray.fillna)
def fillna(
self, value=None, method=None, limit: int | None = None, copy: bool = True
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/arrays/numpy_.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,7 +237,7 @@ def pad_or_backfill(
self,
*,
method: FillnaOptions,
limit: int | None,
limit: int | None = None,
limit_area: Literal["inside", "outside"] | None = None,
copy: bool = True,
) -> Self:
Expand Down
20 changes: 20 additions & 0 deletions pandas/core/arrays/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
from pandas._typing import (
AnyArrayLike,
Dtype,
FillnaOptions,
NpDtype,
NumpySorter,
NumpyValueArrayLike,
Expand Down Expand Up @@ -790,6 +791,25 @@ def searchsorted(
m8arr = self._ndarray.view("M8[ns]")
return m8arr.searchsorted(npvalue, side=side, sorter=sorter)

def pad_or_backfill(
    self,
    *,
    method: FillnaOptions,
    limit: int | None = None,
    limit_area: Literal["inside", "outside"] | None = None,
    copy: bool = True,
) -> Self:
    """
    Pad or backfill by dispatching through an ``"M8[ns]"`` view of ``self``.

    All keywords are forwarded to the view's ``pad_or_backfill``. With
    ``copy=True`` the result is viewed back as ``self.dtype`` and returned;
    with ``copy=False`` ``self`` is returned.
    """
    # view as dt64 so we get treated as timelike in core.missing,
    #  similar to dtl._period_dispatch
    as_dt64 = self.view("M8[ns]")
    filled = as_dt64.pad_or_backfill(
        method=method, limit=limit, limit_area=limit_area, copy=copy
    )

    if not copy:
        return self
    return cast("Self", filled.view(self.dtype))

def fillna(
self, value=None, method=None, limit: int | None = None, copy: bool = True
) -> Self:
Expand Down
Loading