From de31d92514320c8d473d6bfb5db0e7b900ec8cf6 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 Jul 2023 11:53:50 -0700 Subject: [PATCH 1/8] REF: implement EA.pad_or_backfill --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/arrays/_mixins.py | 33 ++++++- pandas/core/arrays/arrow/array.py | 18 ++++ pandas/core/arrays/base.py | 93 +++++++++++++++++++ pandas/core/arrays/interval.py | 11 +++ pandas/core/arrays/masked.py | 31 +++++++ pandas/core/arrays/numpy_.py | 2 +- pandas/core/arrays/period.py | 20 ++++ pandas/core/arrays/sparse/array.py | 24 +++++ pandas/core/internals/blocks.py | 4 +- pandas/tests/arrays/test_datetimelike.py | 2 +- pandas/tests/arrays/test_datetimes.py | 10 +- pandas/tests/extension/base/dim2.py | 17 ++-- pandas/tests/extension/base/missing.py | 2 +- pandas/tests/extension/decimal/array.py | 10 ++ .../tests/extension/decimal/test_decimal.py | 28 +++++- 16 files changed, 284 insertions(+), 22 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 1119117c411d3..0457a7556140f 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -315,6 +315,7 @@ Deprecations - Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - Deprecated strings ``T``, ``t``, ``L`` and ``l`` denoting units in :func:`to_timedelta` (:issue:`52536`) +- Deprecated the "method" and "limit" keywords in :meth:`ExtensionArray.fillna`, implement and use :meth:`ExtensionArray.pad_or_backfill` instead (:issue:`53621`) - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`) - 
Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index f586de3d2bdee..874a32cee6d1a 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -19,6 +19,7 @@ AxisInt, Dtype, F, + FillnaOptions, PositionalIndexer2D, PositionalIndexerTuple, ScalarIndexer, @@ -294,6 +295,37 @@ def _fill_mask_inplace( func = missing.get_fill_func(method, ndim=self.ndim) func(self._ndarray.T, limit=limit, mask=mask.T) + def pad_or_backfill( + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, + ) -> Self: + mask = self.isna() + if mask.any(): + # (for now) when self.ndim == 2, we assume axis=0 + func = missing.get_fill_func(method, ndim=self.ndim) + + npvalues = self._ndarray.T + if copy: + npvalues = npvalues.copy() + func(npvalues, limit=limit, mask=mask.T) + npvalues = npvalues.T + + if copy: + new_values = self._from_backing_data(npvalues) + else: + new_values = self + + else: + if copy: + new_values = self.copy() + else: + new_values = self + return new_values + @doc(ExtensionArray.fillna) def fillna(self, value=None, method=None, limit: int | None = None) -> Self: value, method = validate_fillna_kwargs( @@ -309,7 +341,6 @@ def fillna(self, value=None, method=None, limit: int | None = None) -> Self: if mask.any(): if method is not None: - # TODO: check value is None # (for now) when self.ndim == 2, we assume axis=0 func = missing.get_fill_func(method, ndim=self.ndim) npvalues = self._ndarray.T.copy() diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 284044dfadfef..2493b02fd2a85 100644 --- 
a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -899,6 +899,24 @@ def dropna(self) -> Self: """ return type(self)(pc.drop_null(self._pa_array)) + def pad_or_backfill( + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, + ) -> Self: + if not self._hasna: + # TODO(CoW): Not necessary anymore when CoW is the default + return self.copy() + + # TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove + # this method entirely. + return super().pad_or_backfill( + method=method, limit=limit, limit_area=limit_area, copy=copy + ) + @doc(ExtensionArray.fillna) def fillna( self, diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 64f917a419391..cdc8db2ad720a 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -20,6 +20,7 @@ cast, overload, ) +import warnings import numpy as np @@ -35,6 +36,7 @@ Substitution, cache_readonly, ) +from pandas.util._exceptions import find_stack_level from pandas.util._validators import ( validate_bool_kwarg, validate_fillna_kwargs, @@ -127,6 +129,7 @@ class ExtensionArray: interpolate isin isna + pad_or_backfill ravel repeat searchsorted @@ -177,6 +180,7 @@ class ExtensionArray: methods: * fillna + * pad_or_backfill * dropna * unique * factorize / _values_for_factorize @@ -782,6 +786,82 @@ def interpolate( f"{type(self).__name__} does not implement interpolate" ) + def pad_or_backfill( + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, + ) -> Self: + """ + pad or backfill values, used by Series/DataFrame ffill and bfill. + + Parameters + ---------- + method : {'backfill', 'bfill', 'pad', 'ffill'} + Method to use for filling holes in reindexed Series: + + * pad / ffill: propagate last valid observation forward to next valid. 
+ * backfill / bfill: use NEXT valid observation to fill gap. + + limit : int, default None + This is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. + + copy : bool, default True + Whether to make a copy of the data before filling. If False, then + the original should be modified and no new memory should be allocated. + For ExtensionArray subclasses that cannot do this, it is at the + author's discretion whether to ignore "copy=False" or to raise. + The base class implementation ignores the keyword if any NAs are + present. + + """ + + # If a 3rd-party EA has implemented this functionality in fillna, + # we warn that they need to implement pad_or_backfill instead. + if ( + type(self).fillna is not ExtensionArray.fillna + and type(self).pad_or_backfill is ExtensionArray.pad_or_backfill + ): + # Check for pad_or_backfill here allows us to call + # super().pad_or_backfill without getting this warning + warnings.warn( + "ExtensionArray.fillna 'method' keyword is deprecated. " + "In a future version, arr.pad_or_backfill will be called " + "instead. 3rd-party ExtensionArray authors need to implement " + "pad_or_backfill.", + FutureWarning, + stacklevel=find_stack_level(), + ) + return self.fillna(method=method, limit=limit) + + mask = self.isna() + + if mask.any(): + # NB: the base class does not respect the "copy" keyword + meth = missing.clean_fill_method(method) + + npmask = np.asarray(mask) + if meth == "pad": + indexer = libalgos.get_fill_indexer(npmask, limit=limit) + return self.take(indexer, allow_fill=True) + else: + # i.e.
meth == "backfill" + indexer = libalgos.get_fill_indexer(npmask[::-1], limit=limit)[::-1] + return self[::-1].take(indexer, allow_fill=True) + + else: + if not copy: + return self + new_values = self.copy() + return new_values + def fillna( self, value: object | ArrayLike | None = None, @@ -803,6 +883,8 @@ def fillna( * pad / ffill: propagate last valid observation forward to next valid. * backfill / bfill: use NEXT valid observation to fill gap. + .. deprecated:: 2.1.0 + limit : int, default None If method is specified, this is the maximum number of consecutive NaN values to forward/backward fill. In other words, if there is @@ -811,11 +893,22 @@ def fillna( maximum number of entries along the entire axis where NaNs will be filled. + .. deprecated:: 2.1.0 + Returns ------- ExtensionArray With NA/NaN filled. """ + if method is not None: + warnings.warn( + f"The 'method' keyword in {type(self).__name__}.fillna is " + "deprecated and will be removed in a future version. " + "Use pad_or_backfill instead.", + FutureWarning, + stacklevel=find_stack_level(), + ) + value, method = validate_fillna_kwargs(value, method) mask = self.isna() diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 446c0957db343..6eeee7bcdbb03 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -31,6 +31,7 @@ ArrayLike, AxisInt, Dtype, + FillnaOptions, IntervalClosedType, NpDtype, PositionalIndexer, @@ -886,6 +887,16 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr indexer = obj.argsort()[-1] return obj[indexer] + def pad_or_backfill( + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, + ) -> Self: + raise TypeError("Filling by method is not supported for IntervalArray.") + def fillna(self, value=None, method=None, limit: int | None = None) -> Self: """ Fill NA/NaN values using the specified method. 
diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 15c485cbb1499..268edf83bfaab 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -25,6 +25,7 @@ AstypeArg, AxisInt, DtypeObj, + FillnaOptions, NpDtype, PositionalIndexer, Scalar, @@ -178,6 +179,36 @@ def __getitem__(self, item: PositionalIndexer) -> Self | Any: return self._simple_new(self._data[item], newmask) + def pad_or_backfill( + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, + ) -> Self: + mask = self._mask + + if mask.any(): + func = missing.get_fill_func(method, ndim=self.ndim) + + npvalues = self._data.T + new_mask = mask.T + if copy: + npvalues = npvalues.copy() + new_mask = new_mask.copy() + func(npvalues, limit=limit, mask=new_mask) + if copy: + return self._simple_new(npvalues.T, new_mask.T) + else: + return self + else: + if copy: + new_values = self.copy() + else: + new_values = self + return new_values + @doc(ExtensionArray.fillna) def fillna(self, value=None, method=None, limit: int | None = None) -> Self: value, method = validate_fillna_kwargs(value, method) diff --git a/pandas/core/arrays/numpy_.py b/pandas/core/arrays/numpy_.py index 5f02053a454ed..1788410fc05fd 100644 --- a/pandas/core/arrays/numpy_.py +++ b/pandas/core/arrays/numpy_.py @@ -233,7 +233,7 @@ def pad_or_backfill( self, *, method: FillnaOptions, - limit: int | None, + limit: int | None = None, limit_area: Literal["inside", "outside"] | None = None, copy: bool = True, ) -> Self: diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index c9c2d258a9a16..f50c03543982d 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -77,6 +77,7 @@ from pandas._typing import ( AnyArrayLike, Dtype, + FillnaOptions, NpDtype, NumpySorter, NumpyValueArrayLike, @@ -789,6 +790,25 @@ def searchsorted( m8arr = self._ndarray.view("M8[ns]") return 
m8arr.searchsorted(npvalue, side=side, sorter=sorter) + def pad_or_backfill( + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, + ) -> Self: + # view as dt64 so we get treated as timelike in core.missing, + # similar to dtl._period_dispatch + dta = self.view("M8[ns]") + result = dta.pad_or_backfill( + method=method, limit=limit, limit_area=limit_area, copy=copy + ) + if copy: + return cast("Self", result.view(self.dtype)) + else: + return self + def fillna(self, value=None, method=None, limit: int | None = None) -> Self: if method is not None: # view as dt64 so we get treated as timelike in core.missing, diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index aba6811c5eeb7..0d36219f44ad3 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -713,6 +713,30 @@ def isna(self): mask[self.sp_index.indices] = isna(self.sp_values) return type(self)(mask, fill_value=False, dtype=dtype) + def pad_or_backfill( + self, + *, + method: FillnaOptions, + limit: int | None = None, + limit_area: Literal["inside", "outside"] | None = None, + copy: bool = True, + ) -> Self: + msg = "pad_or_backfill with 'method' requires high memory usage." 
+ warnings.warn( + msg, + PerformanceWarning, + stacklevel=find_stack_level(), + ) + new_values = np.asarray(self) + # pad_or_backfill_inplace modifies new_values inplace + # error: Argument "method" to "pad_or_backfill_inplace" has incompatible + # type "Literal['backfill', 'bfill', 'ffill', 'pad']"; expected + # "Literal['pad', 'backfill']" + pad_or_backfill_inplace( + new_values, method=method, limit=limit # type: ignore[arg-type] + ) + return type(self)(new_values, fill_value=self.fill_value) + def fillna( self, value=None, diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 4480a1a0c6746..b33b7a70858d3 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1887,9 +1887,9 @@ def pad_or_backfill( values = self.values if values.ndim == 2 and axis == 1: # NDArrayBackedExtensionArray.fillna assumes axis=0 - new_values = values.T.fillna(method=method, limit=limit).T + new_values = values.T.pad_or_backfill(method=method, limit=limit).T else: - new_values = values.fillna(method=method, limit=limit) + new_values = values.pad_or_backfill(method=method, limit=limit) return [self.make_block_same_class(new_values)] diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index 9e402af931199..4cdcdbbbb7167 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -251,7 +251,7 @@ def test_fillna_method_doesnt_change_orig(self, method): fill_value = arr[3] if method == "pad" else arr[5] - result = arr.fillna(method=method) + result = arr.pad_or_backfill(method=method) assert result[4] == fill_value # check that the original was not changed diff --git a/pandas/tests/arrays/test_datetimes.py b/pandas/tests/arrays/test_datetimes.py index 1fe1d4efbefd7..8e38a8c741b8d 100644 --- a/pandas/tests/arrays/test_datetimes.py +++ b/pandas/tests/arrays/test_datetimes.py @@ -497,7 +497,7 @@ def test_fillna_preserves_tz(self, method): 
dtype=DatetimeTZDtype(tz="US/Central"), ) - result = arr.fillna(method=method) + result = arr.pad_or_backfill(method=method) tm.assert_extension_array_equal(result, expected) # assert that arr and dti were not modified in-place @@ -510,12 +510,12 @@ def test_fillna_2d(self): dta[0, 1] = pd.NaT dta[1, 0] = pd.NaT - res1 = dta.fillna(method="pad") + res1 = dta.pad_or_backfill(method="pad") expected1 = dta.copy() expected1[1, 0] = dta[0, 0] tm.assert_extension_array_equal(res1, expected1) - res2 = dta.fillna(method="backfill") + res2 = dta.pad_or_backfill(method="backfill") expected2 = dta.copy() expected2 = dta.copy() expected2[1, 0] = dta[2, 0] @@ -529,10 +529,10 @@ def test_fillna_2d(self): assert not dta2._ndarray.flags["C_CONTIGUOUS"] tm.assert_extension_array_equal(dta, dta2) - res3 = dta2.fillna(method="pad") + res3 = dta2.pad_or_backfill(method="pad") tm.assert_extension_array_equal(res3, expected1) - res4 = dta2.fillna(method="backfill") + res4 = dta2.pad_or_backfill(method="backfill") tm.assert_extension_array_equal(res4, expected2) # test the DataFrame method while we're here diff --git a/pandas/tests/extension/base/dim2.py b/pandas/tests/extension/base/dim2.py index 6847c5c183267..a7e530c3d7d37 100644 --- a/pandas/tests/extension/base/dim2.py +++ b/pandas/tests/extension/base/dim2.py @@ -155,16 +155,14 @@ def test_concat_2d(self, data): @pytest.mark.parametrize("method", ["backfill", "pad"]) def test_fillna_2d_method(self, data_missing, method): + # pad_or_backfill is always along axis=0 arr = data_missing.repeat(2).reshape(2, 2) assert arr[0].isna().all() assert not arr[1].isna().any() - try: - result = arr.pad_or_backfill(method=method, limit=None) - except AttributeError: - result = arr.fillna(method=method, limit=None) + result = arr.pad_or_backfill(method=method, limit=None) - expected = data_missing.fillna(method=method).repeat(2).reshape(2, 2) + expected = data_missing.pad_or_backfill(method=method).repeat(2).reshape(2, 2) 
self.assert_extension_array_equal(result, expected) # Reverse so that backfill is not a no-op. @@ -172,12 +170,11 @@ def test_fillna_2d_method(self, data_missing, method): assert not arr2[0].isna().any() assert arr2[1].isna().all() - try: - result2 = arr2.pad_or_backfill(method=method, limit=None) - except AttributeError: - result2 = arr2.fillna(method=method, limit=None) + result2 = arr2.pad_or_backfill(method=method, limit=None) - expected2 = data_missing[::-1].fillna(method=method).repeat(2).reshape(2, 2) + expected2 = ( + data_missing[::-1].pad_or_backfill(method=method).repeat(2).reshape(2, 2) + ) self.assert_extension_array_equal(result2, expected2) @pytest.mark.parametrize("method", ["mean", "median", "var", "std", "sum", "prod"]) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 43f37a020df3f..a839a9d327f95 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -95,7 +95,7 @@ def test_fillna_no_op_returns_copy(self, data): assert result is not data self.assert_extension_array_equal(result, data) - result = data.fillna(method="backfill") + result = data.pad_or_backfill(method="backfill") assert result is not data self.assert_extension_array_equal(result, data) diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 393c01488c234..10ddc0481a2a3 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -276,6 +276,16 @@ def convert_values(param): def value_counts(self, dropna: bool = True): return value_counts(self.to_numpy(), dropna=dropna) + # We override fillna here to simulate a 3rd party EA that has done so. 
This + # lets us test the deprecation telling authors to implement pad_or_backfill + def fillna( + self, + value=None, + method=None, + limit: int | None = None, + ): + return super().fillna(value=value, method=method, limit=limit) + def to_decimal(values, context=None): return DecimalArray([decimal.Decimal(x) for x in values], context=context) diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index afd04817f05c7..1398953991945 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -97,7 +97,33 @@ class TestIndex(base.BaseIndexTests): class TestMissing(base.BaseMissingTests): - pass + def test_fillna_limit_pad(self, data_missing): + msg = "ExtensionArray.fillna 'method' keyword is deprecated" + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_limit_pad(data_missing) + + def test_fillna_limit_backfill(self, data_missing): + msg = "ExtensionArray.fillna 'method' keyword is deprecated" + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_limit_backfill(data_missing) + + def test_fillna_no_op_returns_copy(self, data): + msg = "ExtensionArray.fillna 'method' keyword is deprecated" + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_no_op_returns_copy(data) + + def test_fillna_series_method(self, data_missing, fillna_method): + msg = "ExtensionArray.fillna 'method' keyword is deprecated" + with tm.assert_produces_warning( + FutureWarning, match=msg, check_stacklevel=False + ): + super().test_fillna_series_method(data_missing, fillna_method) class Reduce: From c30b1a99362a96ff1a9603bd3f32f932430bbde5 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 4 Jul 2023 21:06:59 -0700 Subject: [PATCH 2/8] Update doc --- ci/code_checks.sh | 1 + 1 file changed, 1 insertion(+) diff --git 
a/ci/code_checks.sh b/ci/code_checks.sh index 756096a7fe345..b1c9c22304f0c 100755 --- a/ci/code_checks.sh +++ b/ci/code_checks.sh @@ -148,6 +148,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then pandas.api.extensions.ExtensionArray.interpolate \ pandas.api.extensions.ExtensionArray.isin \ pandas.api.extensions.ExtensionArray.isna \ + pandas.api.extensions.ExtensionArray.pad_or_backfill \ pandas.api.extensions.ExtensionArray.ravel \ pandas.api.extensions.ExtensionArray.searchsorted \ pandas.api.extensions.ExtensionArray.shift \ From 791e34f68779a35bc91efbfefbe74c04da9f1d5d Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 5 Jul 2023 07:58:26 -0700 Subject: [PATCH 3/8] returns section in docstring --- pandas/core/arrays/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index cdc8db2ad720a..e02a403b69a7f 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -821,6 +821,9 @@ def pad_or_backfill( The base class implementation ignores the keyword if any NAs are present. + Returns + ------- + Same type as self """ # If a 3rd-party EA has implemented this functionality in fillna, From 777fb836766593442cfba3f5c645a1254e53e812 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Jul 2023 14:55:17 -0700 Subject: [PATCH 4/8] docstring fixup --- pandas/core/arrays/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e02a403b69a7f..cd73490e2e279 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -795,7 +795,7 @@ def pad_or_backfill( copy: bool = True, ) -> Self: """ - pad or backfill values, used by Series/DataFrame ffill and bfill. + Pad or backfill values, used by Series/DataFrame ffill and bfill. 
Parameters ---------- From 531d9e643f8d12b73023843ed4d0c008a9f90982 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 6 Jul 2023 18:04:55 -0700 Subject: [PATCH 5/8] troubleshoot docbuild --- doc/source/whatsnew/v2.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 0457a7556140f..76b300e074dfb 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -315,7 +315,7 @@ Deprecations - Deprecated parameter ``obj`` in :meth:`GroupBy.get_group` (:issue:`53545`) - Deprecated positional indexing on :class:`Series` with :meth:`Series.__getitem__` and :meth:`Series.__setitem__`, in a future version ``ser[item]`` will *always* interpret ``item`` as a label, not a position (:issue:`50617`) - Deprecated strings ``T``, ``t``, ``L`` and ``l`` denoting units in :func:`to_timedelta` (:issue:`52536`) -- Deprecated the "method" and "limit" keywords in :meth:`ExtensionArray.fillna`, implement and use :meth:`ExtensionArray.pad_or_backfill` instead (:issue:`53621`) +- Deprecated the "method" and "limit" keywords in ``ExtensionArray.fillna``, implement and use ``pad_or_backfill`` instead (:issue:`53621`) - Deprecated the "method" and "limit" keywords on :meth:`Series.fillna`, :meth:`DataFrame.fillna`, :meth:`SeriesGroupBy.fillna`, :meth:`DataFrameGroupBy.fillna`, and :meth:`Resampler.fillna`, use ``obj.bfill()`` or ``obj.ffill()`` instead (:issue:`53394`) - Deprecated the ``method`` and ``limit`` keywords in :meth:`DataFrame.replace` and :meth:`Series.replace` (:issue:`33302`) - Deprecated values "pad", "ffill", "bfill", "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate`, use ``obj.ffill()`` or ``obj.bfill()`` instead (:issue:`53581`) From 68e67e651c80a78ab9ac1107497e7a735158e126 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 7 Jul 2023 09:28:05 -0700 Subject: [PATCH 6/8] update doc --- doc/source/reference/extensions.rst | 1 + 1 file changed, 1 
insertion(+) diff --git a/doc/source/reference/extensions.rst b/doc/source/reference/extensions.rst index 63eacc3f6d1d9..371e06f5dd5bd 100644 --- a/doc/source/reference/extensions.rst +++ b/doc/source/reference/extensions.rst @@ -54,6 +54,7 @@ objects. api.extensions.ExtensionArray.interpolate api.extensions.ExtensionArray.isin api.extensions.ExtensionArray.isna + api.extensions.ExtensionArray.pad_or_backfill api.extensions.ExtensionArray.ravel api.extensions.ExtensionArray.repeat api.extensions.ExtensionArray.searchsorted From 3e2f87488d88614d8b73a0fd41455d080fbe7a25 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 24 Jul 2023 19:08:40 -0700 Subject: [PATCH 7/8] Avoid warning --- pandas/core/arrays/interval.py | 6 +++++- pandas/core/arrays/sparse/array.py | 18 ++++-------------- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 68eb37c7720ba..4bc90fe77706e 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -898,7 +898,11 @@ def pad_or_backfill( limit_area: Literal["inside", "outside"] | None = None, copy: bool = True, ) -> Self: - raise TypeError("Filling by method is not supported for IntervalArray.") + # TODO(3.0): after EA.fillna 'method' deprecation is enforced, we can remove + # this method entirely. + return super().pad_or_backfill( + method=method, limit=limit, limit_area=limit_area, copy=copy + ) def fillna(self, value=None, method=None, limit: int | None = None) -> Self: """ diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 50fa0fbf38cc6..9b449acc46f29 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -720,21 +720,11 @@ def pad_or_backfill( limit_area: Literal["inside", "outside"] | None = None, copy: bool = True, ) -> Self: - msg = "pad_or_backfill with 'method' requires high memory usage." 
- warnings.warn( - msg, - PerformanceWarning, - stacklevel=find_stack_level(), + # TODO(3.0): We can remove this method once deprecation for fillna method + # keyword is enforced. + return super().pad_or_backfill( + method=method, limit=limit, limit_area=limit_area, copy=copy ) - new_values = np.asarray(self) - # pad_or_backfill_inplace modifies new_values inplace - # error: Argument "method" to "pad_or_backfill_inplace" has incompatible - # type "Literal['backfill', 'bfill', 'ffill', 'pad']"; expected - # "Literal['pad', 'backfill']" - pad_or_backfill_inplace( - new_values, method=method, limit=limit # type: ignore[arg-type] - ) - return type(self)(new_values, fill_value=self.fill_value) def fillna( self, From 2bc4708b96c6c958f98c2607c845604c3ddbd6b1 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 25 Jul 2023 08:11:04 -0700 Subject: [PATCH 8/8] pylint ignores --- pandas/core/arrays/interval.py | 2 +- pandas/core/arrays/sparse/array.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/interval.py b/pandas/core/arrays/interval.py index 4bc90fe77706e..dfce0338d5107 100644 --- a/pandas/core/arrays/interval.py +++ b/pandas/core/arrays/interval.py @@ -890,7 +890,7 @@ def max(self, *, axis: AxisInt | None = None, skipna: bool = True) -> IntervalOr indexer = obj.argsort()[-1] return obj[indexer] - def pad_or_backfill( + def pad_or_backfill( # pylint: disable=useless-parent-delegation self, *, method: FillnaOptions, diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 9b449acc46f29..015c38f000147 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -712,7 +712,7 @@ def isna(self): mask[self.sp_index.indices] = isna(self.sp_values) return type(self)(mask, fill_value=False, dtype=dtype) - def pad_or_backfill( + def pad_or_backfill( # pylint: disable=useless-parent-delegation self, *, method: FillnaOptions,