Skip to content

CLN: enforce deprecation of the method keyword on df.fillna #57760

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
36 changes: 30 additions & 6 deletions doc/source/whatsnew/v0.10.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -242,18 +242,42 @@ labeled the aggregated group with the end of the interval: the next day).
- Calling ``fillna`` on Series or DataFrame with no arguments is no longer
valid code. You must either specify a fill value or an interpolation method:

.. ipython:: python
:okwarning:
.. code-block:: ipython

s = pd.Series([np.nan, 1.0, 2.0, np.nan, 4])
s
s.fillna(0)
s.fillna(method="pad")
In [6]: s = pd.Series([np.nan, 1.0, 2.0, np.nan, 4])

In [7]: s
Out[7]:
0 NaN
1 1.0
2 2.0
3 NaN
4 4.0
dtype: float64

In [8]: s.fillna(0)
Out[8]:
0 0.0
1 1.0
2 2.0
3 0.0
4 4.0
dtype: float64

In [9]: s.fillna(method="pad")
Out[9]:
0 NaN
1 1.0
2 2.0
3 2.0
4 4.0
dtype: float64

Convenience methods ``ffill`` and ``bfill`` have been added:

.. ipython:: python

s = pd.Series([np.nan, 1.0, 2.0, np.nan, 4])
s.ffill()


Expand Down
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,7 @@ Removal of prior version deprecations/changes
- Removed ``year``, ``month``, ``quarter``, ``day``, ``hour``, ``minute``, and ``second`` keywords in the :class:`PeriodIndex` constructor, use :meth:`PeriodIndex.from_fields` instead (:issue:`55960`)
- Removed deprecated argument ``obj`` in :meth:`.DataFrameGroupBy.get_group` and :meth:`.SeriesGroupBy.get_group` (:issue:`53545`)
- Removed deprecated behavior of :meth:`Series.agg` using :meth:`Series.apply` (:issue:`53325`)
- Removed deprecated keyword ``method`` on :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`57760`)
- Removed option ``mode.use_inf_as_na``, convert inf entries to ``NaN`` before instead (:issue:`51684`)
- Removed support for :class:`DataFrame` in :meth:`DataFrame.from_records`(:issue:`51697`)
- Removed support for ``errors="ignore"`` in :func:`to_datetime`, :func:`to_timedelta` and :func:`to_numeric` (:issue:`55734`)
Expand Down
4 changes: 0 additions & 4 deletions pandas/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,10 +172,6 @@ def pytest_collection_modifyitems(items, config) -> None:
"DataFrameGroupBy.fillna",
"DataFrameGroupBy.fillna with 'method' is deprecated",
),
(
"DataFrameGroupBy.fillna",
"DataFrame.fillna with 'method' is deprecated",
),
("read_parquet", "Passing a BlockManager to DataFrame is deprecated"),
]

Expand Down
45 changes: 8 additions & 37 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6769,7 +6769,6 @@ def fillna(
self,
value: Hashable | Mapping | Series | DataFrame = ...,
*,
method: FillnaOptions | None = ...,
axis: Axis | None = ...,
inplace: Literal[False] = ...,
limit: int | None = ...,
Expand All @@ -6781,7 +6780,6 @@ def fillna(
self,
value: Hashable | Mapping | Series | DataFrame = ...,
*,
method: FillnaOptions | None = ...,
axis: Axis | None = ...,
inplace: Literal[True],
limit: int | None = ...,
Expand All @@ -6793,7 +6791,6 @@ def fillna(
self,
value: Hashable | Mapping | Series | DataFrame = ...,
*,
method: FillnaOptions | None = ...,
axis: Axis | None = ...,
inplace: bool = ...,
limit: int | None = ...,
Expand All @@ -6809,13 +6806,12 @@ def fillna(
self,
value: Hashable | Mapping | Series | DataFrame | None = None,
*,
method: FillnaOptions | None = None,
axis: Axis | None = None,
inplace: bool = False,
limit: int | None = None,
) -> Self | None:
"""
Fill NA/NaN values using the specified method.
Fill NA/NaN values with `value`.

Parameters
----------
Expand All @@ -6825,15 +6821,6 @@ def fillna(
each index (for a Series) or column (for a DataFrame). Values not
in the dict/Series/DataFrame will not be filled. This value cannot
be a list.
method : {{'backfill', 'bfill', 'ffill', None}}, default None
Method to use for filling holes in reindexed Series:

* ffill: propagate last valid observation forward to next valid.
* backfill / bfill: use next valid observation to fill gap.

.. deprecated:: 2.1.0
Use ffill or bfill instead.

axis : {axes_single_arg}
Axis along which to fill missing values. For `Series`
this parameter is unused and defaults to 0.
Expand All @@ -6842,12 +6829,8 @@ def fillna(
other views on this object (e.g., a no-copy slice for a column in a
DataFrame).
limit : int, default None
If method is specified, this is the maximum number of consecutive
NaN values to forward/backward fill. In other words, if there is
a gap with more than this number of consecutive NaNs, it will only
be partially filled. If method is not specified, this is the
maximum number of entries along the entire axis where NaNs will be
filled. Must be greater than 0 if not None.
This is the maximum number of entries along the entire axis
where NaNs will be filled. Must be greater than 0 if not None.

Returns
-------
Expand Down Expand Up @@ -6932,14 +6915,10 @@ def fillna(
stacklevel=2,
)

value, method = validate_fillna_kwargs(value, method)
if method is not None:
warnings.warn(
f"{type(self).__name__}.fillna with 'method' is deprecated and "
"will raise in a future version. Use obj.ffill() or obj.bfill() "
"instead.",
FutureWarning,
stacklevel=find_stack_level(),
if isinstance(value, (list, tuple)):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you check if we have a test where value is a np.array or pd.Series? Given the error message, I would expect this check to be something like if not (is_scalar(value) or isinstance(value, dict))

Copy link
Contributor Author

@natmokval natmokval Mar 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we have test_fillna_series in pandas/tests/extension/base/missing.py where we fill with a series

def test_fillna_series(self, data_missing):
fill_value = data_missing[1]
ser = pd.Series(data_missing)
result = ser.fillna(fill_value)
expected = pd.Series(
data_missing._from_sequence(
[fill_value, fill_value], dtype=data_missing.dtype
)
)
tm.assert_series_equal(result, expected)
# Fill with a series
result = ser.fillna(expected)
tm.assert_series_equal(result, expected)
# Fill with a series not affecting the missing values
result = ser.fillna(ser)
tm.assert_series_equal(result, ser)

Copy link
Contributor Author

@natmokval natmokval Mar 14, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

in pandas/tests/frame/methods/test_fillna.py I found test_fillna_invalid_value.

def test_fillna_invalid_value(self, float_frame):
# list
msg = '"value" parameter must be a scalar or dict, but you passed a "{}"'
with pytest.raises(TypeError, match=msg.format("list")):
float_frame.fillna([1, 2])
# tuple
with pytest.raises(TypeError, match=msg.format("tuple")):
float_frame.fillna((1, 2))
# frame with series
msg = (
'"value" parameter must be a scalar, dict or Series, but you '
'passed a "DataFrame"'
)
with pytest.raises(TypeError, match=msg):
float_frame.iloc[:, 0].fillna(float_frame)

In this test we check if for value: list, tuple or DataFrame fillna raises TypeError.
We don't validate values: np.array and Series in this tests though. I checked, for examples below

df = DataFrame({"a": [1, None, 3]})
df.iloc[:, 0].fillna(value=np.array([1, 0]))

we get the TypeError: TypeError: "value" parameter must be a scalar, dict or Series, but you passed a "ndarray"

Should I add a check for np.array and maybe correct the condition in if statement?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for looking! Since we have a test with np.array and check that we're raising the error correctly so no need to add the check

raise TypeError(
'"value" parameter must be a scalar or dict, but '
f'you passed a "{type(value).__name__}"'
)

# set the default here, so functions examining the signaure
Expand All @@ -6949,15 +6928,7 @@ def fillna(
axis = self._get_axis_number(axis)

if value is None:
return self._pad_or_backfill(
# error: Argument 1 to "_pad_or_backfill" of "NDFrame" has
# incompatible type "Optional[Literal['backfill', 'bfill', 'ffill',
# 'pad']]"; expected "Literal['ffill', 'bfill', 'pad', 'backfill']"
method, # type: ignore[arg-type]
axis=axis,
limit=limit,
inplace=inplace,
)
raise ValueError("Must specify a fill 'value'.")
else:
if self.ndim == 1:
if isinstance(value, (dict, ABCSeries)):
Expand Down
8 changes: 1 addition & 7 deletions pandas/tests/extension/base/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,9 +68,6 @@ def test_fillna_scalar(self, data_missing):
expected = data_missing.fillna(valid)
tm.assert_extension_array_equal(result, expected)

@pytest.mark.filterwarnings(
"ignore:Series.fillna with 'method' is deprecated:FutureWarning"
)
def test_fillna_limit_pad(self, data_missing):
arr = data_missing.take([1, 0, 0, 0, 1])
result = pd.Series(arr).ffill(limit=2)
Expand Down Expand Up @@ -99,12 +96,9 @@ def test_ffill_limit_area(
expected = pd.Series(data_missing.take(expected_ilocs))
tm.assert_series_equal(result, expected)

@pytest.mark.filterwarnings(
"ignore:Series.fillna with 'method' is deprecated:FutureWarning"
)
def test_fillna_limit_backfill(self, data_missing):
arr = data_missing.take([1, 0, 0, 0, 1])
result = pd.Series(arr).fillna(method="backfill", limit=2)
result = pd.Series(arr).bfill(limit=2)
expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
tm.assert_series_equal(result, expected)

Expand Down
9 changes: 0 additions & 9 deletions pandas/tests/extension/decimal/test_decimal.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,15 +187,6 @@ def test_ffill_limit_area(
)

def test_fillna_limit_backfill(self, data_missing):
msg = "Series.fillna with 'method' is deprecated"
with tm.assert_produces_warning(
FutureWarning,
match=msg,
check_stacklevel=False,
raise_on_extra_warnings=False,
):
super().test_fillna_limit_backfill(data_missing)

msg = "ExtensionArray.fillna 'method' keyword is deprecated"
with tm.assert_produces_warning(
DeprecationWarning,
Expand Down
5 changes: 0 additions & 5 deletions pandas/tests/extension/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -234,11 +234,6 @@ def test_isna(self, data_missing):
expected = SparseArray([False, False], fill_value=False, dtype=expected_dtype)
tm.assert_equal(sarr.isna(), expected)

def test_fillna_limit_backfill(self, data_missing):
warns = FutureWarning
with tm.assert_produces_warning(warns, check_stacklevel=False):
super().test_fillna_limit_backfill(data_missing)

def test_fillna_no_op_returns_copy(self, data, request):
if np.isnan(data.fill_value):
request.applymarker(
Expand Down
Loading
Loading