Skip to content

CLN: Enforce deprecation of axis=None in DataFrame reductions #57684

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits into from
Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions asv_bench/benchmarks/stat_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def setup(self, op, axis):
("median", 1),
("median", None),
("std", 1),
("std", None),
)
):
# Skipping cases where datetime aggregations are not implemented
Expand Down
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,7 @@ Removal of prior version deprecations/changes
- All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`)
- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
- Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``; passing ``axis=None`` will now reduce over both axes. This is particularly relevant when calling NumPy functions on a DataFrame, e.g. ``numpy.sum(df)`` (:issue:`21597`)
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
- Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. ``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`)
Expand Down Expand Up @@ -238,7 +239,6 @@ Removal of prior version deprecations/changes
- Removed unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` methods (:issue:`50977`)
- Unrecognized timezones when parsing strings to datetimes now raises a ``ValueError`` (:issue:`51477`)


.. ---------------------------------------------------------------------------
.. _whatsnew_300.performance:

Expand Down
8 changes: 6 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -11514,7 +11514,9 @@ def sum(
min_count=min_count,
**kwargs,
)
return result.__finalize__(self, method="sum")
if isinstance(result, Series):
result = result.__finalize__(self, method="sum")
return result

@doc(make_doc("prod", ndim=2))
def prod(
Expand All @@ -11532,7 +11534,9 @@ def prod(
min_count=min_count,
**kwargs,
)
return result.__finalize__(self, method="prod")
if isinstance(result, Series):
result = result.__finalize__(self, method="prod")
return result

# error: Signature of "mean" incompatible with supertype "NDFrame"
@overload # type: ignore[override]
Expand Down
32 changes: 2 additions & 30 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11445,7 +11445,7 @@ def _stat_function_ddof(
self,
name: str,
func,
axis: Axis | None | lib.NoDefault = lib.no_default,
axis: Axis | None = 0,
skipna: bool = True,
ddof: int = 1,
numeric_only: bool = False,
Expand All @@ -11454,20 +11454,6 @@ def _stat_function_ddof(
nv.validate_stat_ddof_func((), kwargs, fname=name)
validate_bool_kwarg(skipna, "skipna", none_allowed=False)

if axis is None:
if self.ndim > 1:
warnings.warn(
f"The behavior of {type(self).__name__}.{name} with axis=None "
"is deprecated, in a future version this will reduce over both "
"axes and return a scalar. To retain the old behavior, pass "
"axis=0 (or do not pass axis)",
FutureWarning,
stacklevel=find_stack_level(),
)
axis = 0
elif axis is lib.no_default:
axis = 0

return self._reduce(
func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof
)
Expand Down Expand Up @@ -11619,7 +11605,7 @@ def _min_count_stat_function(
self,
name: str,
func,
axis: Axis | None | lib.NoDefault = lib.no_default,
axis: Axis | None = 0,
skipna: bool = True,
numeric_only: bool = False,
min_count: int = 0,
Expand All @@ -11630,20 +11616,6 @@ def _min_count_stat_function(

validate_bool_kwarg(skipna, "skipna", none_allowed=False)

if axis is None:
if self.ndim > 1:
warnings.warn(
f"The behavior of {type(self).__name__}.{name} with axis=None "
"is deprecated, in a future version this will reduce over both "
"axes and return a scalar. To retain the old behavior, pass "
"axis=0 (or do not pass axis)",
FutureWarning,
stacklevel=find_stack_level(),
)
axis = 0
elif axis is lib.no_default:
axis = 0

return self._reduce(
func,
name=name,
Expand Down
16 changes: 5 additions & 11 deletions pandas/tests/frame/test_npfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,22 +27,16 @@ def test_np_sqrt(self, float_frame):

tm.assert_frame_equal(result, float_frame.apply(np.sqrt))

def test_sum_deprecated_axis_behavior(self):
# GH#52042 deprecated behavior of df.sum(axis=None), which gets
def test_sum_axis_behavior(self):
# GH#52042 df.sum(axis=None) now reduces over both axes, which gets
# called when we do np.sum(df)

arr = np.random.default_rng(2).standard_normal((4, 3))
df = DataFrame(arr)

msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(
FutureWarning, match=msg, check_stacklevel=False
):
res = np.sum(df)

with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df.sum(axis=None)
tm.assert_series_equal(res, expected)
res = np.sum(df)
expected = df.to_numpy().sum(axis=None)
assert res == expected

def test_np_ravel(self):
# GH26247
Expand Down
4 changes: 1 addition & 3 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,9 +132,7 @@ def test_agg_apply_corner(ts, tsframe):
tm.assert_frame_equal(grouped.sum(), exp_df)
tm.assert_frame_equal(grouped.agg("sum"), exp_df)

msg = "The behavior of DataFrame.sum with axis=None is deprecated"
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
res = grouped.apply(np.sum)
res = grouped.apply(np.sum, axis=0)
tm.assert_frame_equal(res, exp_df)


Expand Down
6 changes: 1 addition & 5 deletions pandas/tests/groupby/test_raises.py
Original file line number Diff line number Diff line change
Expand Up @@ -222,11 +222,7 @@ def test_groupby_raises_string_np(
"Could not convert string .* to numeric",
),
}[groupby_func_np]
if how == "transform" and groupby_func_np is np.sum and not groupby_series:
warn_msg = "The behavior of DataFrame.sum with axis=None is deprecated"
else:
warn_msg = ""
_call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg)
_call_and_check(klass, msg, how, gb, groupby_func_np, ())


@pytest.mark.parametrize("how", ["method", "agg", "transform"])
Expand Down
9 changes: 2 additions & 7 deletions pandas/tests/window/test_expanding.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,7 @@ def test_expanding_corr_pairwise(frame):
@pytest.mark.parametrize(
"func,static_comp",
[
("sum", np.sum),
("sum", lambda x: np.sum(x, axis=0)),
("mean", lambda x: np.mean(x, axis=0)),
("max", lambda x: np.max(x, axis=0)),
("min", lambda x: np.min(x, axis=0)),
Expand All @@ -324,12 +324,7 @@ def test_expanding_func(func, static_comp, frame_or_series):
result = getattr(obj, func)()
assert isinstance(result, frame_or_series)

msg = "The behavior of DataFrame.sum with axis=None is deprecated"
warn = None
if frame_or_series is DataFrame and static_comp is np.sum:
warn = FutureWarning
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
expected = static_comp(data[:11])
expected = static_comp(data[:11])
if frame_or_series is Series:
tm.assert_almost_equal(result[10], expected)
else:
Expand Down