Skip to content

DEPR: support axis=None in DataFrame reductions #52042

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 22 commits into from
May 25, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
2f8666f
DEPR: support axis=None in DataFrame reductions
jbrockmendel Mar 17, 2023
5bfe28a
Merge branch 'main' into depr-reductions
jbrockmendel Mar 17, 2023
395b91d
Merge branch 'main' of https://github.com/pandas-dev/pandas into depr…
jbrockmendel Mar 17, 2023
aa592d6
Merge branch 'main' into depr-reductions
jbrockmendel Mar 20, 2023
71f70eb
Merge branch 'main' into depr-reductions
jbrockmendel Mar 22, 2023
5b8e078
test, whatsnew
jbrockmendel Mar 22, 2023
b07fc3b
Merge branch 'main' into depr-reductions
jbrockmendel Mar 23, 2023
b1a1b1c
Merge branch 'main' into depr-reductions
jbrockmendel Mar 24, 2023
6a99ba5
catch in apply(sum)
jbrockmendel Apr 29, 2023
8d8703a
Merge branch 'main' into depr-reductions
jbrockmendel Apr 29, 2023
09a46ac
Merge branch 'main' into depr-reductions
jbrockmendel May 5, 2023
cd6879e
Merge branch 'main' into depr-reductions
jbrockmendel May 15, 2023
c35b206
Merge branch 'main' into depr-reductions
jbrockmendel May 16, 2023
b1d4ab7
Fix defaults
jbrockmendel May 17, 2023
54f90e4
Merge branch 'main' into depr-reductions
jbrockmendel May 17, 2023
adafb33
catch warnings
jbrockmendel May 17, 2023
13bb07d
Merge branch 'main' into depr-reductions
jbrockmendel May 22, 2023
3b2f589
dont check stacklevel
jbrockmendel May 22, 2023
fe2b690
Merge branch 'main' into depr-reductions
jbrockmendel May 24, 2023
b335e65
mypy fixup
jbrockmendel May 24, 2023
05e991b
Merge branch 'main' into depr-reductions
jbrockmendel May 25, 2023
ecef601
catch warning
jbrockmendel May 25, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ Deprecations
- Deprecated the ``axis`` keyword in :meth:`DataFrame.ewm`, :meth:`Series.ewm`, :meth:`DataFrame.rolling`, :meth:`Series.rolling`, :meth:`DataFrame.expanding`, :meth:`Series.expanding` (:issue:`51778`)
- Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`)
- Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`.GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`.DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`)
- Deprecated behavior of :class:`DataFrame` reductions ``sum``, ``prod``, ``std``, ``var``, ``sem`` with ``axis=None``; in a future version this will operate over both axes and return a scalar instead of behaving like ``axis=0``. Note that this also affects numpy functions, e.g. ``np.sum(df)`` (:issue:`21597`)
-

.. ---------------------------------------------------------------------------
Expand Down
49 changes: 37 additions & 12 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -11079,6 +11079,8 @@ def _logical_func(
name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
)
return res._logical_func(name, func, skipna=skipna, **kwargs)
elif axis is None:
axis = 0

if (
self.ndim > 1
Expand Down Expand Up @@ -11183,15 +11185,27 @@ def _stat_function_ddof(
self,
name: str,
func,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
**kwargs,
) -> Series | float:
nv.validate_stat_ddof_func((), kwargs, fname=name)
validate_bool_kwarg(skipna, "skipna", none_allowed=False)

if axis is None:
if self.ndim > 1:
warnings.warn(
f"The behavior of {type(self).__name__}.{name} with axis=None "
"is deprecated, in a future version this will reduce over both "
"axes and return a scalar. To retain the old behavior, pass "
"axis=0 (or do not pass axis)",
FutureWarning,
stacklevel=find_stack_level(),
)
axis = 0
elif axis is lib.no_default:
axis = 0

return self._reduce(
Expand All @@ -11200,7 +11214,7 @@ def _stat_function_ddof(

def sem(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
Expand All @@ -11212,7 +11226,7 @@ def sem(

def var(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
Expand All @@ -11224,7 +11238,7 @@ def var(

def std(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
Expand Down Expand Up @@ -11338,7 +11352,7 @@ def _min_count_stat_function(
self,
name: str,
func,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
numeric_only: bool_t = False,
min_count: int = 0,
Expand All @@ -11354,6 +11368,17 @@ def _min_count_stat_function(
validate_bool_kwarg(skipna, "skipna", none_allowed=False)

if axis is None:
if self.ndim > 1:
warnings.warn(
f"The behavior of {type(self).__name__}.{name} with axis=None "
"is deprecated, in a future version this will reduce over both "
"axes and return a scalar. To retain the old behavior, pass "
"axis=0 (or do not pass axis)",
FutureWarning,
stacklevel=find_stack_level(),
)
axis = 0
elif axis is lib.no_default:
axis = 0

return self._reduce(
Expand All @@ -11367,7 +11392,7 @@ def _min_count_stat_function(

def sum(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
numeric_only: bool_t = False,
min_count: int = 0,
Expand All @@ -11379,7 +11404,7 @@ def sum(

def prod(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
numeric_only: bool_t = False,
min_count: int = 0,
Expand Down Expand Up @@ -11500,7 +11525,7 @@ def all(
)
def sem(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
Expand All @@ -11522,7 +11547,7 @@ def sem(
)
def var(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
Expand All @@ -11545,7 +11570,7 @@ def var(
)
def std(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
ddof: int = 1,
numeric_only: bool_t = False,
Expand Down Expand Up @@ -11633,7 +11658,7 @@ def cumprod(
)
def sum(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
numeric_only: bool_t = False,
min_count: int = 0,
Expand All @@ -11655,7 +11680,7 @@ def sum(
)
def prod(
self,
axis: Axis | None = None,
axis: Axis | None | lib.NoDefault = lib.no_default,
skipna: bool_t = True,
numeric_only: bool_t = False,
min_count: int = 0,
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/frame/test_npfuncs.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,20 @@ def test_np_sqrt(self, float_frame):
assert result.columns is float_frame.columns

tm.assert_frame_equal(result, float_frame.apply(np.sqrt))

def test_sum_deprecated_axis_behavior(self):
# GH#52042 deprecated behavior of df.sum(axis=None), which gets
# called when we do np.sum(df). Check that both spellings emit the
# FutureWarning and produce equal Series results.

arr = np.random.randn(4, 3)
df = DataFrame(arr)

msg = "The behavior of DataFrame.sum with axis=None is deprecated"
# Indirect call through numpy: the warning is matched on category and
# message, with the stacklevel check explicitly disabled for this call.
with tm.assert_produces_warning(
FutureWarning, match=msg, check_stacklevel=False
):
res = np.sum(df)

# Direct call with axis=None: same category and message expected, with
# no check_stacklevel override.
with tm.assert_produces_warning(FutureWarning, match=msg):
expected = df.sum(axis=None)
# The numpy path and the direct method call must agree.
tm.assert_series_equal(res, expected)