Skip to content

Commit 448e5c2

Browse files
jbrockmendel authored and topper-123 committed
DEPR: support axis=None in DataFrame reductions (pandas-dev#52042)
* DEPR: support axis=None in DataFrame reductions
* test, whatsnew
* catch in apply(sum)
* Fix defaults
* catch warnings
* dont check stacklevel
* mypy fixup
* catch warning
1 parent f97b19a commit 448e5c2

File tree

10 files changed: 84 additions (+84) and 18 deletions (-18)

doc/source/whatsnew/v0.15.1.rst

+1
Original file line number | Diff line number | Diff line change
@@ -102,6 +102,7 @@ API changes
102102
current behavior:
103103

104104
.. ipython:: python
105+
:okwarning:
105106
106107
gr.apply(sum)
107108

doc/source/whatsnew/v2.1.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -284,6 +284,7 @@ Deprecations
284284
- Deprecated :meth:`DataFrame.applymap`. Use the new :meth:`DataFrame.map` method instead (:issue:`52353`)
285285
- Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`)
286286
- Deprecated ``freq`` parameter in :class:`PeriodArray` constructor, pass ``dtype`` instead (:issue:`52462`)
287+
- Deprecated behavior of :class:`DataFrame` reductions ``sum``, ``prod``, ``std``, ``var``, ``sem`` with ``axis=None``, in a future version this will operate over both axes returning a scalar instead of behaving like ``axis=0``; note this also affects numpy functions e.g. ``np.sum(df)`` (:issue:`21597`)
287288
- Deprecated behavior of :func:`concat` when :class:`DataFrame` has columns that are all-NA, in a future version these will not be discarded when determining the resulting dtype (:issue:`40893`)
288289
- Deprecated behavior of :meth:`Series.dt.to_pydatetime`, in a future version this will return a :class:`Series` containing python ``datetime`` objects instead of an ``ndarray`` of datetimes; this matches the behavior of other :meth:`Series.dt` properties (:issue:`20306`)
289290
- Deprecated logical operations (``|``, ``&``, ``^``) between pandas objects and dtype-less sequences (e.g. ``list``, ``tuple``), wrap a sequence in a :class:`Series` or numpy array before operating instead (:issue:`51521`)

pandas/core/frame.py

+5-5
Original file line number | Diff line number | Diff line change
@@ -10981,7 +10981,7 @@ def max(
1098110981
@doc(make_doc("sum", ndim=2))
1098210982
def sum(
1098310983
self,
10984-
axis: Axis | None = None,
10984+
axis: Axis | None = 0,
1098510985
skipna: bool = True,
1098610986
numeric_only: bool = False,
1098710987
min_count: int = 0,
@@ -10993,7 +10993,7 @@ def sum(
1099310993
@doc(make_doc("prod", ndim=2))
1099410994
def prod(
1099510995
self,
10996-
axis: Axis | None = None,
10996+
axis: Axis | None = 0,
1099710997
skipna: bool = True,
1099810998
numeric_only: bool = False,
1099910999
min_count: int = 0,
@@ -11024,7 +11024,7 @@ def median(
1102411024
@doc(make_doc("sem", ndim=2))
1102511025
def sem(
1102611026
self,
11027-
axis: Axis | None = None,
11027+
axis: Axis | None = 0,
1102811028
skipna: bool = True,
1102911029
ddof: int = 1,
1103011030
numeric_only: bool = False,
@@ -11035,7 +11035,7 @@ def sem(
1103511035
@doc(make_doc("var", ndim=2))
1103611036
def var(
1103711037
self,
11038-
axis: Axis | None = None,
11038+
axis: Axis | None = 0,
1103911039
skipna: bool = True,
1104011040
ddof: int = 1,
1104111041
numeric_only: bool = False,
@@ -11046,7 +11046,7 @@ def var(
1104611046
@doc(make_doc("std", ndim=2))
1104711047
def std(
1104811048
self,
11049-
axis: Axis | None = None,
11049+
axis: Axis | None = 0,
1105011050
skipna: bool = True,
1105111051
ddof: int = 1,
1105211052
numeric_only: bool = False,

pandas/core/generic.py

+32-7
Original file line number | Diff line number | Diff line change
@@ -11290,6 +11290,8 @@ def _logical_func(
1129011290
name, func, axis=0, bool_only=bool_only, skipna=skipna, **kwargs
1129111291
)
1129211292
return res._logical_func(name, func, skipna=skipna, **kwargs)
11293+
elif axis is None:
11294+
axis = 0
1129311295

1129411296
if (
1129511297
self.ndim > 1
@@ -11394,15 +11396,27 @@ def _stat_function_ddof(
1139411396
self,
1139511397
name: str,
1139611398
func,
11397-
axis: Axis | None = None,
11399+
axis: Axis | None | lib.NoDefault = lib.no_default,
1139811400
skipna: bool_t = True,
1139911401
ddof: int = 1,
1140011402
numeric_only: bool_t = False,
1140111403
**kwargs,
1140211404
) -> Series | float:
1140311405
nv.validate_stat_ddof_func((), kwargs, fname=name)
1140411406
validate_bool_kwarg(skipna, "skipna", none_allowed=False)
11407+
1140511408
if axis is None:
11409+
if self.ndim > 1:
11410+
warnings.warn(
11411+
f"The behavior of {type(self).__name__}.{name} with axis=None "
11412+
"is deprecated, in a future version this will reduce over both "
11413+
"axes and return a scalar. To retain the old behavior, pass "
11414+
"axis=0 (or do not pass axis)",
11415+
FutureWarning,
11416+
stacklevel=find_stack_level(),
11417+
)
11418+
axis = 0
11419+
elif axis is lib.no_default:
1140611420
axis = 0
1140711421

1140811422
return self._reduce(
@@ -11411,7 +11425,7 @@ def _stat_function_ddof(
1141111425

1141211426
def sem(
1141311427
self,
11414-
axis: Axis | None = None,
11428+
axis: Axis | None = 0,
1141511429
skipna: bool_t = True,
1141611430
ddof: int = 1,
1141711431
numeric_only: bool_t = False,
@@ -11423,7 +11437,7 @@ def sem(
1142311437

1142411438
def var(
1142511439
self,
11426-
axis: Axis | None = None,
11440+
axis: Axis | None = 0,
1142711441
skipna: bool_t = True,
1142811442
ddof: int = 1,
1142911443
numeric_only: bool_t = False,
@@ -11435,7 +11449,7 @@ def var(
1143511449

1143611450
def std(
1143711451
self,
11438-
axis: Axis | None = None,
11452+
axis: Axis | None = 0,
1143911453
skipna: bool_t = True,
1144011454
ddof: int = 1,
1144111455
numeric_only: bool_t = False,
@@ -11547,7 +11561,7 @@ def _min_count_stat_function(
1154711561
self,
1154811562
name: str,
1154911563
func,
11550-
axis: Axis | None = None,
11564+
axis: Axis | None | lib.NoDefault = lib.no_default,
1155111565
skipna: bool_t = True,
1155211566
numeric_only: bool_t = False,
1155311567
min_count: int = 0,
@@ -11559,6 +11573,17 @@ def _min_count_stat_function(
1155911573
validate_bool_kwarg(skipna, "skipna", none_allowed=False)
1156011574

1156111575
if axis is None:
11576+
if self.ndim > 1:
11577+
warnings.warn(
11578+
f"The behavior of {type(self).__name__}.{name} with axis=None "
11579+
"is deprecated, in a future version this will reduce over both "
11580+
"axes and return a scalar. To retain the old behavior, pass "
11581+
"axis=0 (or do not pass axis)",
11582+
FutureWarning,
11583+
stacklevel=find_stack_level(),
11584+
)
11585+
axis = 0
11586+
elif axis is lib.no_default:
1156211587
axis = 0
1156311588

1156411589
return self._reduce(
@@ -11572,7 +11597,7 @@ def _min_count_stat_function(
1157211597

1157311598
def sum(
1157411599
self,
11575-
axis: Axis | None = None,
11600+
axis: Axis | None = 0,
1157611601
skipna: bool_t = True,
1157711602
numeric_only: bool_t = False,
1157811603
min_count: int = 0,
@@ -11584,7 +11609,7 @@ def sum(
1158411609

1158511610
def prod(
1158611611
self,
11587-
axis: Axis | None = None,
11612+
axis: Axis | None = 0,
1158811613
skipna: bool_t = True,
1158911614
numeric_only: bool_t = False,
1159011615
min_count: int = 0,

pandas/tests/frame/test_npfuncs.py

+17
Original file line number | Diff line number | Diff line change
@@ -27,6 +27,23 @@ def test_np_sqrt(self, float_frame):
2727

2828
tm.assert_frame_equal(result, float_frame.apply(np.sqrt))
2929

30+
def test_sum_deprecated_axis_behavior(self):
31+
# GH#52042 deprecated behavior of df.sum(axis=None), which gets
32+
# called when we do np.sum(df)
33+
34+
arr = np.random.randn(4, 3)
35+
df = DataFrame(arr)
36+
37+
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
38+
with tm.assert_produces_warning(
39+
FutureWarning, match=msg, check_stacklevel=False
40+
):
41+
res = np.sum(df)
42+
43+
with tm.assert_produces_warning(FutureWarning, match=msg):
44+
expected = df.sum(axis=None)
45+
tm.assert_series_equal(res, expected)
46+
3047
def test_np_ravel(self):
3148
# GH26247
3249
arr = np.array(

pandas/tests/groupby/aggregate/test_aggregate.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -153,7 +153,11 @@ def test_agg_apply_corner(ts, tsframe):
153153
)
154154
tm.assert_frame_equal(grouped.sum(), exp_df)
155155
tm.assert_frame_equal(grouped.agg(np.sum), exp_df)
156-
tm.assert_frame_equal(grouped.apply(np.sum), exp_df)
156+
157+
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
158+
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
159+
res = grouped.apply(np.sum)
160+
tm.assert_frame_equal(res, exp_df)
157161

158162

159163
def test_agg_grouping_is_list_tuple(ts):

pandas/tests/groupby/test_apply.py

+5-2
Original file line number | Diff line number | Diff line change
@@ -1071,14 +1071,17 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func):
10711071

10721072
# Check output when no other methods are called before .apply()
10731073
grp = df.groupby(by="a")
1074-
result = grp.apply(sum)
1074+
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
1075+
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
1076+
result = grp.apply(sum)
10751077
tm.assert_frame_equal(result, expected)
10761078

10771079
# Check output when another method is called before .apply()
10781080
grp = df.groupby(by="a")
10791081
args = get_groupby_method_args(reduction_func, df)
10801082
_ = getattr(grp, reduction_func)(*args)
1081-
result = grp.apply(sum)
1083+
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
1084+
result = grp.apply(sum)
10821085
tm.assert_frame_equal(result, expected)
10831086

10841087

pandas/tests/groupby/test_function.py

+5-1
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,11 @@ def test_builtins_apply(keys, f):
7373
gb = df.groupby(keys)
7474

7575
fname = f.__name__
76-
result = gb.apply(f)
76+
77+
warn = None if f is not sum else FutureWarning
78+
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
79+
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
80+
result = gb.apply(f)
7781
ngroups = len(df.drop_duplicates(subset=keys))
7882

7983
assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))"

pandas/tests/groupby/test_groupby.py

+7-1
Original file line number | Diff line number | Diff line change
@@ -752,7 +752,13 @@ def test_groupby_as_index_agg(df):
752752

753753
gr = df.groupby(ts)
754754
gr.nth(0) # invokes set_selection_from_grouper internally
755-
tm.assert_frame_equal(gr.apply(sum), df.groupby(ts).apply(sum))
755+
756+
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
757+
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
758+
res = gr.apply(sum)
759+
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
760+
alt = df.groupby(ts).apply(sum)
761+
tm.assert_frame_equal(res, alt)
756762

757763
for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]:
758764
gr = df.groupby(ts, as_index=False)

pandas/tests/window/test_expanding.py

+6-1
Original file line number | Diff line number | Diff line change
@@ -333,7 +333,12 @@ def test_expanding_func(func, static_comp, frame_or_series):
333333
result = getattr(obj, func)()
334334
assert isinstance(result, frame_or_series)
335335

336-
expected = static_comp(data[:11])
336+
msg = "The behavior of DataFrame.sum with axis=None is deprecated"
337+
warn = None
338+
if frame_or_series is DataFrame and static_comp is np.sum:
339+
warn = FutureWarning
340+
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
341+
expected = static_comp(data[:11])
337342
if frame_or_series is Series:
338343
tm.assert_almost_equal(result[10], expected)
339344
else:

0 commit comments

Comments
 (0)