Skip to content

Commit 364c98a

Browse files
author
luke
committed
Revert "DEPR: Enforce certain DataFrame reductions w/ axis=None to return scalars (pandas-dev#50593)"
This reverts commit 47c9ee7.
1 parent bba7405 commit 364c98a

File tree

7 files changed

+80
-54
lines changed

7 files changed

+80
-54
lines changed

doc/source/whatsnew/v2.0.0.rst

-1
Original file line numberDiff line numberDiff line change
@@ -761,7 +761,6 @@ Removal of prior version deprecations/changes
761761
- Changed behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and an incompatible ``fill_value``; this now casts to ``object`` dtype instead of raising, consistent with the behavior with other dtypes (:issue:`45746`)
762762
- Change the default argument of ``regex`` for :meth:`Series.str.replace` from ``True`` to ``False``. Additionally, a single character ``pat`` with ``regex=True`` is now treated as a regular expression instead of a string literal. (:issue:`36695`, :issue:`24804`)
763763
- Changed behavior of :meth:`DataFrame.any` and :meth:`DataFrame.all` with ``bool_only=True``; object-dtype columns with all-bool values will no longer be included, manually cast to ``bool`` dtype first (:issue:`46188`)
764-
- Changed behavior of :meth:`DataFrame.max`, :class:`DataFrame.min`, :class:`DataFrame.mean`, :class:`DataFrame.median`, :class:`DataFrame.skew`, :class:`DataFrame.kurt` with ``axis=None`` to return a scalar applying the aggregation across both axes (:issue:`45072`)
765764
- Changed behavior of comparison of a :class:`Timestamp` with a ``datetime.date`` object; these now compare as un-equal and raise on inequality comparisons, matching the ``datetime.datetime`` behavior (:issue:`36131`)
766765
- Changed behavior of comparison of ``NaT`` with a ``datetime.date`` object; these now raise on inequality comparisons (:issue:`39196`)
767766
- Enforced deprecation of silently dropping columns that raised a ``TypeError`` in :meth:`Series.transform` and :meth:`DataFrame.transform` when used with a list or dictionary (:issue:`43740`)

pandas/core/frame.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -10357,8 +10357,9 @@ def _reduce(
1035710357
assert filter_type is None or filter_type == "bool", filter_type
1035810358
out_dtype = "bool" if filter_type == "bool" else None
1035910359

10360-
if axis is not None:
10361-
axis = self._get_axis_number(axis)
10360+
# TODO: Make other agg func handle axis=None properly GH#21597
10361+
axis = self._get_axis_number(axis)
10362+
assert axis in [0, 1]
1036210363

1036310364
def func(values: np.ndarray):
1036410365
# We only use this in the case that operates on self.values
@@ -10409,7 +10410,7 @@ def _get_data() -> DataFrame:
1040910410

1041010411
return out
1041110412

10412-
assert not numeric_only and axis in (1, None)
10413+
assert not numeric_only and axis == 1
1041310414

1041410415
data = self
1041510416
values = data.values
@@ -10425,9 +10426,6 @@ def _get_data() -> DataFrame:
1042510426
# try to coerce to the original dtypes item by item if we can
1042610427
pass
1042710428

10428-
if axis is None:
10429-
return result
10430-
1043110429
labels = self._get_agg_axis(axis)
1043210430
result = self._constructor_sliced(result, index=labels)
1043310431
return result

pandas/core/generic.py

+31-20
Original file line numberDiff line numberDiff line change
@@ -10949,7 +10949,7 @@ def _stat_function(
1094910949
self,
1095010950
name: str,
1095110951
func,
10952-
axis: Axis | None = 0,
10952+
axis: Axis | None | lib.NoDefault = None,
1095310953
skipna: bool_t = True,
1095410954
numeric_only: bool_t = False,
1095510955
**kwargs,
@@ -10961,13 +10961,30 @@ def _stat_function(
1096110961

1096210962
validate_bool_kwarg(skipna, "skipna", none_allowed=False)
1096310963

10964+
if axis is None and self.ndim > 1:
10965+
# user must have explicitly passed axis=None
10966+
# GH#21597
10967+
warnings.warn(
10968+
f"In a future version, DataFrame.{name}(axis=None) will return a "
10969+
f"scalar {name} over the entire DataFrame. To retain the old "
10970+
f"behavior, use 'frame.{name}(axis=0)' or just 'frame.{name}()'",
10971+
FutureWarning,
10972+
stacklevel=find_stack_level(),
10973+
)
10974+
10975+
if axis is lib.no_default:
10976+
axis = None
10977+
10978+
if axis is None:
10979+
axis = self._stat_axis_number
10980+
1096410981
return self._reduce(
1096510982
func, name=name, axis=axis, skipna=skipna, numeric_only=numeric_only
1096610983
)
1096710984

1096810985
def min(
1096910986
self,
10970-
axis: Axis | None = 0,
10987+
axis: Axis | None | lib.NoDefault = lib.no_default,
1097110988
skipna: bool_t = True,
1097210989
numeric_only: bool_t = False,
1097310990
**kwargs,
@@ -10983,7 +11000,7 @@ def min(
1098311000

1098411001
def max(
1098511002
self,
10986-
axis: Axis | None = 0,
11003+
axis: Axis | None | lib.NoDefault = lib.no_default,
1098711004
skipna: bool_t = True,
1098811005
numeric_only: bool_t = False,
1098911006
**kwargs,
@@ -10999,7 +11016,7 @@ def max(
1099911016

1100011017
def mean(
1100111018
self,
11002-
axis: Axis | None = 0,
11019+
axis: Axis | None | lib.NoDefault = lib.no_default,
1100311020
skipna: bool_t = True,
1100411021
numeric_only: bool_t = False,
1100511022
**kwargs,
@@ -11010,7 +11027,7 @@ def mean(
1101011027

1101111028
def median(
1101211029
self,
11013-
axis: Axis | None = 0,
11030+
axis: Axis | None | lib.NoDefault = lib.no_default,
1101411031
skipna: bool_t = True,
1101511032
numeric_only: bool_t = False,
1101611033
**kwargs,
@@ -11021,7 +11038,7 @@ def median(
1102111038

1102211039
def skew(
1102311040
self,
11024-
axis: Axis | None = 0,
11041+
axis: Axis | None | lib.NoDefault = lib.no_default,
1102511042
skipna: bool_t = True,
1102611043
numeric_only: bool_t = False,
1102711044
**kwargs,
@@ -11032,7 +11049,7 @@ def skew(
1103211049

1103311050
def kurt(
1103411051
self,
11035-
axis: Axis | None = 0,
11052+
axis: Axis | None | lib.NoDefault = lib.no_default,
1103611053
skipna: bool_t = True,
1103711054
numeric_only: bool_t = False,
1103811055
**kwargs,
@@ -11354,7 +11371,7 @@ def prod(
1135411371
)
1135511372
def mean(
1135611373
self,
11357-
axis: AxisInt | None = 0,
11374+
axis: AxisInt | None | lib.NoDefault = lib.no_default,
1135811375
skipna: bool_t = True,
1135911376
numeric_only: bool_t = False,
1136011377
**kwargs,
@@ -11375,7 +11392,7 @@ def mean(
1137511392
)
1137611393
def skew(
1137711394
self,
11378-
axis: AxisInt | None = 0,
11395+
axis: AxisInt | None | lib.NoDefault = lib.no_default,
1137911396
skipna: bool_t = True,
1138011397
numeric_only: bool_t = False,
1138111398
**kwargs,
@@ -11399,7 +11416,7 @@ def skew(
1139911416
)
1140011417
def kurt(
1140111418
self,
11402-
axis: Axis | None = 0,
11419+
axis: Axis | None | lib.NoDefault = lib.no_default,
1140311420
skipna: bool_t = True,
1140411421
numeric_only: bool_t = False,
1140511422
**kwargs,
@@ -11421,7 +11438,7 @@ def kurt(
1142111438
)
1142211439
def median(
1142311440
self,
11424-
axis: AxisInt | None = 0,
11441+
axis: AxisInt | None | lib.NoDefault = lib.no_default,
1142511442
skipna: bool_t = True,
1142611443
numeric_only: bool_t = False,
1142711444
**kwargs,
@@ -11444,7 +11461,7 @@ def median(
1144411461
)
1144511462
def max(
1144611463
self,
11447-
axis: AxisInt | None = 0,
11464+
axis: AxisInt | None | lib.NoDefault = lib.no_default,
1144811465
skipna: bool_t = True,
1144911466
numeric_only: bool_t = False,
1145011467
**kwargs,
@@ -11467,7 +11484,7 @@ def max(
1146711484
)
1146811485
def min(
1146911486
self,
11470-
axis: AxisInt | None = 0,
11487+
axis: AxisInt | None | lib.NoDefault = lib.no_default,
1147111488
skipna: bool_t = True,
1147211489
numeric_only: bool_t = False,
1147311490
**kwargs,
@@ -11696,12 +11713,6 @@ def _doc_params(cls):
1169611713
axis : {axis_descr}
1169711714
Axis for the function to be applied on.
1169811715
For `Series` this parameter is unused and defaults to 0.
11699-
11700-
For DataFrames, specifying ``axis=None`` will apply the aggregation
11701-
across both axes.
11702-
11703-
.. versionadded:: 2.0.0
11704-
1170511716
skipna : bool, default True
1170611717
Exclude NA/null values when computing the result.
1170711718
numeric_only : bool, default False
@@ -11713,7 +11724,7 @@ def _doc_params(cls):
1171311724
1171411725
Returns
1171511726
-------
11716-
{name1} or scalar\
11727+
{name1} or {name2} (if level specified)\
1171711728
{see_also}\
1171811729
{examples}
1171911730
"""

pandas/tests/frame/test_reductions.py

+15-16
Original file line numberDiff line numberDiff line change
@@ -1488,6 +1488,7 @@ def test_median_categorical_dtype_nuisance_column(self):
14881488
# TODO: np.median(df, axis=0) gives np.array([2.0, 2.0]) instead
14891489
# of expected.values
14901490

1491+
@pytest.mark.filterwarnings("ignore:.*will return a scalar.*:FutureWarning")
14911492
@pytest.mark.parametrize("method", ["min", "max"])
14921493
def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method):
14931494
# GH#28949 DataFrame.min should behave like Series.min
@@ -1509,15 +1510,15 @@ def test_min_max_categorical_dtype_non_ordered_nuisance_column(self, method):
15091510
getattr(df, method)()
15101511

15111512
with pytest.raises(TypeError, match="is not ordered for operation"):
1512-
getattr(np, method)(df, axis=0)
1513+
getattr(np, method)(df)
15131514

15141515
# same thing, but with an additional non-categorical column
15151516
df["B"] = df["A"].astype(object)
15161517
with pytest.raises(TypeError, match="is not ordered for operation"):
15171518
getattr(df, method)()
15181519

15191520
with pytest.raises(TypeError, match="is not ordered for operation"):
1520-
getattr(np, method)(df, axis=0)
1521+
getattr(np, method)(df)
15211522

15221523

15231524
def test_sum_timedelta64_skipna_false(using_array_manager, request):
@@ -1599,22 +1600,20 @@ def test_prod_sum_min_count_mixed_object():
15991600

16001601

16011602
@pytest.mark.parametrize("method", ["min", "max", "mean", "median", "skew", "kurt"])
1602-
def test_reduction_axis_none_returns_scalar(method):
1603-
# GH#21597 As of 2.0, axis=None reduces over all axes.
1603+
def test_reduction_axis_none_deprecation(method):
1604+
# GH#21597 deprecate axis=None defaulting to axis=0 so that we can change it
1605+
# to reducing over all axes.
16041606

16051607
df = DataFrame(np.random.randn(4, 4))
1606-
1607-
result = getattr(df, method)(axis=None)
1608-
np_arr = df.to_numpy()
1609-
if method in {"skew", "kurt"}:
1610-
comp_mod = pytest.importorskip("scipy.stats")
1611-
if method == "kurt":
1612-
method = "kurtosis"
1613-
expected = getattr(comp_mod, method)(np_arr, bias=False, axis=None)
1614-
tm.assert_almost_equal(result, expected)
1615-
else:
1616-
expected = getattr(np, method)(np_arr, axis=None)
1617-
assert result == expected
1608+
meth = getattr(df, method)
1609+
1610+
msg = f"scalar {method} over the entire DataFrame"
1611+
with tm.assert_produces_warning(FutureWarning, match=msg):
1612+
res = meth(axis=None)
1613+
with tm.assert_produces_warning(None):
1614+
expected = meth()
1615+
tm.assert_series_equal(res, expected)
1616+
tm.assert_series_equal(res, meth(axis=0))
16181617

16191618

16201619
@pytest.mark.parametrize(

pandas/tests/groupby/test_categorical.py

+6-2
Original file line numberDiff line numberDiff line change
@@ -147,7 +147,11 @@ def f(x):
147147
tm.assert_frame_equal(df.groupby(c, observed=False).transform(sum), df[["a"]])
148148

149149
gbc = df.groupby(c, observed=False)
150-
result = gbc.transform(lambda xs: np.max(xs, axis=0))
150+
with tm.assert_produces_warning(
151+
FutureWarning, match="scalar max", check_stacklevel=False
152+
):
153+
# stacklevel is thrown off (I think) because the stack goes through numpy C code
154+
result = gbc.transform(lambda xs: np.max(xs))
151155
tm.assert_frame_equal(result, df[["a"]])
152156

153157
with tm.assert_produces_warning(None):
@@ -291,7 +295,7 @@ def test_apply(ordered):
291295
idx = MultiIndex.from_arrays([missing, dense], names=["missing", "dense"])
292296
expected = DataFrame([0, 1, 2.0], index=idx, columns=["values"])
293297

294-
result = grouped.apply(lambda x: np.mean(x, axis=0))
298+
result = grouped.apply(lambda x: np.mean(x))
295299
tm.assert_frame_equal(result, expected)
296300

297301
result = grouped.mean()

pandas/tests/groupby/test_function.py

+12-4
Original file line numberDiff line numberDiff line change
@@ -80,20 +80,28 @@ def test_builtins_apply(keys, f):
8080
assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))"
8181
assert result.shape == (ngroups, 3), assert_msg
8282

83-
npfunc = lambda x: getattr(np, fname)(x, axis=0) # numpy's equivalent function
84-
expected = gb.apply(npfunc)
83+
npfunc = getattr(np, fname) # numpy's equivalent function
84+
if f in [max, min]:
85+
warn = FutureWarning
86+
else:
87+
warn = None
88+
msg = "scalar (max|min) over the entire DataFrame"
89+
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
90+
# stacklevel can be thrown off because (I think) the stack
91+
# goes through some of numpy's C code.
92+
expected = gb.apply(npfunc)
8593
tm.assert_frame_equal(result, expected)
8694

8795
with tm.assert_produces_warning(None):
88-
expected2 = gb.apply(lambda x: npfunc(x))
96+
expected2 = gb.apply(lambda x: npfunc(x, axis=0))
8997
tm.assert_frame_equal(result, expected2)
9098

9199
if f != sum:
92100
expected = gb.agg(fname).reset_index()
93101
expected.set_index(keys, inplace=True, drop=False)
94102
tm.assert_frame_equal(result, expected, check_dtype=False)
95103

96-
tm.assert_series_equal(getattr(result, fname)(axis=0), getattr(df, fname)(axis=0))
104+
tm.assert_series_equal(getattr(result, fname)(), getattr(df, fname)())
97105

98106

99107
class TestNumericOnly:

pandas/tests/groupby/transform/test_transform.py

+12-5
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ def test_transform():
5757
tm.assert_frame_equal(result, expected)
5858

5959
def demean(arr):
60-
return arr - arr.mean(axis=0)
60+
return arr - arr.mean()
6161

6262
people = DataFrame(
6363
np.random.randn(5, 5),
@@ -144,7 +144,7 @@ def test_transform_broadcast(tsframe, ts):
144144
result = grouped.transform(np.mean)
145145
tm.assert_index_equal(result.index, tsframe.index)
146146
for _, gp in grouped:
147-
agged = gp.mean(axis=0)
147+
agged = gp.mean()
148148
res = result.reindex(gp.index)
149149
for col in tsframe:
150150
assert_fp_equal(res[col], agged[col])
@@ -214,7 +214,7 @@ def test_transform_axis_ts(tsframe):
214214
ts = tso
215215
grouped = ts.groupby(lambda x: x.weekday(), group_keys=False)
216216
result = ts - grouped.transform("mean")
217-
expected = grouped.apply(lambda x: x - x.mean(axis=0))
217+
expected = grouped.apply(lambda x: x - x.mean())
218218
tm.assert_frame_equal(result, expected)
219219

220220
ts = ts.T
@@ -227,7 +227,7 @@ def test_transform_axis_ts(tsframe):
227227
ts = tso.iloc[[1, 0] + list(range(2, len(base)))]
228228
grouped = ts.groupby(lambda x: x.weekday(), group_keys=False)
229229
result = ts - grouped.transform("mean")
230-
expected = grouped.apply(lambda x: x - x.mean(axis=0))
230+
expected = grouped.apply(lambda x: x - x.mean())
231231
tm.assert_frame_equal(result, expected)
232232

233233
ts = ts.T
@@ -477,9 +477,16 @@ def test_transform_coercion():
477477

478478
expected = g.transform(np.mean)
479479

480-
result = g.transform(lambda x: np.mean(x, axis=0))
480+
# in 2.0 np.mean on a DataFrame is equivalent to frame.mean(axis=None)
481+
# which now gives a scalar instead of Series
482+
with tm.assert_produces_warning(FutureWarning, check_stacklevel=False):
483+
result = g.transform(lambda x: np.mean(x))
481484
tm.assert_frame_equal(result, expected)
482485

486+
with tm.assert_produces_warning(None):
487+
result2 = g.transform(lambda x: np.mean(x, axis=0))
488+
tm.assert_frame_equal(result2, expected)
489+
483490

484491
def test_groupby_transform_with_int():
485492

0 commit comments

Comments
 (0)