Skip to content

Commit 2a2fd48

Browse files
authored
DEPR/ENH: support axis=None in min/max (#45072)
1 parent 358eaec commit 2a2fd48

File tree

8 files changed

+176
-40
lines changed

8 files changed

+176
-40
lines changed

pandas/core/arraylike.py

+6-4
Original file line numberDiff line numberDiff line change
@@ -521,10 +521,12 @@ def dispatch_reduction_ufunc(self, ufunc: np.ufunc, method: str, *inputs, **kwar
521521

522522
if "axis" not in kwargs:
523523
# For DataFrame reductions we don't want the default axis=0
524-
# FIXME: DataFrame.min ignores axis=None
525-
# FIXME: np.minimum.reduce(df) gets here bc axis is not in kwargs,
526-
# but np.minimum.reduce(df.values) behaves as if axis=0
527-
kwargs["axis"] = None
524+
# Note: np.min is not a ufunc, but uses array_function_dispatch,
525+
# so calls DataFrame.min (without ever getting here) with the np.min
526+
# default of axis=None, which DataFrame.min catches and changes to axis=0.
527+
# np.minimum.reduce(df) gets here bc axis is not in kwargs,
528+
# so we set axis=0 to match the behavior of np.minimum.reduce(df.values)
529+
kwargs["axis"] = 0
528530

529531
# By default, numpy's reductions do not skip NaNs, so we have to
530532
# pass skipna=False

pandas/core/common.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -562,7 +562,14 @@ def require_length_match(data, index: Index):
562562
)
563563

564564

565-
_builtin_table = {builtins.sum: np.sum, builtins.max: np.max, builtins.min: np.min}
565+
# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0,
566+
# whereas np.min and np.max (which directly call obj.min and obj.max)
567+
# default to axis=None.
568+
_builtin_table = {
569+
builtins.sum: np.sum,
570+
builtins.max: np.maximum.reduce,
571+
builtins.min: np.minimum.reduce,
572+
}
566573

567574
_cython_table = {
568575
builtins.sum: "sum",

pandas/core/generic.py

+86-17
Original file line numberDiff line numberDiff line change
@@ -10556,7 +10556,7 @@ def _stat_function(
1055610556
self,
1055710557
name: str,
1055810558
func,
10559-
axis: Axis | None = None,
10559+
axis: Axis | None | lib.NoDefault = None,
1056010560
skipna: bool_t = True,
1056110561
level: Level | None = None,
1056210562
numeric_only: bool_t | None = None,
@@ -10569,8 +10569,22 @@ def _stat_function(
1056910569

1057010570
validate_bool_kwarg(skipna, "skipna", none_allowed=False)
1057110571

10572+
if axis is None and level is None and self.ndim > 1:
10573+
# user must have explicitly passed axis=None
10574+
# GH#21597
10575+
warnings.warn(
10576+
f"In a future version, DataFrame.{name}(axis=None) will return a "
10577+
f"scalar {name} over the entire DataFrame. To retain the old "
10578+
f"behavior, use 'frame.{name}(axis=0)' or just 'frame.{name}()'",
10579+
FutureWarning,
10580+
stacklevel=find_stack_level(),
10581+
)
10582+
if axis is lib.no_default:
10583+
axis = None
10584+
1057210585
if axis is None:
1057310586
axis = self._stat_axis_number
10587+
axis = cast(Axis, axis)
1057410588
if level is not None:
1057510589
warnings.warn(
1057610590
"Using the level keyword in DataFrame and Series aggregations is "
@@ -10588,31 +10602,43 @@ def _stat_function(
1058810602

1058910603
def min(
1059010604
self,
10591-
axis: Axis | None = None,
10605+
axis: Axis | None | lib.NoDefault = lib.no_default,
1059210606
skipna: bool_t = True,
1059310607
level: Level | None = None,
1059410608
numeric_only: bool_t | None = None,
1059510609
**kwargs,
1059610610
):
1059710611
return self._stat_function(
10598-
"min", nanops.nanmin, axis, skipna, level, numeric_only, **kwargs
10612+
"min",
10613+
nanops.nanmin,
10614+
axis,
10615+
skipna,
10616+
level,
10617+
numeric_only,
10618+
**kwargs,
1059910619
)
1060010620

1060110621
def max(
1060210622
self,
10603-
axis: Axis | None = None,
10623+
axis: Axis | None | lib.NoDefault = lib.no_default,
1060410624
skipna: bool_t = True,
1060510625
level: Level | None = None,
1060610626
numeric_only: bool_t | None = None,
1060710627
**kwargs,
1060810628
):
1060910629
return self._stat_function(
10610-
"max", nanops.nanmax, axis, skipna, level, numeric_only, **kwargs
10630+
"max",
10631+
nanops.nanmax,
10632+
axis,
10633+
skipna,
10634+
level,
10635+
numeric_only,
10636+
**kwargs,
1061110637
)
1061210638

1061310639
def mean(
1061410640
self,
10615-
axis: Axis | None = None,
10641+
axis: Axis | None | lib.NoDefault = lib.no_default,
1061610642
skipna: bool_t = True,
1061710643
level: Level | None = None,
1061810644
numeric_only: bool_t | None = None,
@@ -10624,7 +10650,7 @@ def mean(
1062410650

1062510651
def median(
1062610652
self,
10627-
axis: Axis | None = None,
10653+
axis: Axis | None | lib.NoDefault = lib.no_default,
1062810654
skipna: bool_t = True,
1062910655
level: Level | None = None,
1063010656
numeric_only: bool_t | None = None,
@@ -10636,7 +10662,7 @@ def median(
1063610662

1063710663
def skew(
1063810664
self,
10639-
axis: Axis | None = None,
10665+
axis: Axis | None | lib.NoDefault = lib.no_default,
1064010666
skipna: bool_t = True,
1064110667
level: Level | None = None,
1064210668
numeric_only: bool_t | None = None,
@@ -10648,7 +10674,7 @@ def skew(
1064810674

1064910675
def kurt(
1065010676
self,
10651-
axis: Axis | None = None,
10677+
axis: Axis | None | lib.NoDefault = lib.no_default,
1065210678
skipna: bool_t = True,
1065310679
level: Level | None = None,
1065410680
numeric_only: bool_t | None = None,
@@ -10699,6 +10725,7 @@ def _min_count_stat_function(
1069910725
min_count=min_count,
1070010726
numeric_only=numeric_only,
1070110727
)
10728+
1070210729
return self._reduce(
1070310730
func,
1070410731
name=name,
@@ -11039,7 +11066,14 @@ def prod(
1103911066
see_also="",
1104011067
examples="",
1104111068
)
11042-
def mean(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
11069+
def mean(
11070+
self,
11071+
axis: int | None | lib.NoDefault = lib.no_default,
11072+
skipna=True,
11073+
level=None,
11074+
numeric_only=None,
11075+
**kwargs,
11076+
):
1104311077
return NDFrame.mean(self, axis, skipna, level, numeric_only, **kwargs)
1104411078

1104511079
setattr(cls, "mean", mean)
@@ -11054,7 +11088,14 @@ def mean(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
1105411088
see_also="",
1105511089
examples="",
1105611090
)
11057-
def skew(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
11091+
def skew(
11092+
self,
11093+
axis: int | None | lib.NoDefault = lib.no_default,
11094+
skipna=True,
11095+
level=None,
11096+
numeric_only=None,
11097+
**kwargs,
11098+
):
1105811099
return NDFrame.skew(self, axis, skipna, level, numeric_only, **kwargs)
1105911100

1106011101
setattr(cls, "skew", skew)
@@ -11072,7 +11113,14 @@ def skew(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
1107211113
see_also="",
1107311114
examples="",
1107411115
)
11075-
def kurt(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
11116+
def kurt(
11117+
self,
11118+
axis: Axis | None | lib.NoDefault = lib.no_default,
11119+
skipna=True,
11120+
level=None,
11121+
numeric_only=None,
11122+
**kwargs,
11123+
):
1107611124
return NDFrame.kurt(self, axis, skipna, level, numeric_only, **kwargs)
1107711125

1107811126
setattr(cls, "kurt", kurt)
@@ -11089,13 +11137,19 @@ def kurt(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
1108911137
examples="",
1109011138
)
1109111139
def median(
11092-
self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs
11140+
self,
11141+
axis: int | None | lib.NoDefault = lib.no_default,
11142+
skipna=True,
11143+
level=None,
11144+
numeric_only=None,
11145+
**kwargs,
1109311146
):
1109411147
return NDFrame.median(self, axis, skipna, level, numeric_only, **kwargs)
1109511148

1109611149
setattr(cls, "median", median)
1109711150

11098-
@doc(
11151+
# error: Untyped decorator makes function "max" untyped
11152+
@doc( # type: ignore[misc]
1109911153
_num_doc,
1110011154
desc="Return the maximum of the values over the requested axis.\n\n"
1110111155
"If you want the *index* of the maximum, use ``idxmax``. This is "
@@ -11107,12 +11161,20 @@ def median(
1110711161
see_also=_stat_func_see_also,
1110811162
examples=_max_examples,
1110911163
)
11110-
def max(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
11164+
def max(
11165+
self,
11166+
axis: int | None | lib.NoDefault = lib.no_default,
11167+
skipna=True,
11168+
level=None,
11169+
numeric_only=None,
11170+
**kwargs,
11171+
):
1111111172
return NDFrame.max(self, axis, skipna, level, numeric_only, **kwargs)
1111211173

1111311174
setattr(cls, "max", max)
1111411175

11115-
@doc(
11176+
# error: Untyped decorator makes function "min" untyped
11177+
@doc( # type: ignore[misc]
1111611178
_num_doc,
1111711179
desc="Return the minimum of the values over the requested axis.\n\n"
1111811180
"If you want the *index* of the minimum, use ``idxmin``. This is "
@@ -11124,7 +11186,14 @@ def max(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
1112411186
see_also=_stat_func_see_also,
1112511187
examples=_min_examples,
1112611188
)
11127-
def min(self, axis=None, skipna=True, level=None, numeric_only=None, **kwargs):
11189+
def min(
11190+
self,
11191+
axis: int | None | lib.NoDefault = lib.no_default,
11192+
skipna=True,
11193+
level=None,
11194+
numeric_only=None,
11195+
**kwargs,
11196+
):
1112811197
return NDFrame.min(self, axis, skipna, level, numeric_only, **kwargs)
1112911198

1113011199
setattr(cls, "min", min)

pandas/tests/frame/test_reductions.py

+17
Original file line numberDiff line numberDiff line change
@@ -1765,3 +1765,20 @@ def test_prod_sum_min_count_mixed_object():
17651765
msg = re.escape("unsupported operand type(s) for +: 'int' and 'str'")
17661766
with pytest.raises(TypeError, match=msg):
17671767
df.sum(axis=0, min_count=1, numeric_only=False)
1768+
1769+
1770+
@pytest.mark.parametrize("method", ["min", "max", "mean", "median", "skew", "kurt"])
1771+
def test_reduction_axis_none_deprecation(method):
1772+
# GH#21597 deprecate axis=None defaulting to axis=0 so that we can change it
1773+
# to reducing over all axes.
1774+
1775+
df = DataFrame(np.random.randn(4, 4))
1776+
meth = getattr(df, method)
1777+
1778+
msg = f"scalar {method} over the entire DataFrame"
1779+
with tm.assert_produces_warning(FutureWarning, match=msg):
1780+
res = meth(axis=None)
1781+
with tm.assert_produces_warning(None):
1782+
expected = meth()
1783+
tm.assert_series_equal(res, expected)
1784+
tm.assert_series_equal(res, meth(axis=0))

pandas/tests/groupby/test_categorical.py

+19-4
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ def get_stats(group):
8181
assert result.index.names[0] == "C"
8282

8383

84-
def test_basic():
84+
def test_basic(): # TODO: split this test
8585

8686
cats = Categorical(
8787
["a", "a", "a", "b", "b", "b", "c", "c", "c"],
@@ -142,9 +142,24 @@ def f(x):
142142
df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df["a"]
143143
)
144144
tm.assert_frame_equal(df.groupby(c, observed=False).transform(sum), df[["a"]])
145-
tm.assert_frame_equal(
146-
df.groupby(c, observed=False).transform(lambda xs: np.max(xs)), df[["a"]]
147-
)
145+
146+
gbc = df.groupby(c, observed=False)
147+
with tm.assert_produces_warning(
148+
FutureWarning, match="scalar max", check_stacklevel=False
149+
):
150+
# stacklevel is thrown off (I think) because the stack goes through numpy C code
151+
result = gbc.transform(lambda xs: np.max(xs))
152+
tm.assert_frame_equal(result, df[["a"]])
153+
154+
with tm.assert_produces_warning(None):
155+
result2 = gbc.transform(lambda xs: np.max(xs, axis=0))
156+
result3 = gbc.transform(max)
157+
result4 = gbc.transform(np.maximum.reduce)
158+
result5 = gbc.transform(lambda xs: np.maximum.reduce(xs))
159+
tm.assert_frame_equal(result2, df[["a"]], check_dtype=False)
160+
tm.assert_frame_equal(result3, df[["a"]], check_dtype=False)
161+
tm.assert_frame_equal(result4, df[["a"]])
162+
tm.assert_frame_equal(result5, df[["a"]])
148163

149164
# Filter
150165
tm.assert_series_equal(df.a.groupby(c, observed=False).filter(np.all), df["a"])

pandas/tests/groupby/test_function.py

+19-6
Original file line numberDiff line numberDiff line change
@@ -69,20 +69,33 @@ def test_builtins_apply(keys, f):
6969
df = DataFrame(np.random.randint(1, 50, (1000, 2)), columns=["jim", "joe"])
7070
df["jolie"] = np.random.randn(1000)
7171

72+
gb = df.groupby(keys)
73+
7274
fname = f.__name__
73-
result = df.groupby(keys).apply(f)
75+
result = gb.apply(f)
7476
ngroups = len(df.drop_duplicates(subset=keys))
7577

7678
assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))"
7779
assert result.shape == (ngroups, 3), assert_msg
7880

79-
tm.assert_frame_equal(
80-
result, # numpy's equivalent function
81-
df.groupby(keys).apply(getattr(np, fname)),
82-
)
81+
npfunc = getattr(np, fname) # numpy's equivalent function
82+
if f in [max, min]:
83+
warn = FutureWarning
84+
else:
85+
warn = None
86+
msg = "scalar (max|min) over the entire DataFrame"
87+
with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False):
88+
# stacklevel can be thrown off because (I think) the stack
89+
# goes through some of numpy's C code.
90+
expected = gb.apply(npfunc)
91+
tm.assert_frame_equal(result, expected)
92+
93+
with tm.assert_produces_warning(None):
94+
expected2 = gb.apply(lambda x: npfunc(x, axis=0))
95+
tm.assert_frame_equal(result, expected2)
8396

8497
if f != sum:
85-
expected = df.groupby(keys).agg(fname).reset_index()
98+
expected = gb.agg(fname).reset_index()
8699
expected.set_index(keys, inplace=True, drop=False)
87100
tm.assert_frame_equal(result, expected, check_dtype=False)
88101

pandas/tests/groupby/transform/test_transform.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -483,9 +483,16 @@ def test_transform_coercion():
483483
g = df.groupby("A")
484484

485485
expected = g.transform(np.mean)
486-
result = g.transform(lambda x: np.mean(x))
486+
487+
msg = "will return a scalar mean"
488+
with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False):
489+
result = g.transform(lambda x: np.mean(x))
487490
tm.assert_frame_equal(result, expected)
488491

492+
with tm.assert_produces_warning(None):
493+
result2 = g.transform(lambda x: np.mean(x, axis=0))
494+
tm.assert_frame_equal(result2, expected)
495+
489496

490497
def test_groupby_transform_with_int():
491498

0 commit comments

Comments
 (0)