From 5604093e47b3b51271ee9c8b6976c5fb7f74cf02 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 28 Feb 2024 17:54:11 -0500 Subject: [PATCH 1/4] CLN: Enforce deprecation of axis=None in DataFrame reductions --- doc/source/whatsnew/v3.0.0.rst | 2 +- pandas/core/frame.py | 8 +++-- pandas/core/generic.py | 32 ++----------------- pandas/tests/frame/test_npfuncs.py | 18 ++++------- .../tests/groupby/aggregate/test_aggregate.py | 8 +++-- pandas/tests/groupby/test_raises.py | 6 +--- pandas/tests/window/test_expanding.py | 9 ++---- 7 files changed, 24 insertions(+), 59 deletions(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index a95f0485abd5f..af3824cdf1d39 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -197,6 +197,7 @@ Removal of prior version deprecations/changes - All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`) - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) +- Enforced deprecation of ``axis=None`` acting the same as ``axis=0`` in the DataFrame reductions ``sum``, ``prod``, ``std``, ``var``, and ``sem``. Passing ``axis=None`` will now reduce over both axes; this is particularly the case when doing e.g. ``numpy.sum(df)`` (:issue:`21597`) - Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`) - In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`) - Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. 
``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`) @@ -238,7 +239,6 @@ Removal of prior version deprecations/changes - Removed unused arguments ``*args`` and ``**kwargs`` in :class:`Resampler` methods (:issue:`50977`) - Unrecognized timezones when parsing strings to datetimes now raises a ``ValueError`` (:issue:`51477`) - .. --------------------------------------------------------------------------- .. _whatsnew_300.performance: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index f530466c0fc30..db9f377cdb70c 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -11514,7 +11514,9 @@ def sum( min_count=min_count, **kwargs, ) - return result.__finalize__(self, method="sum") + if isinstance(result, Series): + result = result.__finalize__(self, method="sum") + return result @doc(make_doc("prod", ndim=2)) def prod( @@ -11532,7 +11534,9 @@ def prod( min_count=min_count, **kwargs, ) - return result.__finalize__(self, method="prod") + if isinstance(result, Series): + result = result.__finalize__(self, method="prod") + return result # error: Signature of "mean" incompatible with supertype "NDFrame" @overload # type: ignore[override] diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 1bc6b7a3eea03..6a28e49655e02 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -11445,7 +11445,7 @@ def _stat_function_ddof( self, name: str, func, - axis: Axis | None | lib.NoDefault = lib.no_default, + axis: Axis | None = 0, skipna: bool = True, ddof: int = 1, numeric_only: bool = False, @@ -11454,20 +11454,6 @@ def _stat_function_ddof( nv.validate_stat_ddof_func((), kwargs, fname=name) validate_bool_kwarg(skipna, "skipna", none_allowed=False) - if axis is None: - if self.ndim > 1: - warnings.warn( - f"The behavior of {type(self).__name__}.{name} with axis=None " - "is deprecated, in a future version this will reduce over both " - "axes and return a scalar. 
To retain the old behavior, pass " - "axis=0 (or do not pass axis)", - FutureWarning, - stacklevel=find_stack_level(), - ) - axis = 0 - elif axis is lib.no_default: - axis = 0 - return self._reduce( func, name, axis=axis, numeric_only=numeric_only, skipna=skipna, ddof=ddof ) @@ -11619,7 +11605,7 @@ def _min_count_stat_function( self, name: str, func, - axis: Axis | None | lib.NoDefault = lib.no_default, + axis: Axis | None = 0, skipna: bool = True, numeric_only: bool = False, min_count: int = 0, @@ -11630,20 +11616,6 @@ def _min_count_stat_function( validate_bool_kwarg(skipna, "skipna", none_allowed=False) - if axis is None: - if self.ndim > 1: - warnings.warn( - f"The behavior of {type(self).__name__}.{name} with axis=None " - "is deprecated, in a future version this will reduce over both " - "axes and return a scalar. To retain the old behavior, pass " - "axis=0 (or do not pass axis)", - FutureWarning, - stacklevel=find_stack_level(), - ) - axis = 0 - elif axis is lib.no_default: - axis = 0 - return self._reduce( func, name=name, diff --git a/pandas/tests/frame/test_npfuncs.py b/pandas/tests/frame/test_npfuncs.py index afb53bf2de93a..639bf71796493 100644 --- a/pandas/tests/frame/test_npfuncs.py +++ b/pandas/tests/frame/test_npfuncs.py @@ -27,22 +27,18 @@ def test_np_sqrt(self, float_frame): tm.assert_frame_equal(result, float_frame.apply(np.sqrt)) - def test_sum_deprecated_axis_behavior(self): - # GH#52042 deprecated behavior of df.sum(axis=None), which gets + def test_sum_axis_behavior(self): + # GH#52042 df.sum(axis=None) now reduces over both axes, which gets # called when we do np.sum(df) arr = np.random.default_rng(2).standard_normal((4, 3)) df = DataFrame(arr) - msg = "The behavior of DataFrame.sum with axis=None is deprecated" - with tm.assert_produces_warning( - FutureWarning, match=msg, check_stacklevel=False - ): - res = np.sum(df) - - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.sum(axis=None) - tm.assert_series_equal(res, 
expected) + res = np.sum(df) + expected = df.to_numpy().sum(axis=None) + print(res) + print(expected) + assert res == expected def test_np_ravel(self): # GH26247 diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 255784e8bf24d..246ae77f221f2 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -132,9 +132,7 @@ def test_agg_apply_corner(ts, tsframe): tm.assert_frame_equal(grouped.sum(), exp_df) tm.assert_frame_equal(grouped.agg("sum"), exp_df) - msg = "The behavior of DataFrame.sum with axis=None is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False): - res = grouped.apply(np.sum) + res = grouped.apply(np.sum, axis=0) tm.assert_frame_equal(res, exp_df) @@ -1032,6 +1030,10 @@ def test_groupby_as_index_agg(df): gr = df.groupby(ts) gr.nth(0) # invokes set_selection_from_grouper internally + res = gr.apply(sum) + alt = df.groupby(ts).apply(sum) + tm.assert_frame_equal(res, alt) + for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]: gr = df.groupby(ts, as_index=False) left = getattr(gr, attr)() diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 18465d00d17e2..f9d5de72eda1d 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -222,11 +222,7 @@ def test_groupby_raises_string_np( "Could not convert string .* to numeric", ), }[groupby_func_np] - if how == "transform" and groupby_func_np is np.sum and not groupby_series: - warn_msg = "The behavior of DataFrame.sum with axis=None is deprecated" - else: - warn_msg = "" - _call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg) + _call_and_check(klass, msg, how, gb, groupby_func_np, ()) @pytest.mark.parametrize("how", ["method", "agg", "transform"]) diff --git a/pandas/tests/window/test_expanding.py b/pandas/tests/window/test_expanding.py index 
ad59f9e52514e..d375010aff3cc 100644 --- a/pandas/tests/window/test_expanding.py +++ b/pandas/tests/window/test_expanding.py @@ -310,7 +310,7 @@ def test_expanding_corr_pairwise(frame): @pytest.mark.parametrize( "func,static_comp", [ - ("sum", np.sum), + ("sum", lambda x: np.sum(x, axis=0)), ("mean", lambda x: np.mean(x, axis=0)), ("max", lambda x: np.max(x, axis=0)), ("min", lambda x: np.min(x, axis=0)), @@ -324,12 +324,7 @@ def test_expanding_func(func, static_comp, frame_or_series): result = getattr(obj, func)() assert isinstance(result, frame_or_series) - msg = "The behavior of DataFrame.sum with axis=None is deprecated" - warn = None - if frame_or_series is DataFrame and static_comp is np.sum: - warn = FutureWarning - with tm.assert_produces_warning(warn, match=msg, check_stacklevel=False): - expected = static_comp(data[:11]) + expected = static_comp(data[:11]) if frame_or_series is Series: tm.assert_almost_equal(result[10], expected) else: From f9b5bca8610607f6c61372b79930015c37cbad2d Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 29 Feb 2024 16:27:44 -0500 Subject: [PATCH 2/4] Remove test --- pandas/tests/groupby/aggregate/test_aggregate.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 246ae77f221f2..3f000b64ce3dc 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -1030,10 +1030,6 @@ def test_groupby_as_index_agg(df): gr = df.groupby(ts) gr.nth(0) # invokes set_selection_from_grouper internally - res = gr.apply(sum) - alt = df.groupby(ts).apply(sum) - tm.assert_frame_equal(res, alt) - for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]: gr = df.groupby(ts, as_index=False) left = getattr(gr, attr)() From 18432450a21d687447b17333c3b0cc94a39f89d2 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 29 Feb 2024 16:29:35 -0500 Subject: [PATCH 3/4] cleanup 
--- pandas/tests/frame/test_npfuncs.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/tests/frame/test_npfuncs.py b/pandas/tests/frame/test_npfuncs.py index 639bf71796493..6b5c469403130 100644 --- a/pandas/tests/frame/test_npfuncs.py +++ b/pandas/tests/frame/test_npfuncs.py @@ -36,8 +36,6 @@ def test_sum_axis_behavior(self): res = np.sum(df) expected = df.to_numpy().sum(axis=None) - print(res) - print(expected) assert res == expected def test_np_ravel(self): From 605fe479fd17b0b1167029ccabd1003ff6f4d018 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Tue, 5 Mar 2024 17:11:08 -0500 Subject: [PATCH 4/4] Skip ASV benchmark --- asv_bench/benchmarks/stat_ops.py | 1 + 1 file changed, 1 insertion(+) diff --git a/asv_bench/benchmarks/stat_ops.py b/asv_bench/benchmarks/stat_ops.py index 89bda81ccf08c..8913293dfa20e 100644 --- a/asv_bench/benchmarks/stat_ops.py +++ b/asv_bench/benchmarks/stat_ops.py @@ -33,6 +33,7 @@ def setup(self, op, axis): ("median", 1), ("median", None), ("std", 1), + ("std", None), ) ): # Skipping cases where datetime aggregations are not implemented