diff --git a/doc/source/whatsnew/v0.15.1.rst b/doc/source/whatsnew/v0.15.1.rst index 09b59f35972cd..765201996d544 100644 --- a/doc/source/whatsnew/v0.15.1.rst +++ b/doc/source/whatsnew/v0.15.1.rst @@ -92,7 +92,7 @@ API changes .. code-block:: ipython - In [4]: gr.apply(sum) + In [4]: gr.apply("sum") Out[4]: joe jim @@ -102,9 +102,8 @@ API changes current behavior: .. ipython:: python - :okwarning: - gr.apply(sum) + gr.apply("sum") - Support for slicing with monotonic decreasing indexes, even if ``start`` or ``stop`` is not found in the index (:issue:`7860`): diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 8bb051b6228ce..a95f0485abd5f 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -199,6 +199,7 @@ Removal of prior version deprecations/changes - Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) - Enforced silent-downcasting deprecation for :ref:`all relevant methods ` (:issue:`54710`) - In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`) +- Methods ``apply``, ``agg``, and ``transform`` will no longer replace NumPy functions (e.g. ``np.sum``) and built-in functions (e.g. ``min``) with the equivalent pandas implementation; use string aliases (e.g. 
``"sum"`` and ``"min"``) if you desire to use the pandas implementation (:issue:`53974`) - Passing both ``freq`` and ``fill_value`` in :meth:`DataFrame.shift` and :meth:`Series.shift` and :meth:`.DataFrameGroupBy.shift` now raises a ``ValueError`` (:issue:`54818`) - Removed :meth:`DateOffset.is_anchored` and :meth:`offsets.Tick.is_anchored` (:issue:`56594`) - Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`) diff --git a/pandas/_testing/__init__.py b/pandas/_testing/__init__.py index ddd8d07c5f2eb..12395b42bba19 100644 --- a/pandas/_testing/__init__.py +++ b/pandas/_testing/__init__.py @@ -398,9 +398,6 @@ def external_error_raised(expected_exception: type[Exception]) -> ContextManager return pytest.raises(expected_exception, match=None) -cython_table = pd.core.common._cython_table.items() - - def get_cython_table_params(ndframe, func_names_and_expected): """ Combine frame, functions from com._cython_table @@ -421,11 +418,6 @@ def get_cython_table_params(ndframe, func_names_and_expected): results = [] for func_name, expected in func_names_and_expected: results.append((ndframe, func_name, expected)) - results += [ - (ndframe, func, expected) - for func, name in cython_table - if name == func_name - ] return results diff --git a/pandas/core/apply.py b/pandas/core/apply.py index 0dabb76c2581d..d9d95c96ba0fe 100644 --- a/pandas/core/apply.py +++ b/pandas/core/apply.py @@ -175,10 +175,7 @@ def agg(self) -> DataFrame | Series | None: Result of aggregation, or None if agg cannot be performed by this method. 
""" - obj = self.obj func = self.func - args = self.args - kwargs = self.kwargs if isinstance(func, str): return self.apply_str() @@ -189,12 +186,6 @@ def agg(self) -> DataFrame | Series | None: # we require a list, but not a 'str' return self.agg_list_like() - if callable(func): - f = com.get_cython_func(func) - if f and not args and not kwargs: - warn_alias_replacement(obj, func, f) - return getattr(obj, f)() - # caller can react return None @@ -300,12 +291,6 @@ def transform_str_or_callable(self, func) -> DataFrame | Series: if isinstance(func, str): return self._apply_str(obj, func, *args, **kwargs) - if not args and not kwargs: - f = com.get_cython_func(func) - if f: - warn_alias_replacement(obj, func, f) - return getattr(obj, f)() - # Two possible ways to use a UDF - apply or call directly try: return obj.apply(func, args=args, **kwargs) diff --git a/pandas/core/common.py b/pandas/core/common.py index 8ae30c5257c8b..5f37f3de578e8 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -608,22 +608,6 @@ def require_length_match(data, index: Index) -> None: ) -# the ufuncs np.maximum.reduce and np.minimum.reduce default to axis=0, -# whereas np.min and np.max (which directly call obj.min and obj.max) -# default to axis=None. 
-_builtin_table = { - builtins.sum: np.sum, - builtins.max: np.maximum.reduce, - builtins.min: np.minimum.reduce, -} - -# GH#53425: Only for deprecation -_builtin_table_alias = { - builtins.sum: "np.sum", - builtins.max: "np.maximum.reduce", - builtins.min: "np.minimum.reduce", -} - _cython_table = { builtins.sum: "sum", builtins.max: "max", @@ -660,14 +644,6 @@ def get_cython_func(arg: Callable) -> str | None: return _cython_table.get(arg) -def is_builtin_func(arg): - """ - if we define a builtin function for this argument, return it, - otherwise return the arg - """ - return _builtin_table.get(arg, arg) - - def fill_missing_names(names: Sequence[Hashable | None]) -> list[Hashable]: """ If a name is missing then replace it by level_n, where n is the count diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a604283f3d078..ab5e8bbd4528c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -59,7 +59,6 @@ maybe_mangle_lambdas, reconstruct_func, validate_func_kwargs, - warn_alias_replacement, ) import pandas.core.common as com from pandas.core.frame import DataFrame @@ -357,11 +356,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) return ret else: - cyfunc = com.get_cython_func(func) - if cyfunc and not args and not kwargs: - warn_alias_replacement(self, func, cyfunc) - return getattr(self, cyfunc)() - if maybe_use_numba(engine): return self._aggregate_with_numba( func, *args, engine_kwargs=engine_kwargs, **kwargs @@ -409,11 +403,6 @@ def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs) agg = aggregate def _python_agg_general(self, func, *args, **kwargs): - orig_func = func - func = com.is_builtin_func(func) - if orig_func != func: - alias = com._builtin_table_alias[func] - warn_alias_replacement(self, orig_func, alias) f = lambda x: func(x, *args, **kwargs) obj = self._obj_with_exclusions @@ -1656,11 +1645,6 @@ def aggregate(self, func=None, 
*args, engine=None, engine_kwargs=None, **kwargs) agg = aggregate def _python_agg_general(self, func, *args, **kwargs): - orig_func = func - func = com.is_builtin_func(func) - if orig_func != func: - alias = com._builtin_table_alias[func] - warn_alias_replacement(self, orig_func, alias) f = lambda x: func(x, *args, **kwargs) if self.ngroups == 0: diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 4fa0fe140924a..61168f71f4924 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -94,7 +94,6 @@ class providing the base-class of operations. sample, ) from pandas.core._numba import executor -from pandas.core.apply import warn_alias_replacement from pandas.core.arrays import ( ArrowExtensionArray, BaseMaskedArray, @@ -1647,12 +1646,6 @@ def apply(self, func, *args, include_groups: bool = True, **kwargs) -> NDFrameT: b 2 dtype: int64 """ - orig_func = func - func = com.is_builtin_func(func) - if orig_func != func: - alias = com._builtin_table_alias[orig_func] - warn_alias_replacement(self, orig_func, alias) - if isinstance(func, str): if hasattr(self, func): res = getattr(self, func) @@ -1868,11 +1861,6 @@ def _cython_transform(self, how: str, numeric_only: bool = False, **kwargs): @final def _transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): # optimized transforms - orig_func = func - func = com.get_cython_func(func) or func - if orig_func != func: - warn_alias_replacement(self, orig_func, func) - if not isinstance(func, str): return self._transform_general(func, engine, engine_kwargs, *args, **kwargs) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 4147437114b2f..915cd189c73a9 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -45,16 +45,12 @@ ) import pandas.core.algorithms as algos -from pandas.core.apply import ( - ResamplerWindowApply, - warn_alias_replacement, -) +from pandas.core.apply import ResamplerWindowApply from pandas.core.arrays 
import ArrowExtensionArray from pandas.core.base import ( PandasObject, SelectionMixin, ) -import pandas.core.common as com from pandas.core.generic import ( NDFrame, _shared_docs, @@ -1609,10 +1605,6 @@ def _downsample(self, how, **kwargs): how : string / cython mapped function **kwargs : kw args passed to how function """ - orig_how = how - how = com.get_cython_func(how) or how - if orig_how != how: - warn_alias_replacement(self, orig_how, how) ax = self.ax # Excludes `on` column when provided @@ -1775,10 +1767,6 @@ def _downsample(self, how, **kwargs): if self.kind == "timestamp": return super()._downsample(how, **kwargs) - orig_how = how - how = com.get_cython_func(how) or how - if orig_how != how: - warn_alias_replacement(self, orig_how, how) ax = self.ax if is_subperiod(ax.freq, self.freq): diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index 2dd7f9902c78d..9f3fee686a056 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -1699,13 +1699,11 @@ def foo2(x, b=2, c=0): def test_agg_std(): df = DataFrame(np.arange(6).reshape(3, 2), columns=["A", "B"]) - with tm.assert_produces_warning(FutureWarning, match="using DataFrame.std"): - result = df.agg(np.std) + result = df.agg(np.std, ddof=1) expected = Series({"A": 2.0, "B": 2.0}, dtype=float) tm.assert_series_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match="using Series.std"): - result = df.agg([np.std]) + result = df.agg([np.std], ddof=1) expected = DataFrame({"A": 2.0, "B": 2.0}, index=["std"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/apply/test_frame_apply_relabeling.py b/pandas/tests/apply/test_frame_apply_relabeling.py index 723bdd349c0cb..57c109abba304 100644 --- a/pandas/tests/apply/test_frame_apply_relabeling.py +++ b/pandas/tests/apply/test_frame_apply_relabeling.py @@ -49,24 +49,20 @@ def test_agg_relabel_multi_columns_multi_methods(): def 
test_agg_relabel_partial_functions(): # GH 26513, test on partial, functools or more complex cases df = pd.DataFrame({"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}) - msg = "using Series.[mean|min]" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min)) + result = df.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min)) expected = pd.DataFrame( {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"]) ) tm.assert_frame_equal(result, expected) - msg = "using Series.[mean|min|max|sum]" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.agg( - foo=("A", min), - bar=("A", np.min), - cat=("B", max), - dat=("C", "min"), - f=("B", np.sum), - kk=("B", lambda x: min(x)), - ) + result = df.agg( + foo=("A", min), + bar=("A", np.min), + cat=("B", max), + dat=("C", "min"), + f=("B", np.sum), + kk=("B", lambda x: min(x)), + ) expected = pd.DataFrame( { "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan], diff --git a/pandas/tests/apply/test_series_apply.py b/pandas/tests/apply/test_series_apply.py index e0153c97b8f29..53fc135e77780 100644 --- a/pandas/tests/apply/test_series_apply.py +++ b/pandas/tests/apply/test_series_apply.py @@ -547,10 +547,7 @@ def test_apply_listlike_reducer(string_series, ops, names, how, kwargs): # GH 39140 expected = Series({name: op(string_series) for name, op in zip(names, ops)}) expected.name = "series" - warn = FutureWarning if how == "agg" else None - msg = f"using Series.[{'|'.join(names)}]" - with tm.assert_produces_warning(warn, match=msg): - result = getattr(string_series, how)(ops, **kwargs) + result = getattr(string_series, how)(ops, **kwargs) tm.assert_series_equal(result, expected) @@ -571,10 +568,7 @@ def test_apply_dictlike_reducer(string_series, ops, how, kwargs, by_row): # GH 39140 expected = Series({name: op(string_series) for name, op in ops.items()}) expected.name = string_series.name - warn = FutureWarning if how == 
"agg" else None - msg = "using Series.[sum|mean]" - with tm.assert_produces_warning(warn, match=msg): - result = getattr(string_series, how)(ops, **kwargs) + result = getattr(string_series, how)(ops, **kwargs) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/apply/test_series_apply_relabeling.py b/pandas/tests/apply/test_series_apply_relabeling.py index cdfa054f91c9b..c0a285e6eb38c 100644 --- a/pandas/tests/apply/test_series_apply_relabeling.py +++ b/pandas/tests/apply/test_series_apply_relabeling.py @@ -14,12 +14,8 @@ def test_relabel_no_duplicated_method(): expected = df["B"].agg({"foo": "min", "bar": "max"}) tm.assert_series_equal(result, expected) - msg = "using Series.[sum|min|max]" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df["B"].agg(foo=sum, bar=min, cat="max") - msg = "using Series.[sum|min|max]" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"}) + result = df["B"].agg(foo=sum, bar=min, cat="max") + expected = df["B"].agg({"foo": sum, "bar": min, "cat": "max"}) tm.assert_series_equal(result, expected) @@ -32,8 +28,6 @@ def test_relabel_duplicated_method(): expected = pd.Series([6, 6], index=["foo", "bar"], name="A") tm.assert_series_equal(result, expected) - msg = "using Series.min" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df["B"].agg(foo=min, bar="min") + result = df["B"].agg(foo=min, bar="min") expected = pd.Series([1, 1], index=["foo", "bar"], name="B") tm.assert_series_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index c6962815ffda1..255784e8bf24d 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -289,9 +289,7 @@ def func(ser): def test_agg_multiple_functions_maintain_order(df): # GH #610 funcs = [("mean", np.mean), ("max", np.max), ("min", 
np.min)] - msg = "is currently using SeriesGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("A")["C"].agg(funcs) + result = df.groupby("A")["C"].agg(funcs) exp_cols = Index(["mean", "max", "min"]) tm.assert_index_equal(result.columns, exp_cols) @@ -881,11 +879,9 @@ def test_agg_relabel_multiindex_column( expected = DataFrame({"a_max": [1, 3]}, index=idx) tm.assert_frame_equal(result, expected) - msg = "is currently using SeriesGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby(("x", "group")).agg( - col_1=agg_col1, col_2=agg_col2, col_3=agg_col3 - ) + result = df.groupby(("x", "group")).agg( + col_1=agg_col1, col_2=agg_col2, col_3=agg_col3 + ) expected = DataFrame( {"col_1": agg_result1, "col_2": agg_result2, "col_3": agg_result3}, index=idx ) @@ -1036,13 +1032,6 @@ def test_groupby_as_index_agg(df): gr = df.groupby(ts) gr.nth(0) # invokes set_selection_from_grouper internally - msg = "The behavior of DataFrame.sum with axis=None is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False): - res = gr.apply(sum) - with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False): - alt = df.groupby(ts).apply(sum) - tm.assert_frame_equal(res, alt) - for attr in ["mean", "max", "count", "idxmax", "cumsum", "all"]: gr = df.groupby(ts, as_index=False) left = getattr(gr, attr)() diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 19eef06de9136..aafd06e8f88cf 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -21,7 +21,6 @@ bdate_range, ) import pandas._testing as tm -import pandas.core.common as com @pytest.mark.parametrize( @@ -85,10 +84,8 @@ def test_cython_agg_boolean(): } ) result = frame.groupby("a")["b"].mean() - msg = "using SeriesGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, 
match=msg): - # GH#53425 - expected = frame.groupby("a")["b"].agg(np.mean) + # GH#53425 + expected = frame.groupby("a")["b"].agg(np.mean) tm.assert_series_equal(result, expected) @@ -152,10 +149,7 @@ def test_cython_fail_agg(): grouped = ts.groupby(lambda x: x.month) summed = grouped.sum() - msg = "using SeriesGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - expected = grouped.agg(np.sum) + expected = grouped.agg(np.sum) tm.assert_series_equal(summed, expected) @@ -176,13 +170,12 @@ def test_cython_fail_agg(): def test__cython_agg_general(op, targop): df = DataFrame(np.random.default_rng(2).standard_normal(1000)) labels = np.random.default_rng(2).integers(0, 50, size=1000).astype(float) + kwargs = {"ddof": 1} if op == "var" else {} + if op not in ["first", "last"]: + kwargs["axis"] = 0 result = df.groupby(labels)._cython_agg_general(op, alt=None, numeric_only=True) - warn = FutureWarning if targop in com._cython_table else None - msg = f"using DataFrameGroupBy.{op}" - with tm.assert_produces_warning(warn, match=msg): - # GH#53425 - expected = df.groupby(labels).agg(targop) + expected = df.groupby(labels).agg(targop, **kwargs) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/aggregate/test_other.py b/pandas/tests/groupby/aggregate/test_other.py index ef45381ba1a7f..12f99e3cf7a63 100644 --- a/pandas/tests/groupby/aggregate/test_other.py +++ b/pandas/tests/groupby/aggregate/test_other.py @@ -410,10 +410,7 @@ def __call__(self, x): expected = df.groupby("foo").agg("sum") for ecall in equiv_callables: - warn = FutureWarning if ecall is sum or ecall is np.sum else None - msg = "using DataFrameGroupBy.sum" - with tm.assert_produces_warning(warn, match=msg): - result = df.groupby("foo").agg(ecall) + result = df.groupby("foo").agg(ecall) tm.assert_frame_equal(result, expected) @@ -587,9 +584,7 @@ def test_agg_category_nansum(observed): df = DataFrame( {"A": pd.Categorical(["a", "a", "b"], 
categories=categories), "B": [1, 2, 3]} ) - msg = "using SeriesGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("A", observed=observed).B.agg(np.nansum) + result = df.groupby("A", observed=observed).B.agg(np.nansum) expected = Series( [3, 3, 0], index=pd.CategoricalIndex(["a", "b", "c"], categories=categories, name="A"), diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index ee82d8ad37c2d..dcb73bdba2f9c 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1177,17 +1177,14 @@ def test_apply_is_unchanged_when_other_methods_are_called_first(reduction_func): # Check output when no other methods are called before .apply() grp = df.groupby(by="a") - msg = "The behavior of DataFrame.sum with axis=None is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False): - result = grp.apply(sum, include_groups=False) + result = grp.apply(np.sum, axis=0, include_groups=False) tm.assert_frame_equal(result, expected) # Check output when another method is called before .apply() grp = df.groupby(by="a") args = get_groupby_method_args(reduction_func, df) _ = getattr(grp, reduction_func)(*args) - with tm.assert_produces_warning(FutureWarning, match=msg, check_stacklevel=False): - result = grp.apply(sum, include_groups=False) + result = grp.apply(np.sum, axis=0, include_groups=False) tm.assert_frame_equal(result, expected) @@ -1503,46 +1500,16 @@ def test_include_groups(include_groups): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize("f", [max, min, sum]) -@pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key -def test_builtins_apply(keys, f): - # see gh-8155 - rs = np.random.default_rng(2) - df = DataFrame(rs.integers(1, 7, (10, 2)), columns=["jim", "joe"]) - df["jolie"] = rs.standard_normal(10) +@pytest.mark.parametrize("func, value", [(max, 2), (min, 1), (sum, 3)]) +def 
test_builtins_apply(func, value): + # GH#8155, GH#53974 + # Builtins act as e.g. sum(group), which sums the column labels of group + df = DataFrame({0: [1, 1, 2], 1: [3, 4, 5], 2: [3, 4, 5]}) + gb = df.groupby(0) + result = gb.apply(func, include_groups=False) - gb = df.groupby(keys) - - fname = f.__name__ - - warn = None if f is not sum else FutureWarning - msg = "The behavior of DataFrame.sum with axis=None is deprecated" - with tm.assert_produces_warning( - warn, match=msg, check_stacklevel=False, raise_on_extra_warnings=False - ): - # Also warns on deprecation GH#53425 - result = gb.apply(f) - ngroups = len(df.drop_duplicates(subset=keys)) - - assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))" - assert result.shape == (ngroups, 3), assert_msg - - npfunc = lambda x: getattr(np, fname)(x, axis=0) # numpy's equivalent function - msg = "DataFrameGroupBy.apply operated on the grouping columns" - with tm.assert_produces_warning(DeprecationWarning, match=msg): - expected = gb.apply(npfunc) - tm.assert_frame_equal(result, expected) - - with tm.assert_produces_warning(DeprecationWarning, match=msg): - expected2 = gb.apply(lambda x: npfunc(x)) - tm.assert_frame_equal(result, expected2) - - if f != sum: - expected = gb.agg(fname).reset_index() - expected.set_index(keys, inplace=True, drop=False) - tm.assert_frame_equal(result, expected, check_dtype=False) - - tm.assert_series_equal(getattr(result, fname)(axis=0), getattr(df, fname)(axis=0)) + expected = Series([value, value], index=Index([1, 2], name=0)) + tm.assert_series_equal(result, expected) def test_inconsistent_return_type(): diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 10eca5ea8427f..76c8a6fdb9570 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -138,19 +138,13 @@ def f(x): df = DataFrame({"a": [5, 15, 25]}) c = pd.cut(df.a, bins=[0, 10, 20, 30, 40]) - msg = "using 
SeriesGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - result = df.a.groupby(c, observed=False).transform(sum) + result = df.a.groupby(c, observed=False).transform(sum) tm.assert_series_equal(result, df["a"]) tm.assert_series_equal( df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df["a"] ) - msg = "using DataFrameGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - result = df.groupby(c, observed=False).transform(sum) + result = df.groupby(c, observed=False).transform(sum) expected = df[["a"]] tm.assert_frame_equal(result, expected) @@ -159,10 +153,7 @@ def f(x): tm.assert_frame_equal(result, df[["a"]]) result2 = gbc.transform(lambda xs: np.max(xs, axis=0)) - msg = "using DataFrameGroupBy.max" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - result3 = gbc.transform(max) + result3 = gbc.transform(max) result4 = gbc.transform(np.maximum.reduce) result5 = gbc.transform(lambda xs: np.maximum.reduce(xs)) tm.assert_frame_equal(result2, df[["a"]], check_dtype=False) @@ -178,19 +169,13 @@ def f(x): df = DataFrame({"a": [5, 15, 25, -5]}) c = pd.cut(df.a, bins=[-10, 0, 10, 20, 30, 40]) - msg = "using SeriesGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - result = df.a.groupby(c, observed=False).transform(sum) + result = df.a.groupby(c, observed=False).transform(sum) tm.assert_series_equal(result, df["a"]) tm.assert_series_equal( df.a.groupby(c, observed=False).transform(lambda xs: np.sum(xs)), df["a"] ) - msg = "using DataFrameGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - result = df.groupby(c, observed=False).transform(sum) + result = df.groupby(c, observed=False).transform(sum) expected = df[["a"]] tm.assert_frame_equal(result, expected) @@ -319,10 +304,7 @@ def test_apply(ordered): result = grouped.mean() tm.assert_frame_equal(result, expected) - msg = "using 
DataFrameGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - result = grouped.agg(np.mean) + result = grouped.agg(np.mean) tm.assert_frame_equal(result, expected) # but for transform we should still get back the original index @@ -1245,10 +1227,7 @@ def test_seriesgroupby_observed_true(df_cat, operation): expected = Series(data=[2, 4, 1, 3], index=index, name="C").sort_index() grouped = df_cat.groupby(["A", "B"], observed=True)["C"] - msg = "using np.sum" if operation == "apply" else "using SeriesGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - result = getattr(grouped, operation)(sum) + result = getattr(grouped, operation)(sum) tm.assert_series_equal(result, expected) @@ -1267,10 +1246,7 @@ def test_seriesgroupby_observed_false_or_none(df_cat, observed, operation): expected = Series(data=[2, 4, 0, 1, 0, 3], index=index, name="C") grouped = df_cat.groupby(["A", "B"], observed=observed)["C"] - msg = "using SeriesGroupBy.sum" if operation == "agg" else "using np.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - result = getattr(grouped, operation)(sum) + result = getattr(grouped, operation)(sum) tm.assert_series_equal(result, expected) @@ -1709,10 +1685,7 @@ def test_categorical_transform(): categories=["Waiting", "OnTheWay", "Delivered"], ordered=True ) df["status"] = df["status"].astype(delivery_status_type) - msg = "using SeriesGroupBy.max" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - df["last_status"] = df.groupby("package_id")["status"].transform(max) + df["last_status"] = df.groupby("package_id")["status"].transform(max) result = df.copy() expected = DataFrame( @@ -1727,21 +1700,17 @@ def test_categorical_transform(): "Waiting", ], "last_status": [ - "Delivered", - "Delivered", - "Delivered", - "OnTheWay", - "OnTheWay", + "Waiting", + "Waiting", + "Waiting", + "Waiting", + "Waiting", "Waiting", ], } ) expected["status"] = 
expected["status"].astype(delivery_status_type) - - # .transform(max) should preserve ordered categoricals - expected["last_status"] = expected["last_status"].astype(delivery_status_type) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index c2456790e4953..18465d00d17e2 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -86,7 +86,7 @@ def df_with_cat_col(): def _call_and_check(klass, msg, how, gb, groupby_func, args, warn_msg=""): warn_klass = None if warn_msg == "" else FutureWarning - with tm.assert_produces_warning(warn_klass, match=warn_msg): + with tm.assert_produces_warning(warn_klass, match=warn_msg, check_stacklevel=False): if klass is None: if how == "method": getattr(gb, groupby_func)(*args) @@ -219,15 +219,14 @@ def test_groupby_raises_string_np( np.sum: (None, ""), np.mean: ( TypeError, - re.escape("agg function failed [how->mean,dtype->object]"), + "Could not convert string .* to numeric", ), }[groupby_func_np] - - if groupby_series: - warn_msg = "using SeriesGroupBy.[sum|mean]" + if how == "transform" and groupby_func_np is np.sum and not groupby_series: + warn_msg = "The behavior of DataFrame.sum with axis=None is deprecated" else: - warn_msg = "using DataFrameGroupBy.[sum|mean]" - _call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg=warn_msg) + warn_msg = "" + _call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg) @pytest.mark.parametrize("how", ["method", "agg", "transform"]) @@ -328,15 +327,13 @@ def test_groupby_raises_datetime_np( gb = gb["d"] klass, msg = { - np.sum: (TypeError, "datetime64 type does not support sum operations"), + np.sum: ( + TypeError, + re.escape("datetime64[us] does not support reduction 'sum'"), + ), np.mean: (None, ""), }[groupby_func_np] - - if groupby_series: - warn_msg = "using SeriesGroupBy.[sum|mean]" - else: - warn_msg = "using DataFrameGroupBy.[sum|mean]" - 
_call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg=warn_msg) + _call_and_check(klass, msg, how, gb, groupby_func_np, ()) @pytest.mark.parametrize("func", ["prod", "cumprod", "skew", "var"]) @@ -528,18 +525,13 @@ def test_groupby_raises_category_np( gb = gb["d"] klass, msg = { - np.sum: (TypeError, "category type does not support sum operations"), + np.sum: (TypeError, "dtype category does not support reduction 'sum'"), np.mean: ( TypeError, - "category dtype does not support aggregation 'mean'", + "dtype category does not support reduction 'mean'", ), }[groupby_func_np] - - if groupby_series: - warn_msg = "using SeriesGroupBy.[sum|mean]" - else: - warn_msg = "using DataFrameGroupBy.[sum|mean]" - _call_and_check(klass, msg, how, gb, groupby_func_np, (), warn_msg=warn_msg) + _call_and_check(klass, msg, how, gb, groupby_func_np, ()) @pytest.mark.parametrize("how", ["method", "agg", "transform"]) diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index 1a32dcefed91a..e304a5ae467d8 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -36,20 +36,17 @@ def test_basic_aggregations(dtype): for k, v in grouped: assert len(v) == 3 - msg = "using SeriesGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - agged = grouped.aggregate(np.mean) + agged = grouped.aggregate(np.mean) assert agged[1] == 1 - msg = "using SeriesGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = grouped.agg(np.mean) + expected = grouped.agg(np.mean) tm.assert_series_equal(agged, expected) # shorthand tm.assert_series_equal(agged, grouped.mean()) result = grouped.sum() - msg = "using SeriesGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = grouped.agg(np.sum) + expected = grouped.agg(np.sum) + if dtype == "int32": + # NumPy's sum returns int64 + expected = expected.astype("int32") 
tm.assert_series_equal(result, expected) expected = grouped.apply(lambda x: x * x.sum()) @@ -58,15 +55,11 @@ def test_basic_aggregations(dtype): tm.assert_series_equal(transformed, expected) value_grouped = data.groupby(data) - msg = "using SeriesGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = value_grouped.aggregate(np.mean) + result = value_grouped.aggregate(np.mean) tm.assert_series_equal(result, agged, check_index_type=False) # complex agg - msg = "using SeriesGroupBy.[mean|std]" - with tm.assert_produces_warning(FutureWarning, match=msg): - agged = grouped.aggregate([np.mean, np.std]) + agged = grouped.aggregate([np.mean, np.std]) msg = r"nested renamer is not supported" with pytest.raises(pd.errors.SpecificationError, match=msg): @@ -450,15 +443,11 @@ def test_cython_median(): labels[::17] = np.nan result = df.groupby(labels).median() - msg = "using DataFrameGroupBy.median" - with tm.assert_produces_warning(FutureWarning, match=msg): - exp = df.groupby(labels).agg(np.nanmedian) + exp = df.groupby(labels).agg(np.nanmedian) tm.assert_frame_equal(result, exp) df = DataFrame(np.random.default_rng(2).standard_normal((1000, 5))) - msg = "using DataFrameGroupBy.median" - with tm.assert_produces_warning(FutureWarning, match=msg): - rs = df.groupby(labels).agg(np.median) + rs = df.groupby(labels).agg(np.median) xp = df.groupby(labels).median() tm.assert_frame_equal(rs, xp) @@ -953,15 +942,11 @@ def test_intercept_builtin_sum(): s = Series([1.0, 2.0, np.nan, 3.0]) grouped = s.groupby([0, 1, 2, 2]) - msg = "using SeriesGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - result = grouped.agg(builtins.sum) - msg = "using np.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - result2 = grouped.apply(builtins.sum) - expected = grouped.sum() + # GH#53425 + result = grouped.agg(builtins.sum) + # GH#53425 + result2 = grouped.apply(builtins.sum) + expected = Series([1.0, 2.0, 
np.nan], index=np.array([0, 1, 2])) tm.assert_series_equal(result, expected) tm.assert_series_equal(result2, expected) @@ -1096,10 +1081,8 @@ def test_ops_general(op, targop): labels = np.random.default_rng(2).integers(0, 50, size=1000).astype(float) result = getattr(df.groupby(labels), op)() - warn = None if op in ("first", "last", "count", "sem") else FutureWarning - msg = f"using DataFrameGroupBy.{op}" - with tm.assert_produces_warning(warn, match=msg): - expected = df.groupby(labels).agg(targop) + kwargs = {"ddof": 1, "axis": 0} if op in ["std", "var"] else {} + expected = df.groupby(labels).agg(targop, **kwargs) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index 1bb3539830900..db327cc689afe 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -78,9 +78,7 @@ def demean(arr): # GH 9700 df = DataFrame({"a": range(5, 10), "b": range(5)}) - msg = "using DataFrameGroupBy.max" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("a").transform(max) + result = df.groupby("a").transform(max) expected = DataFrame({"b": range(5)}) tm.assert_frame_equal(result, expected) @@ -98,9 +96,7 @@ def test_transform_fast(): values = np.repeat(grp.mean().values, ensure_platform_int(grp.count().values)) expected = Series(values, index=df.index, name="val") - msg = "using SeriesGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grp.transform(np.mean) + result = grp.transform(np.mean) tm.assert_series_equal(result, expected) result = grp.transform("mean") @@ -151,18 +147,14 @@ def test_transform_fast3(): def test_transform_broadcast(tsframe, ts): grouped = ts.groupby(lambda x: x.month) - msg = "using SeriesGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grouped.transform(np.mean) + result = 
grouped.transform(np.mean) tm.assert_index_equal(result.index, ts.index) for _, gp in grouped: assert_fp_equal(result.reindex(gp.index), gp.mean()) grouped = tsframe.groupby(lambda x: x.month) - msg = "using DataFrameGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = grouped.transform(np.mean) + result = grouped.transform(np.mean) tm.assert_index_equal(result.index, tsframe.index) for _, gp in grouped: agged = gp.mean(axis=0) @@ -309,12 +301,8 @@ def test_transform_casting(): def test_transform_multiple(ts): grouped = ts.groupby([lambda x: x.year, lambda x: x.month]) - grouped.transform(lambda x: x * 2) - - msg = "using SeriesGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped.transform(np.mean) + grouped.transform(np.mean) def test_dispatch_transform(tsframe): @@ -419,15 +407,11 @@ def test_transform_nuisance_raises(df): def test_transform_function_aliases(df): result = df.groupby("A").transform("mean", numeric_only=True) - msg = "using DataFrameGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.groupby("A")[["C", "D"]].transform(np.mean) + expected = df.groupby("A")[["C", "D"]].transform(np.mean) tm.assert_frame_equal(result, expected) result = df.groupby("A")["C"].transform("mean") - msg = "using SeriesGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = df.groupby("A")["C"].transform(np.mean) + expected = df.groupby("A")["C"].transform(np.mean) tm.assert_series_equal(result, expected) @@ -447,22 +431,19 @@ def test_series_fast_transform_date(): tm.assert_series_equal(result, expected) -def test_transform_length(): +@pytest.mark.parametrize("func", [lambda x: np.nansum(x), sum]) +def test_transform_length(func): # GH 9697 df = DataFrame({"col1": [1, 1, 2, 2], "col2": [1, 2, 3, np.nan]}) - expected = Series([3.0] * 4) - - def nsum(x): - return np.nansum(x) + if func is sum: + expected = Series([3.0, 3.0, np.nan, np.nan]) + 
else: + expected = Series([3.0] * 4) - msg = "using DataFrameGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - results = [ - df.groupby("col1").transform(sum)["col2"], - df.groupby("col1")["col2"].transform(sum), - df.groupby("col1").transform(nsum)["col2"], - df.groupby("col1")["col2"].transform(nsum), - ] + results = [ + df.groupby("col1").transform(func)["col2"], + df.groupby("col1")["col2"].transform(func), + ] for result in results: tm.assert_series_equal(result, expected, check_names=False) @@ -474,10 +455,7 @@ def test_transform_coercion(): df = DataFrame({"A": ["a", "a", "b", "b"], "B": [0, 1, 3, 4]}) g = df.groupby("A") - msg = "using DataFrameGroupBy.mean" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = g.transform(np.mean) - + expected = g.transform(np.mean) result = g.transform(lambda x: np.mean(x, axis=0)) tm.assert_frame_equal(result, expected) @@ -547,9 +525,7 @@ def test_groupby_transform_with_int(): def test_groupby_transform_with_nan_group(): # GH 9941 df = DataFrame({"a": range(10), "b": [1, 1, 2, 3, np.nan, 4, 4, 5, 5, 5]}) - msg = "using SeriesGroupBy.max" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby(df.b)["a"].transform(max) + result = df.groupby(df.b)["a"].transform(max) expected = Series([1.0, 1.0, 2.0, 3.0, np.nan, 6.0, 6.0, 9.0, 9.0, 9.0], name="a") tm.assert_series_equal(result, expected) @@ -1019,9 +995,7 @@ def test_any_all_np_func(func): exp = Series([True, np.nan, True], name="val") - msg = "using SeriesGroupBy.[any|all]" - with tm.assert_produces_warning(FutureWarning, match=msg): - res = df.groupby("key")["val"].transform(func) + res = df.groupby("key")["val"].transform(func) tm.assert_series_equal(res, exp) @@ -1051,10 +1025,7 @@ def test_groupby_transform_timezone_column(func): # GH 24198 ts = pd.to_datetime("now", utc=True).tz_convert("Asia/Singapore") result = DataFrame({"end_time": [ts], "id": [1]}) - warn = FutureWarning if not 
isinstance(func, str) else None - msg = "using SeriesGroupBy.[min|max]" - with tm.assert_produces_warning(warn, match=msg): - result["max_end_time"] = result.groupby("id").end_time.transform(func) + result["max_end_time"] = result.groupby("id").end_time.transform(func) expected = DataFrame([[ts, 1, ts]], columns=["end_time", "id", "max_end_time"]) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index c5ef0f39ece19..461b6bfc3b420 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1900,9 +1900,7 @@ def test_resample_apply_product(duplicates, unit): if duplicates: df.columns = ["A", "A"] - msg = "using DatetimeIndexResampler.prod" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.resample("QE").apply(np.prod) + result = df.resample("QE").apply(np.prod) expected = DataFrame( np.array([[0, 24], [60, 210], [336, 720], [990, 1716]], dtype=np.int64), index=DatetimeIndex( diff --git a/pandas/tests/resample/test_resample_api.py b/pandas/tests/resample/test_resample_api.py index 465d287fd8eff..f3b9c909290a8 100644 --- a/pandas/tests/resample/test_resample_api.py +++ b/pandas/tests/resample/test_resample_api.py @@ -440,34 +440,30 @@ def cases(request): def test_agg_mixed_column_aggregation(cases, a_mean, a_std, b_mean, b_std, request): expected = pd.concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "std"]]) - msg = "using SeriesGroupBy.[mean|std]" + expected.columns = pd.MultiIndex.from_product([["A", "B"], ["mean", "<lambda_0>"]]) # "date" is an index and a column, so get included in the agg if "df_mult" in request.node.callspec.id: date_mean = cases["date"].mean() date_std = cases["date"].std() expected = pd.concat([date_mean, date_std, expected], axis=1) expected.columns = pd.MultiIndex.from_product( - [["date", "A", "B"], ["mean", 
"std"]] + [["date", "A", "B"], ["mean", "<lambda_0>"]] + ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = cases.aggregate([np.mean, np.std]) + result = cases.aggregate([np.mean, lambda x: np.std(x, ddof=1)]) tm.assert_frame_equal(result, expected) @pytest.mark.parametrize( "agg", [ - {"func": {"A": np.mean, "B": np.std}}, - {"A": ("A", np.mean), "B": ("B", np.std)}, - {"A": NamedAgg("A", np.mean), "B": NamedAgg("B", np.std)}, + {"func": {"A": np.mean, "B": lambda x: np.std(x, ddof=1)}}, + {"A": ("A", np.mean), "B": ("B", lambda x: np.std(x, ddof=1))}, + {"A": NamedAgg("A", np.mean), "B": NamedAgg("B", lambda x: np.std(x, ddof=1))}, ], ) def test_agg_both_mean_std_named_result(cases, a_mean, b_std, agg): - msg = "using SeriesGroupBy.[mean|std]" expected = pd.concat([a_mean, b_std], axis=1) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = cases.aggregate(**agg) + result = cases.aggregate(**agg) tm.assert_frame_equal(result, expected, check_like=True) @@ -523,11 +519,9 @@ def test_agg_dict_of_lists(cases, a_mean, a_std, b_mean, b_std): ) def test_agg_with_lambda(cases, agg): # passed lambda - msg = "using SeriesGroupBy.sum" rcustom = cases["B"].apply(lambda x: np.std(x, ddof=1)) expected = pd.concat([cases["A"].sum(), rcustom], axis=1) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = cases.agg(**agg) + result = cases.agg(**agg) tm.assert_frame_equal(result, expected, check_like=True) diff --git a/pandas/tests/resample/test_time_grouper.py b/pandas/tests/resample/test_time_grouper.py index c5e202f36659b..11ad9240527d5 100644 --- a/pandas/tests/resample/test_time_grouper.py +++ b/pandas/tests/resample/test_time_grouper.py @@ -57,12 +57,8 @@ def test_count(test_series): def test_numpy_reduction(test_series): result = test_series.resample("YE", closed="right").prod() - - msg = "using SeriesGroupBy.prod" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = test_series.groupby(lambda x: 
x.year).agg(np.prod) + expected = test_series.groupby(lambda x: x.year).agg(np.prod) expected.index = result.index - tm.assert_series_equal(result, expected) diff --git a/pandas/tests/reshape/test_crosstab.py b/pandas/tests/reshape/test_crosstab.py index 2a538d34d8b2c..c4af63fe5cc81 100644 --- a/pandas/tests/reshape/test_crosstab.py +++ b/pandas/tests/reshape/test_crosstab.py @@ -452,11 +452,9 @@ def test_crosstab_normalize_arrays(self): index=Index([1, 2, "All"], name="a", dtype="object"), columns=Index([3, 4, "All"], name="b", dtype="object"), ) - msg = "using DataFrameGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - test_case = crosstab( - df.a, df.b, df.c, aggfunc=np.sum, normalize="all", margins=True - ) + test_case = crosstab( + df.a, df.b, df.c, aggfunc=np.sum, normalize="all", margins=True + ) tm.assert_frame_equal(test_case, norm_sum) def test_crosstab_with_empties(self): @@ -655,16 +653,14 @@ def test_crosstab_normalize_multiple_columns(self): } ) - msg = "using DataFrameGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = crosstab( - [df.A, df.B], - df.C, - values=df.D, - aggfunc=np.sum, - normalize=True, - margins=True, - ) + result = crosstab( + [df.A, df.B], + df.C, + values=df.D, + aggfunc=np.sum, + normalize=True, + margins=True, + ) expected = DataFrame( np.array([0] * 29 + [1], dtype=float).reshape(10, 3), columns=Index(["bar", "foo", "All"], name="C"), diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 916fe6fdb64b1..99250dc929997 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -11,6 +11,8 @@ from pandas._config import using_pyarrow_string_dtype +from pandas.compat.numpy import np_version_gte1p25 + import pandas as pd from pandas import ( Categorical, @@ -2059,10 +2061,10 @@ def test_pivot_string_as_func(self): [ ("sum", np.sum), ("mean", np.mean), - ("std", np.std), + ("min", np.min), (["sum", "mean"], 
[np.sum, np.mean]), - (["sum", "std"], [np.sum, np.std]), - (["std", "mean"], [np.std, np.mean]), + (["sum", "min"], [np.sum, np.min]), + (["max", "mean"], [np.max, np.mean]), ], ) def test_pivot_string_func_vs_func(self, f, f_numpy, data): @@ -2070,10 +2072,14 @@ def test_pivot_string_func_vs_func(self, f, f_numpy, data): # for consistency purposes data = data.drop(columns="C") result = pivot_table(data, index="A", columns="B", aggfunc=f) - ops = "|".join(f) if isinstance(f, list) else f - msg = f"using DataFrameGroupBy.[{ops}]" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = pivot_table(data, index="A", columns="B", aggfunc=f_numpy) + expected = pivot_table(data, index="A", columns="B", aggfunc=f_numpy) + + if not np_version_gte1p25 and isinstance(f_numpy, list): + # Prior to 1.25, np.min/np.max would come through as amin and amax + mapper = {"amin": "min", "amax": "max", "sum": "sum", "mean": "mean"} + expected.columns = expected.columns.map( + lambda x: (mapper[x[0]], x[1], x[2]) + ) tm.assert_frame_equal(result, expected) @pytest.mark.slow diff --git a/pandas/tests/window/test_api.py b/pandas/tests/window/test_api.py index b4d555203212e..5ba08ac13fcee 100644 --- a/pandas/tests/window/test_api.py +++ b/pandas/tests/window/test_api.py @@ -87,14 +87,12 @@ def test_agg(step): b_mean = r["B"].mean() b_std = r["B"].std() - with tm.assert_produces_warning(FutureWarning, match="using Rolling.[mean|std]"): - result = r.aggregate([np.mean, np.std]) + result = r.aggregate([np.mean, lambda x: np.std(x, ddof=1)]) expected = concat([a_mean, a_std, b_mean, b_std], axis=1) - expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "std"]]) + expected.columns = MultiIndex.from_product([["A", "B"], ["mean", "<lambda>"]]) tm.assert_frame_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match="using Rolling.[mean|std]"): - result = r.aggregate({"A": np.mean, "B": np.std}) + result = r.aggregate({"A": np.mean, "B": lambda x: 
np.std(x, ddof=1)}) expected = concat([a_mean, b_std], axis=1) tm.assert_frame_equal(result, expected, check_like=True) @@ -134,8 +132,7 @@ def test_agg_apply(raw): r = df.rolling(window=3) a_sum = r["A"].sum() - with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|std]"): - result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) + result = r.agg({"A": np.sum, "B": lambda x: np.std(x, ddof=1)}) rcustom = r["B"].apply(lambda x: np.std(x, ddof=1), raw=raw) expected = concat([a_sum, rcustom], axis=1) tm.assert_frame_equal(result, expected, check_like=True) @@ -145,18 +142,15 @@ def test_agg_consistency(step): df = DataFrame({"A": range(5), "B": range(0, 10, 2)}) r = df.rolling(window=3, step=step) - with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"): - result = r.agg([np.sum, np.mean]).columns + result = r.agg([np.sum, np.mean]).columns expected = MultiIndex.from_product([list("AB"), ["sum", "mean"]]) tm.assert_index_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"): - result = r["A"].agg([np.sum, np.mean]).columns + result = r["A"].agg([np.sum, np.mean]).columns expected = Index(["sum", "mean"]) tm.assert_index_equal(result, expected) - with tm.assert_produces_warning(FutureWarning, match="using Rolling.[sum|mean]"): - result = r.agg({"A": [np.sum, np.mean]}).columns + result = r.agg({"A": [np.sum, np.mean]}).columns expected = MultiIndex.from_tuples([("A", "sum"), ("A", "mean")]) tm.assert_index_equal(result, expected)