From fa83641eef84fa6fbf068d8c3faaa58d3b5529b7 Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 28 Mar 2023 23:10:12 -0400 Subject: [PATCH 1/4] DEPR: Passing a dictionary to SeriesGroupBy.agg --- doc/source/whatsnew/v2.1.0.rst | 1 + pandas/core/groupby/generic.py | 10 +++ pandas/core/groupby/groupby.py | 87 +++++++++++++++++++++++++++ pandas/tests/groupby/test_groupby.py | 4 +- pandas/tests/groupby/test_grouping.py | 4 +- 5 files changed, 104 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 1f8c93978c890..643bcc0df8981 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -115,6 +115,7 @@ Deprecations - Deprecated 'method', 'limit', and 'fill_axis' keywords in :meth:`DataFrame.align` and :meth:`Series.align`, explicitly call ``fillna`` on the alignment results instead (:issue:`51856`) - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) - Deprecated the 'axis' keyword in :meth:`.GroupBy.idxmax`, :meth:`.GroupBy.idxmin`, :meth:`.GroupBy.fillna`, :meth:`.GroupBy.take`, :meth:`.GroupBy.skew`, :meth:`.GroupBy.rank`, :meth:`.GroupBy.cumprod`, :meth:`.GroupBy.cumsum`, :meth:`.GroupBy.cummax`, :meth:`.GroupBy.cummin`, :meth:`.GroupBy.pct_change`, :meth:`GroupBy.diff`, :meth:`.GroupBy.shift`, and :meth:`DataFrameGroupBy.corrwith`; for ``axis=1`` operate on the underlying :class:`DataFrame` instead (:issue:`50405`, :issue:`51046`) +- Deprecated passing a dictionary to :meth:`.SeriesGroupBy.agg`; pass a list of aggregations instead (:issue:`50684`) - Deprecated :meth:`DataFrame.swapaxes` and :meth:`Series.swapaxes`, use :meth:`DataFrame.transpose` or :meth:`Series.transpose` instead (:issue:`51946`) - diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index e87a74e5885b3..3a545f88dda27 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -306,6 +306,16 @@ def _aggregate_multiple_funcs(self, arg, *args, **kwargs) -> DataFrame: raise SpecificationError("nested renamer is not supported") else: # GH#50684 - This accidentally worked in 1.x + msg = ( + "Passing a dictionary to SeriesGroupBy.agg is deprecated " + "and will raise in a future version of pandas. Pass a list " + "of aggregations instead." + ) + warnings.warn( + message=msg, + category=FutureWarning, + stacklevel=find_stack_level(), + ) arg = list(arg.items()) elif any(isinstance(x, (tuple, list)) for x in arg): arg = [(x, x) if not isinstance(x, (tuple, list)) else x for x in arg] diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index e591298e2a58e..614f748a91cde 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -489,6 +489,93 @@ class providing the base-class of operations. -------- %(example)s""" +_agg_template_series = """ +Aggregate using one or more operations over the specified axis. + +Parameters +---------- +func : function, str, list, dict or None + Function to use for aggregating the data. If a function, must either + work when passed a {klass} or when passed to {klass}.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. + - None, in which case ``**kwargs`` are used with Named Aggregation. Here the + output has one column for each element in ``**kwargs``. The name of the + column is keyword, whereas the value determines the aggregation used to compute + the values in the column. + + Can also accept a Numba JIT function with + ``engine='numba'`` specified. Only passing a single function is supported + with this engine. + + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. + + .. versionchanged:: 1.1.0 + + .. deprecated:: 2.1.0 + + Passing a dictionary is deprecated and will raise in a future version + of pandas. Pass a list of aggregations instead. +*args + Positional arguments to pass to func. +engine : str, default None + * ``'cython'`` : Runs the function through C-extensions from cython. + * ``'numba'`` : Runs the function through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or globally setting ``compute.use_numba`` + + .. versionadded:: 1.1.0 +engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{{'nopython': True, 'nogil': False, 'parallel': False}}`` and will be + applied to the function + + .. versionadded:: 1.1.0 +**kwargs + * If ``func`` is None, ``**kwargs`` are used to define the output names and + aggregations via Named Aggregation. See ``func`` entry. + * Otherwise, keyword arguments to be passed into func. + +Returns +------- +{klass} + +See Also +-------- +{klass}.groupby.apply : Apply function func group-wise + and combine the results together. +{klass}.groupby.transform : Transforms the Series on each group + based on the given function. +{klass}.aggregate : Aggregate using one or more + operations over the specified axis. + +Notes +----- +When using ``engine='numba'``, there will be no "fall back" behavior internally. +The group data and group index will be passed as numpy arrays to the JITed +user defined function, and no alternative execution attempts will be tried. + +Functions that mutate the passed object can produce unexpected +behavior or errors and are not supported. See :ref:`gotchas.udf-mutation` +for more details. + +.. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. +{examples}""" + _agg_template = """ Aggregate using one or more operations over the specified axis. diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index c4c7bee2970d0..74e9abd4bd883 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -727,7 +727,9 @@ def test_groupby_as_index_agg(df): expected3 = grouped["C"].sum() expected3 = DataFrame(expected3).rename(columns={"C": "Q"}) - result3 = grouped["C"].agg({"Q": np.sum}) + msg = "Passing a dictionary to SeriesGroupBy.agg is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result3 = grouped["C"].agg({"Q": np.sum}) tm.assert_frame_equal(result3, expected3) # GH7115 & GH8112 & GH8582 diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 8e84a48eb7374..a582f00aa8e79 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -486,7 +486,9 @@ def test_multifunc_select_col_integer_cols(self, df): df.columns = np.arange(len(df.columns)) # it works! - df.groupby(1, as_index=False)[2].agg({"Q": np.mean}) + msg = "Passing a dictionary to SeriesGroupBy.agg is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + df.groupby(1, as_index=False)[2].agg({"Q": np.mean}) def test_multiindex_columns_empty_level(self): lst = [["count", "values"], ["to filter", ""]] From 4f623979d35f2733f63022cf6695e569647f1b59 Mon Sep 17 00:00:00 2001 From: richard Date: Tue, 28 Mar 2023 23:14:45 -0400 Subject: [PATCH 2/4] fixup --- pandas/core/groupby/generic.py | 7 ++++--- pandas/core/groupby/groupby.py | 2 +- scripts/validate_unwanted_patterns.py | 3 ++- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3a545f88dda27..8949fb6529a7b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -68,7 +68,8 @@ from pandas.core.groupby.groupby import ( GroupBy, GroupByPlot, - _agg_template, + _agg_template_frame, + _agg_template_series, _apply_docs, _transform_template, ) @@ -214,7 +215,7 @@ def _get_data_to_aggregate( def apply(self, func, *args, **kwargs) -> Series: return super().apply(func, *args, **kwargs) - @doc(_agg_template, examples=_agg_examples_doc, klass="Series") + @doc(_agg_template_series, examples=_agg_examples_doc, klass="Series") def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs): if maybe_use_numba(engine): return self._aggregate_with_numba( @@ -1301,7 +1302,7 @@ class DataFrameGroupBy(GroupBy[DataFrame]): """ ) - @doc(_agg_template, examples=_agg_examples_doc, klass="DataFrame") + @doc(_agg_template_frame, examples=_agg_examples_doc, klass="DataFrame") def aggregate(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs): if maybe_use_numba(engine): return self._aggregate_with_numba( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 614f748a91cde..2c57d4d8773a8 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -576,7 +576,7 @@ class providing the base-class of operations. see the examples below. {examples}""" -_agg_template = """ +_agg_template_frame = """ Aggregate using one or more operations over the specified axis. Parameters diff --git a/scripts/validate_unwanted_patterns.py b/scripts/validate_unwanted_patterns.py index 8203b62106b3e..394d1f8d4a99b 100755 --- a/scripts/validate_unwanted_patterns.py +++ b/scripts/validate_unwanted_patterns.py @@ -34,7 +34,8 @@ "_new_Index", "_new_PeriodIndex", "_doc_template", - "_agg_template", + "_agg_template_series", + "_agg_template_frame", "_pipe_template", "__main__", "_transform_template", From 42ed66bbd6dd4d2843c5d0059f241cfa6f68d21c Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 30 Mar 2023 16:47:51 -0400 Subject: [PATCH 3/4] Remove dict, fixup --- pandas/core/groupby/groupby.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 2c57d4d8773a8..b55650e6d115b 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -503,23 +503,22 @@ class providing the base-class of operations. - function - string function name - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` - - dict of axis labels -> functions, function names or list of such. - None, in which case ``**kwargs`` are used with Named Aggregation. Here the output has one column for each element in ``**kwargs``. The name of the column is keyword, whereas the value determines the aggregation used to compute the values in the column. - Can also accept a Numba JIT function with - ``engine='numba'`` specified. Only passing a single function is supported - with this engine. + .. versionchanged:: 1.1.0 - If the ``'numba'`` engine is chosen, the function must be - a user defined function with ``values`` and ``index`` as the - first and second arguments respectively in the function signature. - Each group's index will be passed to the user defined function - and optionally available for use. + Can also accept a Numba JIT function with + ``engine='numba'`` specified. Only passing a single function is supported + with this engine. - .. versionchanged:: 1.1.0 + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. .. deprecated:: 2.1.0 From 43eddcc06f1e02d310a8582f9e33037b68b20ad8 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Thu, 30 Mar 2023 16:48:37 -0400 Subject: [PATCH 4/4] fixup frame docstring --- pandas/core/groupby/groupby.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index b55650e6d115b..7b367157113b5 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -595,17 +595,18 @@ class providing the base-class of operations. column is keyword, whereas the value determines the aggregation used to compute the values in the column. - Can also accept a Numba JIT function with - ``engine='numba'`` specified. Only passing a single function is supported - with this engine. + .. versionchanged:: 1.1.0 - If the ``'numba'`` engine is chosen, the function must be - a user defined function with ``values`` and ``index`` as the - first and second arguments respectively in the function signature. - Each group's index will be passed to the user defined function - and optionally available for use. + Can also accept a Numba JIT function with + ``engine='numba'`` specified. Only passing a single function is supported + with this engine. + + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. - .. versionchanged:: 1.1.0 *args Positional arguments to pass to func. engine : str, default None