From b1ca103aab8919a1f32725a86cdf3185cbaabe07 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 28 Apr 2020 21:01:52 -0700 Subject: [PATCH 1/4] DOC: Fix groupby.agg/transform rst reference and numba references --- doc/source/reference/groupby.rst | 6 ++- pandas/core/groupby/generic.py | 43 ++++--------------- pandas/core/groupby/groupby.py | 73 +++++++++++++++++++++++++++++++- 3 files changed, 83 insertions(+), 39 deletions(-) diff --git a/doc/source/reference/groupby.rst b/doc/source/reference/groupby.rst index 921eb737aef07..ca444dac9d77d 100644 --- a/doc/source/reference/groupby.rst +++ b/doc/source/reference/groupby.rst @@ -36,8 +36,10 @@ Function application GroupBy.apply GroupBy.agg - GroupBy.aggregate - GroupBy.transform + SeriesGroupBy.aggregate + DataFrameGroupBy.aggregate + SeriesGroupBy.transform + DataFrameGroupBy.transform GroupBy.pipe Computations / descriptive stats diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ddf553dd1dd62..39213628fcc6d 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -63,10 +63,11 @@ import pandas.core.common as com from pandas.core.construction import create_series_with_explicit_dtype from pandas.core.frame import DataFrame -from pandas.core.generic import ABCDataFrame, ABCSeries, NDFrame, _shared_docs +from pandas.core.generic import ABCDataFrame, ABCSeries, NDFrame from pandas.core.groupby import base from pandas.core.groupby.groupby import ( GroupBy, + _agg_template, _apply_docs, _transform_template, get_groupby, @@ -177,16 +178,6 @@ def _selection_name(self): else: return self._selection - _agg_see_also_doc = dedent( - """ - See Also - -------- - pandas.Series.groupby.apply - pandas.Series.groupby.transform - pandas.Series.aggregate - """ - ) - _agg_examples_doc = dedent( """ Examples @@ -237,13 +228,9 @@ def apply(self, func, *args, **kwargs): return super().apply(func, *args, **kwargs) @Substitution( - see_also=_agg_see_also_doc, - 
examples=_agg_examples_doc, - versionadded="", - klass="Series", - axis="", + examples=_agg_examples_doc, klass="Series", ) - @Appender(_shared_docs["aggregate"]) + @Appender(_agg_template) def aggregate( self, func=None, *args, engine="cython", engine_kwargs=None, **kwargs ): @@ -476,7 +463,7 @@ def _aggregate_named(self, func, *args, **kwargs): return result - @Substitution(klass="Series", selected="A.") + @Substitution(klass="Series") @Appender(_transform_template) def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs): func = self._get_cython_func(func) or func @@ -854,16 +841,6 @@ class DataFrameGroupBy(GroupBy[DataFrame]): _apply_whitelist = base.dataframe_apply_whitelist - _agg_see_also_doc = dedent( - """ - See Also - -------- - pandas.DataFrame.groupby.apply - pandas.DataFrame.groupby.transform - pandas.DataFrame.aggregate - """ - ) - _agg_examples_doc = dedent( """ Examples @@ -941,13 +918,9 @@ class DataFrameGroupBy(GroupBy[DataFrame]): ) @Substitution( - see_also=_agg_see_also_doc, - examples=_agg_examples_doc, - versionadded="", - klass="DataFrame", - axis="", + examples=_agg_examples_doc, klass="DataFrame", ) - @Appender(_shared_docs["aggregate"]) + @Appender(_agg_template) def aggregate( self, func=None, *args, engine="cython", engine_kwargs=None, **kwargs ): @@ -1466,7 +1439,7 @@ def _transform_general( concatenated = concatenated.reindex(concat_index, axis=other_axis, copy=False) return self._set_result_index_ordered(concatenated) - @Substitution(klass="DataFrame", selected="") + @Substitution(klass="DataFrame") @Appender(_transform_template) def transform(self, func, *args, engine="cython", engine_kwargs=None, **kwargs): diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 6924c7d320bc4..f0cb19f874260 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -291,7 +291,9 @@ class providing the base-class of operations. 
See Also -------- -aggregate, transform +%(klass)s.groupby.apply +%(klass)s.groupby.aggregate +%(klass)s.transform Notes ----- @@ -310,6 +312,10 @@ class providing the base-class of operations. * f must not mutate groups. Mutation is not supported and may produce unexpected results. +When using ``engine='numba'``, there will be no "fall back" behavior internally. +The group data and group index will be passed as numpy arrays to the JITed +user defined function, and no alternative execution attempts will be tried. + Examples -------- @@ -317,7 +323,7 @@ class providing the base-class of operations. >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', ... 'foo', 'bar'], ... 'B' : ['one', 'one', 'two', 'three', -... 'two', 'two'], +... 'two', 'two'], ... 'C' : [1, 5, 5, 2, 5, 5], ... 'D' : [2.0, 5., 8., 1., 2., 9.]}) >>> grouped = df.groupby('A') @@ -341,6 +347,69 @@ class providing the base-class of operations. 5 3 8.0 """ +_agg_template = """ +Aggregate using one or more operations over the specified axis. + +Parameters +---------- +func : function, str, list or dict + Function to use for aggregating the data. If a function, must either + work when passed a %(klass)s or when passed to %(klass)s.apply. + + Accepted combinations are: + + - function + - string function name + - list of functions and/or function names, e.g. ``[np.sum, 'mean']`` + - dict of axis labels -> functions, function names or list of such. + + Can also accept a Numba JIT function with + ``engine='numba'`` specified. + + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. + + .. versionchanged:: 1.1.0 +*args + Positional arguments to pass to func +engine : str, default 'cython' + * ``'cython'`` : Runs the function through C-extensions from cython. 
+ * ``'numba'`` : Runs the function through JIT compiled code from numba. + + .. versionadded:: 1.1.0 +engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs``. + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be + applied to the function. + + .. versionadded:: 1.1.0 +**kwargs + Keyword arguments to be passed into func. + +Returns +------- +%(klass)s + +See Also +-------- +%(klass)s.groupby.apply +%(klass)s.groupby.transform +%(klass)s.aggregate + +Notes +----- +When using ``engine='numba'``, there will be no "fall back" behavior internally. +The group data and group index will be passed as numpy arrays to the JITed +user defined function, and no alternative execution attempts will be tried. +%(examples)s +""" + class GroupByPlot(PandasObject): """ From da5db507f956e9d9da886427b8390691ba8ce34f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 28 Apr 2020 23:36:58 -0700 Subject: [PATCH 2/4] maybe fix docstring validation --- pandas/core/groupby/generic.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 39213628fcc6d..c49acc6fb2005 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -216,7 +216,7 @@ def _selection_name(self): minimum maximum 1 1 2 2 3 4 - """ +""" ) @Appender( @@ -905,7 +905,6 @@ class DataFrameGroupBy(GroupBy[DataFrame]): 1 1 0.590715 2 3 0.704907 - - The keywords are the *output* column names - The values are tuples whose first element is the column to select and the second element is the aggregation to apply to that column.
@@ -913,8 +912,7 @@ class DataFrameGroupBy(GroupBy[DataFrame]): ``['column', 'aggfunc']`` to make it clearer what the arguments are. As usual, the aggregation can be a callable or a string alias. - See :ref:`groupby.aggregate.named` for more. - """ + See :ref:`groupby.aggregate.named` for more.""" ) @Substitution( From d3441d2a26b77d2eb2a4534023c3a5313d9752ca Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 28 Apr 2020 23:40:30 -0700 Subject: [PATCH 3/4] fix more warnings --- pandas/core/groupby/groupby.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f0cb19f874260..81c3fd7ad9e89 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -319,7 +319,6 @@ class providing the base-class of operations. Examples -------- -# Same shape >>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar', ... 'foo', 'bar'], ... 'B' : ['one', 'one', 'two', 'three', @@ -336,7 +335,8 @@ class providing the base-class of operations. 4 0.577350 -0.577350 5 0.577350 1.000000 -# Broadcastable +Broadcast result of the transformation + >>> grouped.transform(lambda x: x.max() - x.min()) C D 0 4 6.0 From f189bd33869ffba1e6b369b8e49e3bc9a3b857cc Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Wed, 29 Apr 2020 21:43:39 -0700 Subject: [PATCH 4/4] Hopefully fix docstring validation --- pandas/core/groupby/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9cb071a0c40ff..b35798079ba7f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -215,8 +215,7 @@ def _selection_name(self): ... ) minimum maximum 1 1 2 - 2 3 4 -""" + 2 3 4""" ) @Appender(