Skip to content

Commit 0f52751

Browse files
authored
CLN/TYP: Groupby agg methods (#34200)
1 parent 67b398a commit 0f52751

File tree

2 files changed

+94
-76
lines changed

2 files changed

+94
-76
lines changed

pandas/core/groupby/groupby.py

+91-73
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@ class providing the base-class of operations.
3636
from pandas._libs import Timestamp
3737
import pandas._libs.groupby as libgroupby
3838
from pandas._typing import FrameOrSeries, Scalar
39-
from pandas.compat import set_function_name
4039
from pandas.compat.numpy import function as nv
4140
from pandas.errors import AbstractMethodError
4241
from pandas.util._decorators import Appender, Substitution, cache_readonly, doc
@@ -192,6 +191,24 @@ class providing the base-class of operations.
192191
""",
193192
)
194193

194+
_groupby_agg_method_template = """
195+
Compute {fname} of group values.
196+
197+
Parameters
198+
----------
199+
numeric_only : bool, default {no}
200+
Include only float, int, boolean columns. If None, will attempt to use
201+
everything, then use only numeric data.
202+
min_count : int, default {mc}
203+
The required number of valid values to perform the operation. If fewer
204+
than ``min_count`` non-NA values are present the result will be NA.
205+
206+
Returns
207+
-------
208+
Series or DataFrame
209+
Computed {fname} of values within each group.
210+
"""
211+
195212
_pipe_template = """
196213
Apply a function `func` with arguments to this %(klass)s object and return
197214
the function's result.
@@ -945,6 +962,37 @@ def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]):
945962
def _wrap_applied_output(self, keys, values, not_indexed_same: bool = False):
946963
raise AbstractMethodError(self)
947964

965+
def _agg_general(
966+
self,
967+
numeric_only: bool = True,
968+
min_count: int = -1,
969+
*,
970+
alias: str,
971+
npfunc: Callable,
972+
):
973+
self._set_group_selection()
974+
975+
# try a cython aggregation if we can
976+
try:
977+
return self._cython_agg_general(
978+
how=alias, alt=npfunc, numeric_only=numeric_only, min_count=min_count,
979+
)
980+
except DataError:
981+
pass
982+
except NotImplementedError as err:
983+
if "function is not implemented for this dtype" in str(
984+
err
985+
) or "category dtype not supported" in str(err):
986+
# raised in _get_cython_function, in some cases can
987+
# be trimmed by implementing cython funcs for more dtypes
988+
pass
989+
else:
990+
raise
991+
992+
# apply a non-cython aggregation
993+
result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
994+
return result
995+
948996
def _cython_agg_general(
949997
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
950998
):
@@ -1438,74 +1486,36 @@ def size(self):
14381486
result = self._obj_1d_constructor(result)
14391487
return self._reindex_output(result, fill_value=0)
14401488

1441-
@classmethod
1442-
def _add_numeric_operations(cls):
1443-
"""
1444-
Add numeric operations to the GroupBy generically.
1445-
"""
1489+
@doc(_groupby_agg_method_template, fname="sum", no=True, mc=0)
1490+
def sum(self, numeric_only: bool = True, min_count: int = 0):
1491+
return self._agg_general(
1492+
numeric_only=numeric_only, min_count=min_count, alias="add", npfunc=np.sum
1493+
)
14461494

1447-
def groupby_function(
1448-
name: str,
1449-
alias: str,
1450-
npfunc,
1451-
numeric_only: bool = True,
1452-
min_count: int = -1,
1453-
):
1495+
@doc(_groupby_agg_method_template, fname="prod", no=True, mc=0)
1496+
def prod(self, numeric_only: bool = True, min_count: int = 0):
1497+
return self._agg_general(
1498+
numeric_only=numeric_only, min_count=min_count, alias="prod", npfunc=np.prod
1499+
)
14541500

1455-
_local_template = """
1456-
Compute %(f)s of group values.
1457-
1458-
Parameters
1459-
----------
1460-
numeric_only : bool, default %(no)s
1461-
Include only float, int, boolean columns. If None, will attempt to use
1462-
everything, then use only numeric data.
1463-
min_count : int, default %(mc)s
1464-
The required number of valid values to perform the operation. If fewer
1465-
than ``min_count`` non-NA values are present the result will be NA.
1466-
1467-
Returns
1468-
-------
1469-
Series or DataFrame
1470-
Computed %(f)s of values within each group.
1471-
"""
1472-
1473-
@Substitution(name="groupby", f=name, no=numeric_only, mc=min_count)
1474-
@Appender(_common_see_also)
1475-
@Appender(_local_template)
1476-
def func(self, numeric_only=numeric_only, min_count=min_count):
1477-
self._set_group_selection()
1478-
1479-
# try a cython aggregation if we can
1480-
try:
1481-
return self._cython_agg_general(
1482-
how=alias,
1483-
alt=npfunc,
1484-
numeric_only=numeric_only,
1485-
min_count=min_count,
1486-
)
1487-
except DataError:
1488-
pass
1489-
except NotImplementedError as err:
1490-
if "function is not implemented for this dtype" in str(
1491-
err
1492-
) or "category dtype not supported" in str(err):
1493-
# raised in _get_cython_function, in some cases can
1494-
# be trimmed by implementing cython funcs for more dtypes
1495-
pass
1496-
else:
1497-
raise
1498-
1499-
# apply a non-cython aggregation
1500-
result = self.aggregate(lambda x: npfunc(x, axis=self.axis))
1501-
return result
1502-
1503-
set_function_name(func, name, cls)
1504-
1505-
return func
1501+
@doc(_groupby_agg_method_template, fname="min", no=False, mc=-1)
1502+
def min(self, numeric_only: bool = False, min_count: int = -1):
1503+
return self._agg_general(
1504+
numeric_only=numeric_only, min_count=min_count, alias="min", npfunc=np.min
1505+
)
15061506

1507+
@doc(_groupby_agg_method_template, fname="max", no=False, mc=-1)
1508+
def max(self, numeric_only: bool = False, min_count: int = -1):
1509+
return self._agg_general(
1510+
numeric_only=numeric_only, min_count=min_count, alias="max", npfunc=np.max
1511+
)
1512+
1513+
@doc(_groupby_agg_method_template, fname="first", no=False, mc=-1)
1514+
def first(self, numeric_only: bool = False, min_count: int = -1):
15071515
def first_compat(obj: FrameOrSeries, axis: int = 0):
15081516
def first(x: Series):
1517+
"""Helper function for first item that isn't NA.
1518+
"""
15091519
x = x.array[notna(x.array)]
15101520
if len(x) == 0:
15111521
return np.nan
@@ -1518,8 +1528,19 @@ def first(x: Series):
15181528
else:
15191529
raise TypeError(type(obj))
15201530

1531+
return self._agg_general(
1532+
numeric_only=numeric_only,
1533+
min_count=min_count,
1534+
alias="first",
1535+
npfunc=first_compat,
1536+
)
1537+
1538+
@doc(_groupby_agg_method_template, fname="last", no=False, mc=-1)
1539+
def last(self, numeric_only: bool = False, min_count: int = -1):
15211540
def last_compat(obj: FrameOrSeries, axis: int = 0):
15221541
def last(x: Series):
1542+
"""Helper function for last item that isn't NA.
1543+
"""
15231544
x = x.array[notna(x.array)]
15241545
if len(x) == 0:
15251546
return np.nan
@@ -1532,12 +1553,12 @@ def last(x: Series):
15321553
else:
15331554
raise TypeError(type(obj))
15341555

1535-
cls.sum = groupby_function("sum", "add", np.sum, min_count=0)
1536-
cls.prod = groupby_function("prod", "prod", np.prod, min_count=0)
1537-
cls.min = groupby_function("min", "min", np.min, numeric_only=False)
1538-
cls.max = groupby_function("max", "max", np.max, numeric_only=False)
1539-
cls.first = groupby_function("first", "first", first_compat, numeric_only=False)
1540-
cls.last = groupby_function("last", "last", last_compat, numeric_only=False)
1556+
return self._agg_general(
1557+
numeric_only=numeric_only,
1558+
min_count=min_count,
1559+
alias="last",
1560+
npfunc=last_compat,
1561+
)
15411562

15421563
@Substitution(name="groupby")
15431564
@Appender(_common_see_also)
@@ -2637,9 +2658,6 @@ def _reindex_output(
26372658
return output.reset_index(drop=True)
26382659

26392660

2640-
GroupBy._add_numeric_operations()
2641-
2642-
26432661
@doc(GroupBy)
26442662
def get_groupby(
26452663
obj: NDFrame,

pandas/util/_decorators.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ def wrapper(*args, **kwargs) -> Callable[..., Any]:
329329
return decorate
330330

331331

332-
def doc(*args: Union[str, Callable], **kwargs: str) -> Callable[[F], F]:
332+
def doc(*args: Union[str, Callable], **kwargs) -> Callable[[F], F]:
333333
"""
334334
A decorator that takes docstring templates, concatenates them and performs string
335335
substitution on them.
@@ -345,8 +345,8 @@ def doc(*args: Union[str, Callable], **kwargs: str) -> Callable[[F], F]:
345345
*args : str or callable
346346
The strings / docstrings / docstring templates to be appended, in order,
347347
after the function's default docstring.
348-
**kwargs : str
349-
The string which would be used to format docstring template.
348+
**kwargs
349+
The objects used to format the docstring template.
350350
"""
351351

352352
def decorator(func: F) -> F:

0 commit comments

Comments
 (0)