@@ -36,7 +36,6 @@ class providing the base-class of operations.
36
36
from pandas ._libs import Timestamp
37
37
import pandas ._libs .groupby as libgroupby
38
38
from pandas ._typing import FrameOrSeries , Scalar
39
- from pandas .compat import set_function_name
40
39
from pandas .compat .numpy import function as nv
41
40
from pandas .errors import AbstractMethodError
42
41
from pandas .util ._decorators import Appender , Substitution , cache_readonly , doc
@@ -192,6 +191,24 @@ class providing the base-class of operations.
192
191
""" ,
193
192
)
194
193
194
+ _groupby_agg_method_template = """
195
+ Compute {fname} of group values.
196
+
197
+ Parameters
198
+ ----------
199
+ numeric_only : bool, default {no}
200
+ Include only float, int, boolean columns. If None, will attempt to use
201
+ everything, then use only numeric data.
202
+ min_count : int, default {mc}
203
+ The required number of valid values to perform the operation. If fewer
204
+ than ``min_count`` non-NA values are present the result will be NA.
205
+
206
+ Returns
207
+ -------
208
+ Series or DataFrame
209
+ Computed {fname} of values within each group.
210
+ """
211
+
195
212
_pipe_template = """
196
213
Apply a function `func` with arguments to this %(klass)s object and return
197
214
the function's result.
@@ -945,6 +962,37 @@ def _wrap_transformed_output(self, output: Mapping[base.OutputKey, np.ndarray]):
945
962
def _wrap_applied_output (self , keys , values , not_indexed_same : bool = False ):
946
963
raise AbstractMethodError (self )
947
964
965
def _agg_general(
    self,
    numeric_only: bool = True,
    min_count: int = -1,
    *,
    alias: str,
    npfunc: Callable,
):
    """
    Aggregate the groups, trying the cython path before a Python fallback.

    Parameters
    ----------
    numeric_only : bool, default True
        Forwarded unchanged to ``_cython_agg_general``.
    min_count : int, default -1
        Forwarded unchanged to ``_cython_agg_general``.
    alias : str
        Passed to ``_cython_agg_general`` as ``how``.
    npfunc : callable
        Passed to ``_cython_agg_general`` as ``alt``. When the cython
        attempt is abandoned it is applied through ``self.aggregate`` as
        ``npfunc(x, axis=self.axis)``.

    Returns
    -------
    object
        Whatever ``_cython_agg_general`` returns, or the result of
        ``self.aggregate`` when the fallback is taken.

    Raises
    ------
    NotImplementedError
        Re-raised when its message is not one of the two recognised
        "unsupported dtype" messages.
    """
    self._set_group_selection()

    # Messages raised in _get_cython_function; in some cases this list can
    # be trimmed by implementing cython funcs for more dtypes.
    recoverable_messages = (
        "function is not implemented for this dtype",
        "category dtype not supported",
    )

    # try a cython aggregation if we can
    try:
        return self._cython_agg_general(
            how=alias, alt=npfunc, numeric_only=numeric_only, min_count=min_count
        )
    except DataError:
        # deliberately ignored: continue with the non-cython path below
        pass
    except NotImplementedError as err:
        message = str(err)
        if not any(text in message for text in recoverable_messages):
            raise

    # apply a non-cython aggregation
    return self.aggregate(lambda x: npfunc(x, axis=self.axis))
995
+
948
996
def _cython_agg_general (
949
997
self , how : str , alt = None , numeric_only : bool = True , min_count : int = - 1
950
998
):
@@ -1438,74 +1486,36 @@ def size(self):
1438
1486
result = self ._obj_1d_constructor (result )
1439
1487
return self ._reindex_output (result , fill_value = 0 )
1440
1488
1441
- @classmethod
1442
- def _add_numeric_operations ( cls ):
1443
- """
1444
- Add numeric operations to the GroupBy generically.
1445
- """
1489
@doc(_groupby_agg_method_template, fname="sum", no=True, mc=0)
def sum(self, numeric_only: bool = True, min_count: int = 0):
    # No docstring here on purpose: the user-facing text is produced by
    # the ``doc`` decorator from the shared template.
    # "add" is forwarded by ``_agg_general`` as the cython ``how``;
    # ``np.sum`` is the alternative / fallback callable.
    return self._agg_general(
        alias="add",
        npfunc=np.sum,
        numeric_only=numeric_only,
        min_count=min_count,
    )
1446
1494
1447
- def groupby_function (
1448
- name : str ,
1449
- alias : str ,
1450
- npfunc ,
1451
- numeric_only : bool = True ,
1452
- min_count : int = - 1 ,
1453
- ):
1495
@doc(_groupby_agg_method_template, fname="prod", no=True, mc=0)
def prod(self, numeric_only: bool = True, min_count: int = 0):
    # No docstring here on purpose: the user-facing text is produced by
    # the ``doc`` decorator from the shared template.
    # "prod" is forwarded by ``_agg_general`` as the cython ``how``;
    # ``np.prod`` is the alternative / fallback callable.
    return self._agg_general(
        alias="prod",
        npfunc=np.prod,
        numeric_only=numeric_only,
        min_count=min_count,
    )
1454
1500
1455
- _local_template = """
1456
- Compute %(f)s of group values.
1457
-
1458
- Parameters
1459
- ----------
1460
- numeric_only : bool, default %(no)s
1461
- Include only float, int, boolean columns. If None, will attempt to use
1462
- everything, then use only numeric data.
1463
- min_count : int, default %(mc)s
1464
- The required number of valid values to perform the operation. If fewer
1465
- than ``min_count`` non-NA values are present the result will be NA.
1466
-
1467
- Returns
1468
- -------
1469
- Series or DataFrame
1470
- Computed %(f)s of values within each group.
1471
- """
1472
-
1473
- @Substitution (name = "groupby" , f = name , no = numeric_only , mc = min_count )
1474
- @Appender (_common_see_also )
1475
- @Appender (_local_template )
1476
- def func (self , numeric_only = numeric_only , min_count = min_count ):
1477
- self ._set_group_selection ()
1478
-
1479
- # try a cython aggregation if we can
1480
- try :
1481
- return self ._cython_agg_general (
1482
- how = alias ,
1483
- alt = npfunc ,
1484
- numeric_only = numeric_only ,
1485
- min_count = min_count ,
1486
- )
1487
- except DataError :
1488
- pass
1489
- except NotImplementedError as err :
1490
- if "function is not implemented for this dtype" in str (
1491
- err
1492
- ) or "category dtype not supported" in str (err ):
1493
- # raised in _get_cython_function, in some cases can
1494
- # be trimmed by implementing cython funcs for more dtypes
1495
- pass
1496
- else :
1497
- raise
1498
-
1499
- # apply a non-cython aggregation
1500
- result = self .aggregate (lambda x : npfunc (x , axis = self .axis ))
1501
- return result
1502
-
1503
- set_function_name (func , name , cls )
1504
-
1505
- return func
1501
@doc(_groupby_agg_method_template, fname="min", no=False, mc=-1)
def min(self, numeric_only: bool = False, min_count: int = -1):
    # No docstring here on purpose: the user-facing text is produced by
    # the ``doc`` decorator from the shared template.
    # "min" is forwarded by ``_agg_general`` as the cython ``how``;
    # ``np.min`` is the alternative / fallback callable.
    return self._agg_general(
        alias="min",
        npfunc=np.min,
        numeric_only=numeric_only,
        min_count=min_count,
    )
1506
1506
1507
@doc(_groupby_agg_method_template, fname="max", no=False, mc=-1)
def max(self, numeric_only: bool = False, min_count: int = -1):
    # No docstring here on purpose: the user-facing text is produced by
    # the ``doc`` decorator from the shared template.
    # "max" is forwarded by ``_agg_general`` as the cython ``how``;
    # ``np.max`` is the alternative / fallback callable.
    return self._agg_general(
        alias="max",
        npfunc=np.max,
        numeric_only=numeric_only,
        min_count=min_count,
    )
1512
+
1513
+ @doc (_groupby_agg_method_template , fname = "first" , no = False , mc = - 1 )
1514
+ def first (self , numeric_only : bool = False , min_count : int = - 1 ):
1507
1515
def first_compat (obj : FrameOrSeries , axis : int = 0 ):
1508
1516
def first (x : Series ):
1517
+ """Helper function for first item that isn't NA.
1518
+ """
1509
1519
x = x .array [notna (x .array )]
1510
1520
if len (x ) == 0 :
1511
1521
return np .nan
@@ -1518,8 +1528,19 @@ def first(x: Series):
1518
1528
else :
1519
1529
raise TypeError (type (obj ))
1520
1530
1531
+ return self ._agg_general (
1532
+ numeric_only = numeric_only ,
1533
+ min_count = min_count ,
1534
+ alias = "first" ,
1535
+ npfunc = first_compat ,
1536
+ )
1537
+
1538
+ @doc (_groupby_agg_method_template , fname = "last" , no = False , mc = - 1 )
1539
+ def last (self , numeric_only : bool = False , min_count : int = - 1 ):
1521
1540
def last_compat (obj : FrameOrSeries , axis : int = 0 ):
1522
1541
def last (x : Series ):
1542
+ """Helper function for last item that isn't NA.
1543
+ """
1523
1544
x = x .array [notna (x .array )]
1524
1545
if len (x ) == 0 :
1525
1546
return np .nan
@@ -1532,12 +1553,12 @@ def last(x: Series):
1532
1553
else :
1533
1554
raise TypeError (type (obj ))
1534
1555
1535
- cls . sum = groupby_function ( "sum" , "add" , np . sum , min_count = 0 )
1536
- cls . prod = groupby_function ( "prod" , "prod" , np . prod , min_count = 0 )
1537
- cls . min = groupby_function ( "min" , "min" , np . min , numeric_only = False )
1538
- cls . max = groupby_function ( "max" , "max" , np . max , numeric_only = False )
1539
- cls . first = groupby_function ( "first" , "first" , first_compat , numeric_only = False )
1540
- cls . last = groupby_function ( "last" , "last" , last_compat , numeric_only = False )
1556
+ return self . _agg_general (
1557
+ numeric_only = numeric_only ,
1558
+ min_count = min_count ,
1559
+ alias = "last" ,
1560
+ npfunc = last_compat ,
1561
+ )
1541
1562
1542
1563
@Substitution (name = "groupby" )
1543
1564
@Appender (_common_see_also )
@@ -2637,9 +2658,6 @@ def _reindex_output(
2637
2658
return output .reset_index (drop = True )
2638
2659
2639
2660
2640
- GroupBy ._add_numeric_operations ()
2641
-
2642
-
2643
2661
@doc (GroupBy )
2644
2662
def get_groupby (
2645
2663
obj : NDFrame ,
0 commit comments