@@ -525,28 +525,6 @@ a trivial example is ``df.groupby('A').agg(lambda ser: 1)``. Note that
525
525
:meth:`~pd.core.groupby.DataFrameGroupBy.nth` can act as a reducer *or* a
526
526
filter, see :ref:`here <groupby.nth>`.
527
527
528
- Decimal columns are "nuisance" columns that .agg automatically excludes in groupby.
529
-
530
- If you do wish to aggregate them you must do so explicitly:
531
-
532
- .. ipython :: python
533
-
534
- from decimal import Decimal
535
- dec = pd.DataFrame(
536
- {' name' : [' foo' , ' bar' , ' foo' , ' bar' ],
537
- ' title' : [' boo' , ' far' , ' boo' , ' far' ],
538
- ' id' : [123 , 456 , 123 , 456 ],
539
- ' int_column' : [1 , 2 , 3 , 4 ],
540
- ' dec_column1' : [Decimal(' 0.50' ), Decimal(' 0.15' ), Decimal(' 0.25' ), Decimal(' 0.40' )],
541
- ' dec_column2' : [Decimal(' 0.20' ), Decimal(' 0.30' ), Decimal(' 0.55' ), Decimal(' 0.60' )]
542
- },
543
- columns = [' name' ,' title' ,' id' ,' int_column' ,' dec_column1' ,' dec_column2' ]
544
- )
545
-
546
- dec.groupby([' name' , ' title' , ' id' ], as_index = False ).sum()
547
-
548
- dec.groupby([' name' , ' title' , ' id' ], as_index = False ).agg({' dec_column1' : ' sum' , ' dec_column2' : ' sum' })
549
-
550
528
.. _groupby.aggregate.multifunc:
551
529
552
530
Applying multiple functions at once
@@ -1038,6 +1016,42 @@ The returned dtype of the grouped will *always* include *all* of the categories
1038
1016
s = pd.Series([1 , 1 , 1 ]).groupby(pd.Categorical([' a' , ' a' , ' a' ], categories = [' a' , ' b' ]), observed = False ).count()
1039
1017
s.index.dtype
1040
1018
1019
+ .. note::
1020
+ Decimal columns are also "nuisance" columns. They are excluded from aggregate functions automatically in groupby.
1021
+
1022
+ If you do wish to include decimal columns in the aggregation, you must do so explicitly:
1023
+
1024
+ .. ipython:: python
1025
+
1026
+ from decimal import Decimal
1027
+ dec = pd.DataFrame(
1028
+ {' name' : [' foo' , ' bar' , ' foo' , ' bar' ],
1029
+ ' title' : [' boo' , ' far' , ' boo' , ' far' ],
1030
+ ' id' : [123 , 456 , 123 , 456 ],
1031
+ ' int_column' : [1 , 2 , 3 , 4 ],
1032
+ ' dec_column1' : [Decimal(' 0.50' ), Decimal(' 0.15' ), Decimal(' 0.25' ), Decimal(' 0.40' )],
1033
+ ' dec_column2' : [Decimal(' 0.20' ), Decimal(' 0.30' ), Decimal(' 0.55' ), Decimal(' 0.60' )]
1034
+ },
1035
+ columns = [' name' ,' title' ,' id' ,' int_column' ,' dec_column1' ,' dec_column2' ]
1036
+ )
1037
+
1038
+ dec.head()
1039
+
1040
+ dec.dtypes
1041
+
1042
+ # Decimal columns excluded from sum by default
1043
+ dec.groupby([' name' , ' title' , ' id' ], as_index = False ).sum()
1044
+
1045
+ # Decimal columns can be summed explicitly by themselves...
1046
+ dec.groupby([' name' , ' title' , ' id' ], as_index = False )[' dec_column1' ,' dec_column2' ].sum()
1047
+
1048
+ # ...but cannot be combined with standard data types or they will be excluded
1049
+ dec.groupby([' name' , ' title' , ' id' ], as_index = False )[' int_column' ,' dec_column1' ,' dec_column2' ].sum()
1050
+
1051
+ # Use the .agg function to aggregate over standard and "nuisance" data types together
1052
+ dec.groupby([' name' , ' title' , ' id' ], as_index = False ).agg({' int_column' : ' sum' , ' dec_column1' : ' sum' , ' dec_column2' : ' sum' })
1053
+
1054
+
1041
1055
.. _groupby.missing:
1042
1056
1043
1057
NA and NaT group handling
0 commit comments