@@ -1063,6 +1063,85 @@ def test_groupby_get_by_index():
1063
1063
pd .testing .assert_frame_equal (res , expected )
1064
1064
1065
1065
1066
@pytest.mark.parametrize(
    "grp_col_dict, exp_data",
    [
        ({"nr": "min", "cat_ord": "min"}, {"nr": [1, 5], "cat_ord": ["a", "c"]}),
        ({"cat_ord": "min"}, {"cat_ord": ["a", "c"]}),
        ({"nr": "min"}, {"nr": [1, 5]}),
    ],
)
def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
    # GH27800 - one aggregation per column on a frame that contains an
    # ordered categorical column, grouped by a categorical key
    #
    # grp_col_dict : column -> aggregation name mapping handed to ``agg``
    # exp_data     : column -> expected values, one entry per group

    # input frame: "cat" is the grouping key, "cat_ord" becomes an ordered
    # categorical, "nr" stays a plain integer column
    frame = pd.DataFrame(
        {
            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
            "cat_ord": list("aabbccdd"),
            "cat": list("aaaabbbb"),
        }
    )
    frame["cat_ord"] = frame["cat_ord"].astype("category")
    frame["cat"] = frame["cat"].astype("category")
    frame["cat_ord"] = frame["cat_ord"].cat.as_ordered()

    grouped = frame.groupby("cat")
    result_df = grouped.agg(grp_col_dict)

    # expected frame: one row per group key, indexed by the categories of "cat"
    group_index = pd.CategoricalIndex(
        ["a", "b"],
        categories=["a", "b"],
        ordered=False,
        name="cat",
        dtype="category",
    )
    expected_df = pd.DataFrame(exp_data, index=group_index)

    tm.assert_frame_equal(result_df, expected_df)
1098
+
1099
+
1100
@pytest.mark.parametrize(
    "grp_col_dict, exp_data",
    [
        ({"nr": ["min", "max"], "cat_ord": "min"}, [(1, 4, "a"), (5, 8, "c")]),
        ({"nr": "min", "cat_ord": ["min", "max"]}, [(1, "a", "b"), (5, "c", "d")]),
        ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]),
    ],
)
def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
    # GH27800 - several aggregations per column on a frame that contains an
    # ordered categorical column, grouped by a categorical key
    #
    # grp_col_dict : column -> aggregation name (or list of names) for ``agg``
    # exp_data     : expected rows, one tuple per group

    # input frame: "cat" is the grouping key, "cat_ord" becomes an ordered
    # categorical, "nr" stays a plain integer column
    frame = pd.DataFrame(
        {
            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
            "cat_ord": list("aabbccdd"),
            "cat": list("aaaabbbb"),
        }
    )
    frame["cat_ord"] = frame["cat_ord"].astype("category")
    frame["cat"] = frame["cat"].astype("category")
    frame["cat_ord"] = frame["cat_ord"].cat.as_ordered()

    grouped = frame.groupby("cat")
    result_df = grouped.agg(grp_col_dict)

    # expected row index: one entry per group key
    group_index = pd.CategoricalIndex(
        ["a", "b"],
        categories=["a", "b"],
        ordered=False,
        name="cat",
        dtype="category",
    )

    # expected column index: flatten grp_col_dict into [column, func] pairs,
    # in dict order, treating a single function name like a one-element list
    column_keys = []
    for column, funcs in grp_col_dict.items():
        func_names = funcs if isinstance(funcs, list) else [funcs]
        column_keys.extend([column, func] for func in func_names)
    expected_columns = pd.MultiIndex.from_tuples(tuple(column_keys))

    expected_df = pd.DataFrame(
        exp_data, index=group_index, columns=expected_columns
    )

    tm.assert_frame_equal(result_df, expected_df)
1143
+
1144
+
1066
1145
def test_nonagg_agg ():
1067
1146
# GH 35490 - Single/Multiple agg of non-agg function give same results
1068
1147
# TODO: agg should raise for functions that don't aggregate
0 commit comments