Skip to content

Commit 42649fb

Browse files
mathurk1 authored and jbrockmendel committed
TST: add test for agg on ordered categorical cols (pandas-dev#35630)
1 parent 9e64be3 commit 42649fb

File tree

1 file changed

+79
-0
lines changed

1 file changed

+79
-0
lines changed

pandas/tests/groupby/aggregate/test_aggregate.py

+79
Original file line numberDiff line numberDiff line change
@@ -1063,6 +1063,85 @@ def test_groupby_get_by_index():
10631063
pd.testing.assert_frame_equal(res, expected)
10641064

10651065

1066+
@pytest.mark.parametrize(
    "grp_col_dict, exp_data",
    [
        ({"nr": "min", "cat_ord": "min"}, {"nr": [1, 5], "cat_ord": ["a", "c"]}),
        ({"cat_ord": "min"}, {"cat_ord": ["a", "c"]}),
        ({"nr": "min"}, {"nr": [1, 5]}),
    ],
)
def test_groupby_single_agg_cat_cols(grp_col_dict, exp_data):
    # GH27800: a single aggregation per column, given as a dict, on a frame
    # that holds an ordered categorical column.
    #
    # grp_col_dict: mapping of column name -> aggregation name passed to .agg
    # exp_data: expected column data, one value per group of "cat"

    # build the input frame: "cat" is the (unordered) grouping key with groups
    # "a" (first four rows) and "b" (last four rows); "cat_ord" is converted
    # to an ordered categorical below
    input_df = pd.DataFrame(
        {
            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
            "cat_ord": list("aabbccdd"),
            "cat": list("aaaabbbb"),
        }
    )
    input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
    input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()

    # observed is spelled out so the test does not depend on the library
    # default for categorical groupers; both categories of "cat" occur in the
    # data, so no unobserved groups are involved either way
    result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict)

    # build the expected frame, indexed by the categories of the grouping key
    cat_index = pd.CategoricalIndex(
        ["a", "b"], categories=["a", "b"], ordered=False, name="cat"
    )
    expected_df = pd.DataFrame(data=exp_data, index=cat_index)

    tm.assert_frame_equal(result_df, expected_df)
1098+
1099+
1100+
@pytest.mark.parametrize(
    "grp_col_dict, exp_data",
    [
        ({"nr": ["min", "max"], "cat_ord": "min"}, [(1, 4, "a"), (5, 8, "c")]),
        ({"nr": "min", "cat_ord": ["min", "max"]}, [(1, "a", "b"), (5, "c", "d")]),
        ({"cat_ord": ["min", "max"]}, [("a", "b"), ("c", "d")]),
    ],
)
def test_groupby_combined_aggs_cat_cols(grp_col_dict, exp_data):
    # GH27800: several aggregations combined in one dict (at least one column
    # mapped to a list of functions) on a frame that holds an ordered
    # categorical column.
    #
    # grp_col_dict: mapping of column name -> aggregation name or list of
    #     aggregation names passed to .agg
    # exp_data: expected rows, one tuple per group of "cat"

    # build the input frame: "cat" is the (unordered) grouping key with groups
    # "a" (first four rows) and "b" (last four rows); "cat_ord" is converted
    # to an ordered categorical below
    input_df = pd.DataFrame(
        {
            "nr": [1, 2, 3, 4, 5, 6, 7, 8],
            "cat_ord": list("aabbccdd"),
            "cat": list("aaaabbbb"),
        }
    )
    input_df = input_df.astype({"cat": "category", "cat_ord": "category"})
    input_df["cat_ord"] = input_df["cat_ord"].cat.as_ordered()

    # observed is spelled out so the test does not depend on the library
    # default for categorical groupers; both categories of "cat" occur in the
    # data, so no unobserved groups are involved either way
    result_df = input_df.groupby("cat", observed=False).agg(grp_col_dict)

    # build the expected frame, indexed by the categories of the grouping key
    cat_index = pd.CategoricalIndex(
        ["a", "b"], categories=["a", "b"], ordered=False, name="cat"
    )

    # flatten the agg spec into (column, function) pairs, keeping dict order;
    # these pairs become the two-level column index of the expected frame
    column_tuples = [
        (col, func)
        for col, funcs in grp_col_dict.items()
        for func in (funcs if isinstance(funcs, list) else [funcs])
    ]
    multi_index = pd.MultiIndex.from_tuples(column_tuples)

    expected_df = pd.DataFrame(data=exp_data, columns=multi_index, index=cat_index)

    tm.assert_frame_equal(result_df, expected_df)
1143+
1144+
10661145
def test_nonagg_agg():
10671146
# GH 35490 - Single/Multiple agg of non-agg function give same results
10681147
# TODO: agg should raise for functions that don't aggregate

0 commit comments

Comments
 (0)