Skip to content

Commit 969945e

Browse files
authored
TST: adding test for .describe() with duplicate columns (#35424)
1 parent a0c8425 commit 969945e

File tree

1 file changed

+62
-0
lines changed

1 file changed

+62
-0
lines changed

pandas/tests/groupby/test_function.py

+62
Original file line number | Diff line number | Diff line change
@@ -992,6 +992,68 @@ def test_frame_describe_unstacked_format():
992992
tm.assert_frame_equal(result, expected)
993993

994994

995+
@pytest.mark.filterwarnings(
    "ignore:"
    "indexing past lexsort depth may impact performance:"
    "pandas.errors.PerformanceWarning"
)
@pytest.mark.parametrize("as_index", [True, False])
def test_describe_with_duplicate_output_column_names(as_index):
    """Check ``groupby("a").describe()`` on a frame with a repeated column label.

    The input frame has the column labels ``["a", "b", "b"]``.  The expected
    frame is built from one record per (column, statistic) pair and then
    transposed so that each group key (88 and 99) becomes a row.

    Parameters
    ----------
    as_index : bool
        Forwarded to ``DataFrame.groupby``.  When true, the ``"a"`` statistics
        are dropped from the expected columns; otherwise the expected index is
        reset with ``drop=True``.
    """
    # GH 35314
    frame = pd.DataFrame(
        {
            "a": [99, 99, 99, 88, 88, 88],
            "b": [1, 2, 3, 4, 5, 6],
            # NOTE(review): "c" is not among the requested column labels and
            # the expected values below only reflect the "b" data -- confirm
            # this key is intentionally unused.
            "c": [10, 20, 30, 40, 50, 60],
        },
        columns=["a", "b", "b"],
    )

    # Each row is (statistic, value for group 88, value for group 99).
    a_stats = [
        ("count", 3.0, 3.0),
        ("mean", 88.0, 99.0),
        ("std", 0.0, 0.0),
        ("min", 88.0, 99.0),
        ("25%", 88.0, 99.0),
        ("50%", 88.0, 99.0),
        ("75%", 88.0, 99.0),
        ("max", 88.0, 99.0),
    ]
    b_stats = [
        ("count", 3.0, 3.0),
        ("mean", 5.0, 2.0),
        ("std", 1.0, 1.0),
        ("min", 4.0, 1.0),
        ("25%", 4.5, 1.5),
        ("50%", 5.0, 2.0),
        ("75%", 5.5, 2.5),
        ("max", 6.0, 3.0),
    ]

    records = [("a", *row) for row in a_stats]
    # The "b" label occurs twice, so its statistics block is repeated in full.
    records += [("b", *row) for row in b_stats] * 2

    by_stat = pd.DataFrame.from_records(records).set_index([0, 1])
    expected = by_stat.T
    expected.columns.names = [None, None]
    expected.index = pd.Index([88, 99], name="a")

    if as_index:
        expected = expected.drop(columns=["a"], level=0)
    else:
        expected = expected.reset_index(drop=True)

    result = frame.groupby("a", as_index=as_index).describe()

    tm.assert_frame_equal(result, expected)
1055+
1056+
9951057
def test_groupby_mean_no_overflow():
9961058
# Regression test for (#22487)
9971059
df = pd.DataFrame(

0 commit comments

Comments
 (0)