@@ -992,6 +992,68 @@ def test_frame_describe_unstacked_format():
992
992
tm .assert_frame_equal (result , expected )
993
993
994
994
995
@pytest.mark.filterwarnings(
    "ignore:"
    "indexing past lexsort depth may impact performance:"
    "pandas.errors.PerformanceWarning"
)
@pytest.mark.parametrize("as_index", [True, False])
def test_describe_with_duplicate_output_column_names(as_index):
    # GH 35314
    # groupby("a").describe() on a frame built with the column label "b"
    # requested twice, checked for both values of ``as_index``.
    frame_data = {
        "a": [99, 99, 99, 88, 88, 88],
        "b": [1, 2, 3, 4, 5, 6],
        "c": [10, 20, 30, 40, 50, 60],
    }
    # "c" is supplied but not requested; "b" is requested twice.
    df = pd.DataFrame(frame_data, columns=["a", "b", "b"])

    stat_labels = ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]
    # One (group 88, group 99) pair per statistic, in ``stat_labels`` order.
    a_stats = [(3.0, 3.0), (88.0, 99.0), (0.0, 0.0)] + [(88.0, 99.0)] * 5
    b_stats = [
        (3.0, 3.0),
        (5.0, 2.0),
        (1.0, 1.0),
        (4.0, 1.0),
        (4.5, 1.5),
        (5.0, 2.0),
        (5.5, 2.5),
        (6.0, 3.0),
    ]

    # Rows are (column label, statistic, value for 88, value for 99); the
    # "b" statistics appear twice, once per duplicated output column.
    records = []
    for column, stats in [("a", a_stats), ("b", b_stats), ("b", b_stats)]:
        records.extend(
            (column, label) + pair for label, pair in zip(stat_labels, stats)
        )

    # Move (column, statistic) into the index, then transpose so that they
    # become the two column levels and each group becomes a row.
    expected = pd.DataFrame.from_records(records).set_index([0, 1]).T
    expected.columns.names = [None, None]
    expected.index = pd.Index([88, 99], name="a")

    if not as_index:
        # Swap the group-key index for a default one, discarding the keys.
        expected = expected.reset_index(drop=True)
    else:
        # Remove the statistics of the grouping column itself.
        expected = expected.drop(columns=["a"], level=0)

    result = df.groupby("a", as_index=as_index).describe()
    tm.assert_frame_equal(result, expected)
1055
+
1056
+
995
1057
def test_groupby_mean_no_overflow ():
996
1058
# Regression test for (#22487)
997
1059
df = pd .DataFrame (
0 commit comments