@@ -1085,7 +1085,7 @@ def test_attr_wrapper(self):
1085
1085
for name , gp in grouped :
1086
1086
expected [name ] = gp .describe ()
1087
1087
expected = DataFrame (expected ).T
1088
- assert_frame_equal (result . unstack () , expected )
1088
+ assert_frame_equal (result , expected )
1089
1089
1090
1090
# get attribute
1091
1091
result = grouped .dtype
@@ -1097,7 +1097,7 @@ def test_attr_wrapper(self):
1097
1097
def test_series_describe_multikey (self ):
1098
1098
ts = tm .makeTimeSeries ()
1099
1099
grouped = ts .groupby ([lambda x : x .year , lambda x : x .month ])
1100
- result = grouped .describe (). unstack ()
1100
+ result = grouped .describe ()
1101
1101
assert_series_equal (result ['mean' ], grouped .mean (), check_names = False )
1102
1102
assert_series_equal (result ['std' ], grouped .std (), check_names = False )
1103
1103
assert_series_equal (result ['min' ], grouped .min (), check_names = False )
@@ -1106,7 +1106,7 @@ def test_series_describe_single(self):
1106
1106
ts = tm .makeTimeSeries ()
1107
1107
grouped = ts .groupby (lambda x : x .month )
1108
1108
result = grouped .apply (lambda x : x .describe ())
1109
- expected = grouped .describe ()
1109
+ expected = grouped .describe (). stack ()
1110
1110
assert_series_equal (result , expected )
1111
1111
1112
1112
def test_series_index_name (self ):
@@ -1117,17 +1117,27 @@ def test_series_index_name(self):
1117
1117
def test_frame_describe_multikey(self):
    """Compare grouped ``describe()`` output with hand-assembled frames.

    Two groupings of ``self.tsframe`` are checked:

    * grouped by two key functions (year, month), the result is compared
      with the per-column ``describe()`` frames concatenated side by side
      under a two-level (column, statistic) header;
    * grouped along ``axis=1`` by a column-to-group mapping, the result is
      compared with the transposed ``describe()`` of the whole frame,
      re-indexed by (group, column).
    """
    grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month])
    result = grouped.describe()
    desc_groups = []
    for col in self.tsframe:
        group = grouped[col].describe()
        # Header is (col, statistic).  from_product keeps each level unique;
        # passing ``[col] * n`` as a level to the raw constructor repeats a
        # level value, which is not a valid MultiIndex level.
        group_col = pd.MultiIndex.from_product([[col], group.columns])
        group = pd.DataFrame(group.values,
                             columns=group_col,
                             index=group.index)
        desc_groups.append(group)
    expected = pd.concat(desc_groups, axis=1)
    tm.assert_frame_equal(result, expected)

    groupedT = self.tsframe.groupby({'A': 0, 'B': 0,
                                     'C': 1, 'D': 1}, axis=1)
    result = groupedT.describe()
    expected = self.tsframe.describe().T
    # Rows are (group key, column name).  from_arrays factorizes the group
    # keys into unique levels instead of using [0, 0, 1, 1] as a level.
    expected.index = pd.MultiIndex.from_arrays(
        [[0, 0, 1, 1], expected.index])
    tm.assert_frame_equal(result, expected)
1131
1141
1132
1142
def test_frame_describe_tupleindex (self ):
1133
1143
@@ -1137,10 +1147,27 @@ def test_frame_describe_tupleindex(self):
1137
1147
'z' : [100 , 200 , 300 , 400 , 500 ] * 3 })
1138
1148
df1 ['k' ] = [(0 , 0 , 1 ), (0 , 1 , 0 ), (1 , 0 , 0 )] * 5
1139
1149
df2 = df1 .rename (columns = {'k' : 'key' })
1140
- result = df1 .groupby ('k' ).describe ()
1141
- expected = df2 .groupby ('key' ).describe ()
1142
- expected .index .set_names (result .index .names , inplace = True )
1143
- assert_frame_equal (result , expected )
1150
+ tm .assertRaises (ValueError , lambda : df1 .groupby ('k' ).describe ())
1151
+ tm .assertRaises (ValueError , lambda : df2 .groupby ('key' ).describe ())
1152
+
1153
def test_frame_describe_unstacked_format(self):
    """Check the layout of ``describe()`` on a grouped single column.

    The expected frame has one row per ``PRICE`` group and one column per
    statistic, filled from ``describe()`` of each group's ``VOLUME`` values.
    """
    # GH 4792
    stamps = [pd.Timestamp(text, tz=None)
              for text in ('2011-01-06 10:59:05',
                           '2011-01-06 12:43:33',
                           '2011-01-06 12:54:09')]
    prices = dict(zip(stamps, [24990, 25499, 25499]))
    volumes = dict(zip(stamps, [1500000000, 5000000000, 100000000]))
    df = pd.DataFrame({'PRICE': prices,
                       'VOLUME': volumes})

    result = df.groupby('PRICE').VOLUME.describe()

    # One row of statistics per distinct price, in ascending price order.
    group_keys = [24990, 25499]
    stat_names = ['count', 'mean', 'std', 'min',
                  '25%', '50%', '75%', 'max']
    rows = [df[df.PRICE == key].VOLUME.describe().values.tolist()
            for key in group_keys]
    expected = pd.DataFrame(rows,
                            index=pd.Index(group_keys, name='PRICE'),
                            columns=stat_names)
    tm.assert_frame_equal(result, expected)
1144
1171
1145
1172
def test_frame_groupby (self ):
1146
1173
grouped = self .tsframe .groupby (lambda x : x .weekday ())
@@ -2545,16 +2572,21 @@ def test_non_cython_api(self):
2545
2572
assert_frame_equal (result , expected )
2546
2573
2547
2574
# describe
2548
- expected = DataFrame (dict (B = concat (
2549
- [df .loc [[0 , 1 ], 'B' ].describe (), df .loc [[2 ], 'B' ].describe ()],
2550
- keys = [1 , 3 ])))
2551
- expected .index .names = ['A' , None ]
2575
+ expected_index = pd .Index ([1 , 3 ], name = 'A' )
2576
+ expected_col = pd .MultiIndex (levels = [['B' ],
2577
+ ['count' , 'mean' , 'std' , 'min' ,
2578
+ '25%' , '50%' , '75%' , 'max' ]],
2579
+ labels = [[0 ] * 8 , list (range (8 ))])
2580
+ expected = pd .DataFrame ([[1.0 , 2.0 , nan , 2.0 , 2.0 , 2.0 , 2.0 , 2.0 ],
2581
+ [0.0 , nan , nan , nan , nan , nan , nan , nan ]],
2582
+ index = expected_index ,
2583
+ columns = expected_col )
2552
2584
result = g .describe ()
2553
2585
assert_frame_equal (result , expected )
2554
2586
2555
- expected = concat (
2556
- [ df . loc [[ 0 , 1 ], [ 'A' , 'B' ]] .describe (),
2557
- df . loc [[ 2 ], [ 'A' , 'B' ]]. describe ()], keys = [0 , 1 ])
2587
+ expected = pd . concat ([ df [ df . A == 1 ]. describe (). unstack (). to_frame (). T ,
2588
+ df [ df . A == 3 ] .describe (). unstack (). to_frame (). T ])
2589
+ expected . index = pd . Index ( [0 , 1 ])
2558
2590
result = gni .describe ()
2559
2591
assert_frame_equal (result , expected )
2560
2592
@@ -3872,7 +3904,6 @@ def test_groupby_whitelist(self):
3872
3904
'tail' ,
3873
3905
'cumcount' ,
3874
3906
'resample' ,
3875
- 'describe' ,
3876
3907
'rank' ,
3877
3908
'quantile' ,
3878
3909
'fillna' ,
@@ -3909,7 +3940,6 @@ def test_groupby_whitelist(self):
3909
3940
'tail' ,
3910
3941
'cumcount' ,
3911
3942
'resample' ,
3912
- 'describe' ,
3913
3943
'rank' ,
3914
3944
'quantile' ,
3915
3945
'fillna' ,
0 commit comments