@@ -1447,7 +1447,7 @@ def test_attr_wrapper(self):
1447
1447
for name , gp in grouped :
1448
1448
expected [name ] = gp .describe ()
1449
1449
expected = DataFrame (expected ).T
1450
- assert_frame_equal (result . unstack () , expected )
1450
+ assert_frame_equal (result , expected )
1451
1451
1452
1452
# get attribute
1453
1453
result = grouped .dtype
@@ -1459,7 +1459,7 @@ def test_attr_wrapper(self):
1459
1459
def test_series_describe_multikey (self ):
1460
1460
ts = tm .makeTimeSeries ()
1461
1461
grouped = ts .groupby ([lambda x : x .year , lambda x : x .month ])
1462
- result = grouped .describe (). unstack ()
1462
+ result = grouped .describe ()
1463
1463
assert_series_equal (result ['mean' ], grouped .mean (), check_names = False )
1464
1464
assert_series_equal (result ['std' ], grouped .std (), check_names = False )
1465
1465
assert_series_equal (result ['min' ], grouped .min (), check_names = False )
@@ -1468,7 +1468,7 @@ def test_series_describe_single(self):
1468
1468
ts = tm .makeTimeSeries ()
1469
1469
grouped = ts .groupby (lambda x : x .month )
1470
1470
result = grouped .apply (lambda x : x .describe ())
1471
- expected = grouped .describe ()
1471
+ expected = grouped .describe (). stack ()
1472
1472
assert_series_equal (result , expected )
1473
1473
1474
1474
def test_series_index_name (self ):
@@ -1479,17 +1479,27 @@ def test_series_index_name(self):
1479
1479
def test_frame_describe_multikey(self):
    """Check ``describe`` on ``self.tsframe`` grouped along each axis.

    First the frame is grouped by two key functions (year and month of
    the index label) and the result is compared with the per-column
    ``describe`` outputs concatenated side by side under two-level
    column labels ``(column, statistic)``.

    Then the columns are grouped through a dict mapping with ``axis=1``
    and the result is compared with the transposed ``describe`` of the
    whole frame, re-indexed by ``(group key, original label)``.
    """
    grouped = self.tsframe.groupby([lambda x: x.year, lambda x: x.month])
    result = grouped.describe()

    desc_groups = []
    for col in self.tsframe:
        group = grouped[col].describe()
        n_stats = len(group.columns)
        # A MultiIndex level lists each distinct value once; the codes
        # (second argument) point every statistic column at position 0
        # of the outer level instead of duplicating ``col`` in the level.
        group_col = pd.MultiIndex([[col], group.columns],
                                  [[0] * n_stats, range(n_stats)])
        group = pd.DataFrame(group.values,
                             columns=group_col,
                             index=group.index)
        desc_groups.append(group)
    expected = pd.concat(desc_groups, axis=1)
    tm.assert_frame_equal(result, expected)

    # NOTE(review): the mapping assumes tsframe has exactly the columns
    # 'A'..'D' in that order -- confirm against the fixture.
    groupedT = self.tsframe.groupby({'A': 0, 'B': 0,
                                     'C': 1, 'D': 1}, axis=1)
    result = groupedT.describe()
    expected = self.tsframe.describe().T
    # Outer level holds the two distinct group keys; the codes assign
    # the first two rows to key 0 and the last two rows to key 1.
    expected.index = pd.MultiIndex([[0, 1], expected.index],
                                   [[0, 0, 1, 1],
                                    range(len(expected.index))])
    tm.assert_frame_equal(result, expected)
1493
1503
1494
1504
def test_frame_describe_tupleindex (self ):
1495
1505
@@ -1499,10 +1509,27 @@ def test_frame_describe_tupleindex(self):
1499
1509
'z' : [100 , 200 , 300 , 400 , 500 ] * 3 })
1500
1510
df1 ['k' ] = [(0 , 0 , 1 ), (0 , 1 , 0 ), (1 , 0 , 0 )] * 5
1501
1511
df2 = df1 .rename (columns = {'k' : 'key' })
1502
- result = df1 .groupby ('k' ).describe ()
1503
- expected = df2 .groupby ('key' ).describe ()
1504
- expected .index .set_names (result .index .names , inplace = True )
1505
- assert_frame_equal (result , expected )
1512
+ tm .assertRaises (ValueError , lambda : df1 .groupby ('k' ).describe ())
1513
+ tm .assertRaises (ValueError , lambda : df2 .groupby ('key' ).describe ())
1514
+
1515
def test_frame_describe_unstacked_format(self):
    """Check that describing one grouped column gives one row per group.

    The expected frame is indexed by the distinct ``PRICE`` values and
    has one column per summary statistic; each row is the plain
    ``describe`` output of ``VOLUME`` restricted to that price.
    """
    # GH 4792
    stamps = [pd.Timestamp('2011-01-06 10:59:05', tz=None),
              pd.Timestamp('2011-01-06 12:43:33', tz=None),
              pd.Timestamp('2011-01-06 12:54:09', tz=None)]
    prices = dict(zip(stamps, [24990, 25499, 25499]))
    volumes = dict(zip(stamps, [1500000000, 5000000000, 100000000]))
    df = pd.DataFrame({'PRICE': prices,
                       'VOLUME': volumes})

    result = df.groupby('PRICE').VOLUME.describe()

    stat_names = ['count', 'mean', 'std', 'min',
                  '25%', '50%', '75%', 'max']
    group_keys = [24990, 25499]
    # One row of summary statistics per distinct price.
    rows = [df[df.PRICE == key].VOLUME.describe().values.tolist()
            for key in group_keys]
    expected = pd.DataFrame(rows,
                            index=pd.Index(group_keys, name='PRICE'),
                            columns=stat_names)
    tm.assert_frame_equal(result, expected)
1506
1533
1507
1534
def test_frame_groupby (self ):
1508
1535
grouped = self .tsframe .groupby (lambda x : x .weekday ())
@@ -2994,16 +3021,21 @@ def test_non_cython_api(self):
2994
3021
assert_frame_equal (result , expected )
2995
3022
2996
3023
# describe
2997
- expected = DataFrame (dict (B = concat (
2998
- [df .loc [[0 , 1 ], 'B' ].describe (), df .loc [[2 ], 'B' ].describe ()],
2999
- keys = [1 , 3 ])))
3000
- expected .index .names = ['A' , None ]
3024
+ expected_index = pd .Index ([1 , 3 ], name = 'A' )
3025
+ expected_col = pd .MultiIndex (levels = [['B' ],
3026
+ ['count' , 'mean' , 'std' , 'min' ,
3027
+ '25%' , '50%' , '75%' , 'max' ]],
3028
+ labels = [[0 ] * 8 , list (range (8 ))])
3029
+ expected = pd .DataFrame ([[1.0 , 2.0 , nan , 2.0 , 2.0 , 2.0 , 2.0 , 2.0 ],
3030
+ [0.0 , nan , nan , nan , nan , nan , nan , nan ]],
3031
+ index = expected_index ,
3032
+ columns = expected_col )
3001
3033
result = g .describe ()
3002
3034
assert_frame_equal (result , expected )
3003
3035
3004
- expected = concat (
3005
- [ df . loc [[ 0 , 1 ], [ 'A' , 'B' ]] .describe (),
3006
- df . loc [[ 2 ], [ 'A' , 'B' ]]. describe ()], keys = [0 , 1 ])
3036
+ expected = pd . concat ([ df [ df . A == 1 ]. describe (). unstack (). to_frame (). T ,
3037
+ df [ df . A == 3 ] .describe (). unstack (). to_frame (). T ])
3038
+ expected . index = pd . Index ( [0 , 1 ])
3007
3039
result = gni .describe ()
3008
3040
assert_frame_equal (result , expected )
3009
3041
@@ -5157,7 +5189,6 @@ def test_groupby_whitelist(self):
5157
5189
'tail' ,
5158
5190
'cumcount' ,
5159
5191
'resample' ,
5160
- 'describe' ,
5161
5192
'rank' ,
5162
5193
'quantile' ,
5163
5194
'fillna' ,
@@ -5194,7 +5225,6 @@ def test_groupby_whitelist(self):
5194
5225
'tail' ,
5195
5226
'cumcount' ,
5196
5227
'resample' ,
5197
- 'describe' ,
5198
5228
'rank' ,
5199
5229
'quantile' ,
5200
5230
'fillna' ,
0 commit comments