@@ -744,28 +744,32 @@ def test_resample_consistency(self):
744
744
745
745
def test_resample_timegrouper (self ):
746
746
# GH 7227
747
- dates = [datetime (2014 , 10 , 1 ), datetime (2014 , 9 , 3 ),
747
+ dates1 = [datetime (2014 , 10 , 1 ), datetime (2014 , 9 , 3 ),
748
748
datetime (2014 , 11 , 5 ), datetime (2014 , 9 , 5 ),
749
749
datetime (2014 , 10 , 8 ), datetime (2014 , 7 , 15 )]
750
750
751
- df = DataFrame (dict (A = dates , B = np .arange (len (dates ))))
752
- result = df .set_index ('A' ).resample ('M' , how = 'count' )
753
- exp_idx = pd .DatetimeIndex (['2014-07-31' , '2014-08-31' , '2014-09-30' ,
754
- '2014-10-31' , '2014-11-30' ], freq = 'M' , name = 'A' )
755
- expected = DataFrame ({'B' : [1 , 0 , 2 , 2 , 1 ]}, index = exp_idx )
756
- assert_frame_equal (result , expected )
751
+ dates2 = dates1 [:2 ] + [pd .NaT ] + dates1 [2 :4 ] + [pd .NaT ] + dates1 [4 :]
752
+ dates3 = [pd .NaT ] + dates1 + [pd .NaT ]
757
753
758
- result = df .groupby (pd .Grouper (freq = 'M' , key = 'A' )).count ()
759
- assert_frame_equal (result , expected )
754
+ for dates in [dates1 , dates2 , dates3 ]:
755
+ df = DataFrame (dict (A = dates , B = np .arange (len (dates ))))
756
+ result = df .set_index ('A' ).resample ('M' , how = 'count' )
757
+ exp_idx = pd .DatetimeIndex (['2014-07-31' , '2014-08-31' , '2014-09-30' ,
758
+ '2014-10-31' , '2014-11-30' ], freq = 'M' , name = 'A' )
759
+ expected = DataFrame ({'B' : [1 , 0 , 2 , 2 , 1 ]}, index = exp_idx )
760
+ assert_frame_equal (result , expected )
760
761
761
- df = DataFrame (dict (A = dates , B = np .arange (len (dates )), C = np .arange (len (dates ))))
762
- result = df .set_index ('A' ).resample ('M' , how = 'count' )
763
- expected = DataFrame ({'B' : [1 , 0 , 2 , 2 , 1 ], 'C' : [1 , 0 , 2 , 2 , 1 ]},
764
- index = exp_idx , columns = ['B' , 'C' ])
765
- assert_frame_equal (result , expected )
762
+ result = df .groupby (pd .Grouper (freq = 'M' , key = 'A' )).count ()
763
+ assert_frame_equal (result , expected )
766
764
767
- result = df .groupby (pd .Grouper (freq = 'M' , key = 'A' )).count ()
768
- assert_frame_equal (result , expected )
765
+ df = DataFrame (dict (A = dates , B = np .arange (len (dates )), C = np .arange (len (dates ))))
766
+ result = df .set_index ('A' ).resample ('M' , how = 'count' )
767
+ expected = DataFrame ({'B' : [1 , 0 , 2 , 2 , 1 ], 'C' : [1 , 0 , 2 , 2 , 1 ]},
768
+ index = exp_idx , columns = ['B' , 'C' ])
769
+ assert_frame_equal (result , expected )
770
+
771
+ result = df .groupby (pd .Grouper (freq = 'M' , key = 'A' )).count ()
772
+ assert_frame_equal (result , expected )
769
773
770
774
771
775
def _simple_ts (start , end , freq = 'D' ):
@@ -1302,6 +1306,84 @@ def test_fails_on_no_datetime_index(self):
1302
1306
"but got an instance of %r" % name ):
1303
1307
df .groupby (TimeGrouper ('D' ))
1304
1308
1309
+ def test_aggregate_normal (self ):
1310
+ # check that TimeGrouper's aggregation is identical to a normal groupby
1311
+
1312
+ n = 20
1313
+ data = np .random .randn (n , 4 )
1314
+ normal_df = DataFrame (data , columns = ['A' , 'B' , 'C' , 'D' ])
1315
+ normal_df ['key' ] = [1 , 2 , 3 , 4 , 5 ] * 4
1316
+
1317
+ dt_df = DataFrame (data , columns = ['A' , 'B' , 'C' , 'D' ])
1318
+ dt_df ['key' ] = [datetime (2013 , 1 , 1 ), datetime (2013 , 1 , 2 ), datetime (2013 , 1 , 3 ),
1319
+ datetime (2013 , 1 , 4 ), datetime (2013 , 1 , 5 )] * 4
1320
+
1321
+ normal_grouped = normal_df .groupby ('key' )
1322
+ dt_grouped = dt_df .groupby (TimeGrouper (key = 'key' , freq = 'D' ))
1323
+
1324
+ for func in ['min' , 'max' , 'prod' , 'var' , 'std' , 'mean' ]:
1325
+ expected = getattr (normal_grouped , func )()
1326
+ dt_result = getattr (dt_grouped , func )()
1327
+ expected .index = date_range (start = '2013-01-01' , freq = 'D' , periods = 5 , name = 'key' )
1328
+ assert_frame_equal (expected , dt_result )
1329
+
1330
+ for func in ['count' , 'sum' ]:
1331
+ expected = getattr (normal_grouped , func )()
1332
+ expected .index = date_range (start = '2013-01-01' , freq = 'D' , periods = 5 , name = 'key' )
1333
+ dt_result = getattr (dt_grouped , func )()
1334
+ assert_frame_equal (expected , dt_result )
1335
+
1336
+ """
1337
+ for func in ['first', 'last']:
1338
+ expected = getattr(normal_grouped, func)()
1339
+ expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key')
1340
+ dt_result = getattr(dt_grouped, func)()
1341
+ assert_frame_equal(expected, dt_result)
1342
+
1343
+ for func in ['nth']:
1344
+ expected = getattr(normal_grouped, func)(3)
1345
+ expected.index = date_range(start='2013-01-01', freq='D', periods=5, name='key')
1346
+ dt_result = getattr(dt_grouped, func)(3)
1347
+ assert_frame_equal(expected, dt_result)
1348
+ """
1349
+ # when a TimeGrouper is used, 'size', 'first', 'last' and 'nth' don't work yet
1350
+
1351
+ def test_aggregate_with_nat (self ):
1352
+ # check that TimeGrouper's aggregation matches a normal groupby when the key contains NaT
1353
+
1354
+ n = 20
1355
+ data = np .random .randn (n , 4 )
1356
+ normal_df = DataFrame (data , columns = ['A' , 'B' , 'C' , 'D' ])
1357
+ normal_df ['key' ] = [1 , 2 , np .nan , 4 , 5 ] * 4
1358
+
1359
+ dt_df = DataFrame (data , columns = ['A' , 'B' , 'C' , 'D' ])
1360
+ dt_df ['key' ] = [datetime (2013 , 1 , 1 ), datetime (2013 , 1 , 2 ), pd .NaT ,
1361
+ datetime (2013 , 1 , 4 ), datetime (2013 , 1 , 5 )] * 4
1362
+
1363
+ normal_grouped = normal_df .groupby ('key' )
1364
+ dt_grouped = dt_df .groupby (TimeGrouper (key = 'key' , freq = 'D' ))
1365
+
1366
+ for func in ['min' , 'max' , 'prod' ]:
1367
+ normal_result = getattr (normal_grouped , func )()
1368
+ dt_result = getattr (dt_grouped , func )()
1369
+ pad = DataFrame ([[np .nan , np .nan , np .nan , np .nan ]],
1370
+ index = [3 ], columns = ['A' , 'B' , 'C' , 'D' ])
1371
+ expected = normal_result .append (pad )
1372
+ expected = expected .sort_index ()
1373
+ expected .index = date_range (start = '2013-01-01' , freq = 'D' , periods = 5 , name = 'key' )
1374
+ assert_frame_equal (expected , dt_result )
1375
+
1376
+ for func in ['count' , 'sum' ]:
1377
+ normal_result = getattr (normal_grouped , func )()
1378
+ pad = DataFrame ([[0 , 0 , 0 , 0 ]], index = [3 ], columns = ['A' , 'B' , 'C' , 'D' ])
1379
+ expected = normal_result .append (pad )
1380
+ expected = expected .sort_index ()
1381
+ expected .index = date_range (start = '2013-01-01' , freq = 'D' , periods = 5 , name = 'key' )
1382
+ dt_result = getattr (dt_grouped , func )()
1383
+ assert_frame_equal (expected , dt_result )
1384
+
1385
+ # when NaT is included, 'var', 'std', 'mean', 'size', 'first', 'last' and 'nth' don't work yet
1386
+
1305
1387
1306
1388
if __name__ == '__main__' :
1307
1389
nose .runmodule (argv = [__file__ , '-vvs' , '-x' , '--pdb' , '--pdb-failure' ],
0 commit comments