3
3
from datetime import datetime , timedelta
4
4
from functools import partial
5
5
6
- from pandas . compat import range , lrange , zip , product , OrderedDict
6
+ import nose
7
7
import numpy as np
8
8
9
+ import pandas as pd
10
+ import pandas .tseries .offsets as offsets
11
+ import pandas .util .testing as tm
9
12
from pandas import (Series , DataFrame , Panel , Index , isnull ,
10
13
notnull , Timestamp )
11
-
14
+ from pandas .compat import range , lrange , zip , product , OrderedDict
15
+ from pandas .core .base import SpecificationError
16
+ from pandas .core .common import ABCSeries , ABCDataFrame
12
17
from pandas .core .groupby import DataError
18
+ from pandas .tseries .frequencies import MONTHS , DAYS
13
19
from pandas .tseries .index import date_range
14
- from pandas .tseries .tdi import timedelta_range
15
20
from pandas .tseries .offsets import Minute , BDay
16
21
from pandas .tseries .period import period_range , PeriodIndex , Period
17
22
from pandas .tseries .resample import (DatetimeIndex , TimeGrouper ,
18
23
DatetimeIndexResampler )
19
- from pandas .tseries .frequencies import MONTHS , DAYS
20
- from pandas .core .common import ABCSeries , ABCDataFrame
21
- from pandas .core .base import SpecificationError
22
-
23
- import pandas .tseries .offsets as offsets
24
- import pandas as pd
25
-
26
- import nose
27
-
24
+ from pandas .tseries .tdi import timedelta_range
28
25
from pandas .util .testing import (assert_series_equal , assert_almost_equal ,
29
26
assert_frame_equal )
30
- import pandas .util .testing as tm
31
27
32
28
bday = BDay ()
33
29
30
+ # The various methods we support
31
+ downsample_methods = ['min' , 'max' , 'first' , 'last' , 'sum' , 'mean' , 'sem' ,
32
+ 'median' , 'prod' , 'var' , 'ohlc' ]
33
+ upsample_methods = ['count' , 'size' ]
34
+ series_methods = ['nunique' ]
35
+ resample_methods = downsample_methods + upsample_methods + series_methods
36
+
34
37
35
38
class TestResampleAPI (tm .TestCase ):
36
39
_multiprocess_can_split_ = True
@@ -95,12 +98,13 @@ def test_api_changes_v018(self):
95
98
self .assertRaises (ValueError , lambda : r .iat [0 ])
96
99
self .assertRaises (ValueError , lambda : r .ix [0 ])
97
100
self .assertRaises (ValueError , lambda : r .loc [
98
- Timestamp ('2013-01-01 00:00:00' , offset = 'H' )])
101
+ Timestamp ('2013-01-01 00:00:00' , offset = 'H' )])
99
102
self .assertRaises (ValueError , lambda : r .at [
100
- Timestamp ('2013-01-01 00:00:00' , offset = 'H' )])
103
+ Timestamp ('2013-01-01 00:00:00' , offset = 'H' )])
101
104
102
105
def f ():
103
106
r [0 ] = 5
107
+
104
108
self .assertRaises (ValueError , f )
105
109
106
110
# str/repr
@@ -144,7 +148,6 @@ def f():
144
148
145
149
# comparison ops
146
150
for op in ['__lt__' , '__le__' , '__gt__' , '__ge__' , '__eq__' , '__ne__' ]:
147
-
148
151
r = self .series .resample ('H' )
149
152
150
153
with tm .assert_produces_warning (FutureWarning ,
@@ -259,6 +262,7 @@ def test_attribute_access(self):
259
262
# setting
260
263
def f ():
261
264
r .F = 'bah'
265
+
262
266
self .assertRaises (ValueError , f )
263
267
264
268
def test_api_compat_before_use (self ):
@@ -509,10 +513,10 @@ def test_agg_misc(self):
509
513
# errors
510
514
# invalid names in the agg specification
511
515
for t in [r , g ]:
512
-
513
516
def f ():
514
517
r [['A' ]].agg ({'A' : ['sum' , 'std' ],
515
518
'B' : ['mean' , 'std' ]})
519
+
516
520
self .assertRaises (SpecificationError , f )
517
521
518
522
def test_agg_nested_dicts (self ):
@@ -648,8 +652,7 @@ def test_resample_how(self):
648
652
grouplist [1 :6 ] = 1
649
653
grouplist [6 :11 ] = 2
650
654
grouplist [11 :] = 3
651
- args = ['sum' , 'mean' , 'std' , 'sem' , 'max' , 'min' , 'median' , 'first' ,
652
- 'last' , 'ohlc' ]
655
+ args = downsample_methods
653
656
654
657
def _ohlc (group ):
655
658
if isnull (group ).all ():
@@ -679,7 +682,7 @@ def _ohlc(group):
679
682
assert_series_equal (result , expected )
680
683
except BaseException as exc :
681
684
682
- exc .args += ('how=%s' % arg , )
685
+ exc .args += ('how=%s' % arg ,)
683
686
raise
684
687
685
688
def test_resample_how_callables (self ):
@@ -692,7 +695,6 @@ def fn(x, a=1):
692
695
return str (type (x ))
693
696
694
697
class fn_class :
695
-
696
698
def __call__ (self , x ):
697
699
return str (type (x ))
698
700
@@ -768,7 +770,7 @@ def test_resample_rounding(self):
768
770
769
771
from pandas .compat import StringIO
770
772
df = pd .read_csv (StringIO (data ), parse_dates = {'timestamp' : [
771
- 'date' , 'time' ]}, index_col = 'timestamp' )
773
+ 'date' , 'time' ]}, index_col = 'timestamp' )
772
774
df .index .name = None
773
775
result = df .resample ('6s' ).sum ()
774
776
expected = DataFrame ({'value' : [
@@ -1061,10 +1063,10 @@ def test_resample_ohlc_dataframe(self):
1061
1063
1062
1064
df .columns = [['a' , 'b' ], ['c' , 'd' ]]
1063
1065
res = df .resample ('H' ).ohlc ()
1064
- exp .columns = pd .MultiIndex .from_tuples ([( 'a' , 'c' , 'open' ), (
1065
- 'a' , 'c' , 'high ' ), ('a' , 'c' , 'low ' ), ('a' , 'c' , 'close ' ), (
1066
- 'b ' , 'd ' , 'open ' ), ('b' , 'd' , 'high ' ), ('b' , 'd' , 'low ' ), (
1067
- 'b' , 'd' , 'close' )])
1066
+ exp .columns = pd .MultiIndex .from_tuples ([
1067
+ ( 'a' , 'c' , 'open ' ), ('a' , 'c' , 'high ' ), ('a' , 'c' , 'low ' ),
1068
+ ( 'a ' , 'c ' , 'close ' ), ('b' , 'd' , 'open ' ), ('b' , 'd' , 'high ' ),
1069
+ ( 'b' , 'd' , 'low' ), ( 'b' , 'd' , 'close' )])
1068
1070
assert_frame_equal (exp , res )
1069
1071
1070
1072
# dupe columns fail atm
@@ -1333,7 +1335,7 @@ def test_resample_empty(self):
1333
1335
# them to ensure they no longer do. (GH #10228)
1334
1336
for index in tm .all_timeseries_index_generator (0 ):
1335
1337
for dtype in (np .float , np .int , np .object , 'datetime64[ns]' ):
1336
- for how in ( 'count' , 'mean' , 'min' , 'ohlc' , 'last' , 'prod' ) :
1338
+ for how in downsample_methods + upsample_methods :
1337
1339
empty_series = pd .Series ([], index , dtype )
1338
1340
try :
1339
1341
getattr (empty_series .resample ('d' ), how )()
@@ -1342,6 +1344,30 @@ def test_resample_empty(self):
1342
1344
# (ex: doing mean with dtype of np.object)
1343
1345
pass
1344
1346
1347
+ def test_resample_empty_nunique (self ):
1348
+
1349
+ # this routine should be folded into the above when
1350
+ # GH12886 is closed
1351
+ for index in tm .all_timeseries_index_generator (0 ):
1352
+ for dtype in (np .float , np .int , np .object , 'datetime64[ns]' ):
1353
+ for how in ['nunique' ]:
1354
+
1355
+ empty_series = pd .Series ([], index , dtype )
1356
+
1357
+ def f ():
1358
+ getattr (empty_series .resample ('d' ), how )()
1359
+
1360
+ if isinstance (index ,
1361
+ (pd .DatetimeIndex , pd .TimedeltaIndex )):
1362
+ self .assertRaises (Exception , f )
1363
+ else :
1364
+ try :
1365
+ f ()
1366
+ except DataError :
1367
+ # Ignore these since some combinations are invalid
1368
+ # (ex: doing mean with dtype of np.object)
1369
+ pass
1370
+
1345
1371
def test_resample_dtype_preservation (self ):
1346
1372
1347
1373
# GH 12202
@@ -1449,11 +1475,12 @@ def test_resample_anchored_multiday(self):
1449
1475
#
1450
1476
# See: https://github.com/pydata/pandas/issues/8683
1451
1477
1452
- s = pd .Series (np .random .randn (5 ),
1453
- index = pd .date_range ('2014-10-14 23:06:23.206' ,
1454
- periods = 3 , freq = '400L' ) |
1455
- pd .date_range ('2014-10-15 23:00:00' ,
1456
- periods = 2 , freq = '2200L' ))
1478
+ index = pd .date_range (
1479
+ '2014-10-14 23:06:23.206' , periods = 3 , freq = '400L'
1480
+ ) | pd .date_range (
1481
+ '2014-10-15 23:00:00' , periods = 2 , freq = '2200L' )
1482
+
1483
+ s = pd .Series (np .random .randn (5 ), index = index )
1457
1484
1458
1485
# Ensure left closing works
1459
1486
result = s .resample ('2200L' ).mean ()
@@ -1763,7 +1790,6 @@ def _simple_pts(start, end, freq='D'):
1763
1790
1764
1791
1765
1792
class TestResamplePeriodIndex (tm .TestCase ):
1766
-
1767
1793
_multiprocess_can_split_ = True
1768
1794
1769
1795
def test_annual_upsample_D_s_f (self ):
@@ -1907,16 +1933,40 @@ def test_resample_basic(self):
1907
1933
1908
1934
def test_resample_empty (self ):
1909
1935
1910
- # GH12771
1936
+ # GH12771 & GH12868
1911
1937
index = PeriodIndex (start = '2000' , periods = 0 , freq = 'D' , name = 'idx' )
1912
1938
s = Series (index = index )
1913
- result = s .resample ('M' ).sum ()
1914
1939
1915
- # after GH12774 is resolved, this should be a PeriodIndex
1916
- expected_index = DatetimeIndex ([], name = 'idx' )
1940
+ expected_index = PeriodIndex ([], name = 'idx' , freq = 'M' )
1917
1941
expected = Series (index = expected_index )
1942
+
1943
+ for method in resample_methods :
1944
+ result = getattr (s .resample ('M' ), method )()
1945
+ assert_series_equal (result , expected )
1946
+
1947
+ def test_resample_count (self ):
1948
+
1949
+ # GH12774
1950
+ series = pd .Series (1 , index = pd .period_range (start = '2000' ,
1951
+ periods = 100 ))
1952
+ result = series .resample ('M' ).count ()
1953
+
1954
+ expected_index = pd .period_range (start = '2000' , freq = 'M' , periods = 4 )
1955
+ expected = pd .Series ([31 , 29 , 31 , 9 ], index = expected_index )
1956
+
1918
1957
assert_series_equal (result , expected )
1919
1958
1959
+ def test_resample_same_freq (self ):
1960
+
1961
+ # GH12770
1962
+ series = pd .Series (range (3 ), index = pd .period_range (
1963
+ start = '2000' , periods = 3 , freq = 'M' ))
1964
+ expected = series
1965
+
1966
+ for method in resample_methods :
1967
+ result = getattr (series .resample ('M' ), method )()
1968
+ assert_series_equal (result , expected )
1969
+
1920
1970
def test_with_local_timezone_pytz (self ):
1921
1971
# GH5430
1922
1972
tm ._skip_if_no_pytz ()
@@ -2493,8 +2543,8 @@ def test_aggregate_with_nat(self):
2493
2543
# GH 9925
2494
2544
self .assertEqual (dt_result .index .name , 'key' )
2495
2545
2496
- # if NaT is included, 'var', 'std', 'mean', 'first','last' and 'nth '
2497
- # doesn't work yet
2546
+ # if NaT is included, 'var', 'std', 'mean', 'first', 'last'
2547
+ # and 'nth' don't work yet
2498
2548
2499
2549
2500
2550
if __name__ == '__main__' :
0 commit comments