Skip to content

Commit 77be872

Browse files
max-sixtyjreback
authored and committed
BUG: PeriodIndex count & resample-on-same-freq fix
closes #12774 closes #12868 closes #12770 closes #12874
1 parent b874cb3 commit 77be872

File tree

3 files changed

+111
-61
lines changed

3 files changed

+111
-61
lines changed

doc/source/whatsnew/v0.18.1.txt

+3
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,9 @@ Bug Fixes
217217
- ``usecols`` parameter in ``pd.read_csv`` is now respected even when the lines of a CSV file are not even (:issue:`12203`)
218218
- Bug in ``groupby.transform(..)`` when ``axis=1`` is specified with a non-monotonic ordered index (:issue:`12713`)
219219
- Bug in ``Period`` and ``PeriodIndex`` creation raising ``KeyError`` if ``freq="Minute"`` is specified. Note that the "Minute" freq has been deprecated since v0.17.0; using ``freq="T"`` is recommended instead (:issue:`11854`)
220+
- Bug in ``.resample(...).count()`` with a ``PeriodIndex`` always raising a ``TypeError`` (:issue:`12774`)
221+
- Bug in ``.resample(...)`` with a ``PeriodIndex`` casting to a ``DatetimeIndex`` when empty (:issue:`12868`)
222+
- Bug in ``.resample(...)`` with a ``PeriodIndex`` when resampling to an existing frequency (:issue:`12770`)
220223
- Bug in printing data that contains ``Period`` objects with different ``freq`` raising ``ValueError`` (:issue:`12615`)
221224
- Bug in numpy compatibility of ``np.round()`` on a ``Series`` (:issue:`12600`)
222225
- Bug in ``Series`` construction with a ``Categorical`` when ``dtype='category'`` is specified (:issue:`12574`)

pandas/tseries/resample.py

+19-22
Original file line numberDiff line numberDiff line change
@@ -653,9 +653,6 @@ def _convert_obj(self, obj):
653653
# Cannot have multiples of periods; convert to timestamp
654654
self.kind = 'timestamp'
655655

656-
if not len(obj):
657-
self.kind = 'timestamp'
658-
659656
# convert to timestamp
660657
if not (self.kind is None or self.kind == 'period'):
661658
obj = obj.to_timestamp(how=self.convention)
@@ -673,18 +670,15 @@ def aggregate(self, arg, *args, **kwargs):
673670
def _get_new_index(self):
674671
""" return our new index """
675672
ax = self.ax
676-
ax_attrs = ax._get_attributes_dict()
677-
ax_attrs['freq'] = self.freq
678-
obj = self._selected_obj
679673

680674
if len(ax) == 0:
681-
new_index = PeriodIndex(data=[], **ax_attrs)
682-
return obj.reindex(new_index)
683-
684-
start = ax[0].asfreq(self.freq, how=self.convention)
685-
end = ax[-1].asfreq(self.freq, how='end')
675+
values = []
676+
else:
677+
start = ax[0].asfreq(self.freq, how=self.convention)
678+
end = ax[-1].asfreq(self.freq, how='end')
679+
values = period_range(start, end, freq=self.freq).values
686680

687-
return period_range(start, end, **ax_attrs)
681+
return ax._shallow_copy(values, freq=self.freq)
688682

689683
def _downsample(self, how, **kwargs):
690684
"""
@@ -705,7 +699,7 @@ def _downsample(self, how, **kwargs):
705699

706700
new_index = self._get_new_index()
707701
if len(new_index) == 0:
708-
return self._wrap_result(new_index)
702+
return self._wrap_result(self._selected_obj.reindex(new_index))
709703

710704
# Start vs. end of period
711705
memb = ax.asfreq(self.freq, how=self.convention)
@@ -718,6 +712,8 @@ def _downsample(self, how, **kwargs):
718712
return self._groupby_and_aggregate(grouper, how)
719713
elif is_superperiod(ax.freq, self.freq):
720714
return self.asfreq()
715+
elif ax.freq == self.freq:
716+
return self.asfreq()
721717

722718
raise ValueError('Frequency {axfreq} cannot be '
723719
'resampled to {freq}'.format(
@@ -743,23 +739,24 @@ def _upsample(self, method, limit=None):
743739

744740
ax = self.ax
745741
obj = self.obj
746-
747742
new_index = self._get_new_index()
748-
if len(new_index) == 0:
749-
return self._wrap_result(new_index)
750743

751-
if not is_superperiod(ax.freq, self.freq):
752-
return self.asfreq()
744+
if len(new_index) == 0:
745+
return self._wrap_result(self._selected_obj.reindex(new_index))
753746

754747
# Start vs. end of period
755748
memb = ax.asfreq(self.freq, how=self.convention)
756749

757750
# Get the fill indexer
758751
indexer = memb.get_indexer(new_index, method=method, limit=limit)
759-
return self._wrap_result(_take_new_index(obj,
760-
indexer,
761-
new_index,
762-
axis=self.axis))
752+
return self._wrap_result(_take_new_index(
753+
obj, indexer, new_index, axis=self.axis))
754+
755+
def _groupby_and_aggregate(self, grouper, how, *args, **kwargs):
756+
if grouper is None:
757+
return self._downsample(how, **kwargs)
758+
return super(PeriodIndexResampler, self)._groupby_and_aggregate(
759+
grouper, how, *args, **kwargs)
763760

764761

765762
class TimedeltaResampler(DatetimeIndexResampler):

pandas/tseries/tests/test_resample.py

+89-39
Original file line numberDiff line numberDiff line change
@@ -3,34 +3,37 @@
33
from datetime import datetime, timedelta
44
from functools import partial
55

6-
from pandas.compat import range, lrange, zip, product, OrderedDict
6+
import nose
77
import numpy as np
88

9+
import pandas as pd
10+
import pandas.tseries.offsets as offsets
11+
import pandas.util.testing as tm
912
from pandas import (Series, DataFrame, Panel, Index, isnull,
1013
notnull, Timestamp)
11-
14+
from pandas.compat import range, lrange, zip, product, OrderedDict
15+
from pandas.core.base import SpecificationError
16+
from pandas.core.common import ABCSeries, ABCDataFrame
1217
from pandas.core.groupby import DataError
18+
from pandas.tseries.frequencies import MONTHS, DAYS
1319
from pandas.tseries.index import date_range
14-
from pandas.tseries.tdi import timedelta_range
1520
from pandas.tseries.offsets import Minute, BDay
1621
from pandas.tseries.period import period_range, PeriodIndex, Period
1722
from pandas.tseries.resample import (DatetimeIndex, TimeGrouper,
1823
DatetimeIndexResampler)
19-
from pandas.tseries.frequencies import MONTHS, DAYS
20-
from pandas.core.common import ABCSeries, ABCDataFrame
21-
from pandas.core.base import SpecificationError
22-
23-
import pandas.tseries.offsets as offsets
24-
import pandas as pd
25-
26-
import nose
27-
24+
from pandas.tseries.tdi import timedelta_range
2825
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
2926
assert_frame_equal)
30-
import pandas.util.testing as tm
3127

3228
bday = BDay()
3329

30+
# The various methods we support
31+
downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem',
32+
'median', 'prod', 'var', 'ohlc']
33+
upsample_methods = ['count', 'size']
34+
series_methods = ['nunique']
35+
resample_methods = downsample_methods + upsample_methods + series_methods
36+
3437

3538
class TestResampleAPI(tm.TestCase):
3639
_multiprocess_can_split_ = True
@@ -95,12 +98,13 @@ def test_api_changes_v018(self):
9598
self.assertRaises(ValueError, lambda: r.iat[0])
9699
self.assertRaises(ValueError, lambda: r.ix[0])
97100
self.assertRaises(ValueError, lambda: r.loc[
98-
Timestamp('2013-01-01 00:00:00', offset='H')])
101+
Timestamp('2013-01-01 00:00:00', offset='H')])
99102
self.assertRaises(ValueError, lambda: r.at[
100-
Timestamp('2013-01-01 00:00:00', offset='H')])
103+
Timestamp('2013-01-01 00:00:00', offset='H')])
101104

102105
def f():
103106
r[0] = 5
107+
104108
self.assertRaises(ValueError, f)
105109

106110
# str/repr
@@ -144,7 +148,6 @@ def f():
144148

145149
# comparison ops
146150
for op in ['__lt__', '__le__', '__gt__', '__ge__', '__eq__', '__ne__']:
147-
148151
r = self.series.resample('H')
149152

150153
with tm.assert_produces_warning(FutureWarning,
@@ -259,6 +262,7 @@ def test_attribute_access(self):
259262
# setting
260263
def f():
261264
r.F = 'bah'
265+
262266
self.assertRaises(ValueError, f)
263267

264268
def test_api_compat_before_use(self):
@@ -509,10 +513,10 @@ def test_agg_misc(self):
509513
# errors
510514
# invalid names in the agg specification
511515
for t in [r, g]:
512-
513516
def f():
514517
r[['A']].agg({'A': ['sum', 'std'],
515518
'B': ['mean', 'std']})
519+
516520
self.assertRaises(SpecificationError, f)
517521

518522
def test_agg_nested_dicts(self):
@@ -648,8 +652,7 @@ def test_resample_how(self):
648652
grouplist[1:6] = 1
649653
grouplist[6:11] = 2
650654
grouplist[11:] = 3
651-
args = ['sum', 'mean', 'std', 'sem', 'max', 'min', 'median', 'first',
652-
'last', 'ohlc']
655+
args = downsample_methods
653656

654657
def _ohlc(group):
655658
if isnull(group).all():
@@ -679,7 +682,7 @@ def _ohlc(group):
679682
assert_series_equal(result, expected)
680683
except BaseException as exc:
681684

682-
exc.args += ('how=%s' % arg, )
685+
exc.args += ('how=%s' % arg,)
683686
raise
684687

685688
def test_resample_how_callables(self):
@@ -692,7 +695,6 @@ def fn(x, a=1):
692695
return str(type(x))
693696

694697
class fn_class:
695-
696698
def __call__(self, x):
697699
return str(type(x))
698700

@@ -768,7 +770,7 @@ def test_resample_rounding(self):
768770

769771
from pandas.compat import StringIO
770772
df = pd.read_csv(StringIO(data), parse_dates={'timestamp': [
771-
'date', 'time']}, index_col='timestamp')
773+
'date', 'time']}, index_col='timestamp')
772774
df.index.name = None
773775
result = df.resample('6s').sum()
774776
expected = DataFrame({'value': [
@@ -1061,10 +1063,10 @@ def test_resample_ohlc_dataframe(self):
10611063

10621064
df.columns = [['a', 'b'], ['c', 'd']]
10631065
res = df.resample('H').ohlc()
1064-
exp.columns = pd.MultiIndex.from_tuples([('a', 'c', 'open'), (
1065-
'a', 'c', 'high'), ('a', 'c', 'low'), ('a', 'c', 'close'), (
1066-
'b', 'd', 'open'), ('b', 'd', 'high'), ('b', 'd', 'low'), (
1067-
'b', 'd', 'close')])
1066+
exp.columns = pd.MultiIndex.from_tuples([
1067+
('a', 'c', 'open'), ('a', 'c', 'high'), ('a', 'c', 'low'),
1068+
('a', 'c', 'close'), ('b', 'd', 'open'), ('b', 'd', 'high'),
1069+
('b', 'd', 'low'), ('b', 'd', 'close')])
10681070
assert_frame_equal(exp, res)
10691071

10701072
# dupe columns fail atm
@@ -1333,7 +1335,7 @@ def test_resample_empty(self):
13331335
# them to ensure they no longer do. (GH #10228)
13341336
for index in tm.all_timeseries_index_generator(0):
13351337
for dtype in (np.float, np.int, np.object, 'datetime64[ns]'):
1336-
for how in ('count', 'mean', 'min', 'ohlc', 'last', 'prod'):
1338+
for how in downsample_methods + upsample_methods:
13371339
empty_series = pd.Series([], index, dtype)
13381340
try:
13391341
getattr(empty_series.resample('d'), how)()
@@ -1342,6 +1344,30 @@ def test_resample_empty(self):
13421344
# (ex: doing mean with dtype of np.object)
13431345
pass
13441346

1347+
def test_resample_empty_nunique(self):
1348+
1349+
# these routines should be folded into the above when
1350+
# GH12886 is closed
1351+
for index in tm.all_timeseries_index_generator(0):
1352+
for dtype in (np.float, np.int, np.object, 'datetime64[ns]'):
1353+
for how in ['nunique']:
1354+
1355+
empty_series = pd.Series([], index, dtype)
1356+
1357+
def f():
1358+
getattr(empty_series.resample('d'), how)()
1359+
1360+
if isinstance(index,
1361+
(pd.DatetimeIndex, pd.TimedeltaIndex)):
1362+
self.assertRaises(Exception, f)
1363+
else:
1364+
try:
1365+
f()
1366+
except DataError:
1367+
# Ignore these since some combinations are invalid
1368+
# (ex: doing mean with dtype of np.object)
1369+
pass
1370+
13451371
def test_resample_dtype_preservation(self):
13461372

13471373
# GH 12202
@@ -1449,11 +1475,12 @@ def test_resample_anchored_multiday(self):
14491475
#
14501476
# See: https://github.com/pydata/pandas/issues/8683
14511477

1452-
s = pd.Series(np.random.randn(5),
1453-
index=pd.date_range('2014-10-14 23:06:23.206',
1454-
periods=3, freq='400L') |
1455-
pd.date_range('2014-10-15 23:00:00',
1456-
periods=2, freq='2200L'))
1478+
index = pd.date_range(
1479+
'2014-10-14 23:06:23.206', periods=3, freq='400L'
1480+
) | pd.date_range(
1481+
'2014-10-15 23:00:00', periods=2, freq='2200L')
1482+
1483+
s = pd.Series(np.random.randn(5), index=index)
14571484

14581485
# Ensure left closing works
14591486
result = s.resample('2200L').mean()
@@ -1763,7 +1790,6 @@ def _simple_pts(start, end, freq='D'):
17631790

17641791

17651792
class TestResamplePeriodIndex(tm.TestCase):
1766-
17671793
_multiprocess_can_split_ = True
17681794

17691795
def test_annual_upsample_D_s_f(self):
@@ -1907,16 +1933,40 @@ def test_resample_basic(self):
19071933

19081934
def test_resample_empty(self):
19091935

1910-
# GH12771
1936+
# GH12771 & GH12868
19111937
index = PeriodIndex(start='2000', periods=0, freq='D', name='idx')
19121938
s = Series(index=index)
1913-
result = s.resample('M').sum()
19141939

1915-
# after GH12774 is resolved, this should be a PeriodIndex
1916-
expected_index = DatetimeIndex([], name='idx')
1940+
expected_index = PeriodIndex([], name='idx', freq='M')
19171941
expected = Series(index=expected_index)
1942+
1943+
for method in resample_methods:
1944+
result = getattr(s.resample('M'), method)()
1945+
assert_series_equal(result, expected)
1946+
1947+
def test_resample_count(self):
1948+
1949+
# GH12774
1950+
series = pd.Series(1, index=pd.period_range(start='2000',
1951+
periods=100))
1952+
result = series.resample('M').count()
1953+
1954+
expected_index = pd.period_range(start='2000', freq='M', periods=4)
1955+
expected = pd.Series([31, 29, 31, 9], index=expected_index)
1956+
19181957
assert_series_equal(result, expected)
19191958

1959+
def test_resample_same_freq(self):
1960+
1961+
# GH12770
1962+
series = pd.Series(range(3), index=pd.period_range(
1963+
start='2000', periods=3, freq='M'))
1964+
expected = series
1965+
1966+
for method in resample_methods:
1967+
result = getattr(series.resample('M'), method)()
1968+
assert_series_equal(result, expected)
1969+
19201970
def test_with_local_timezone_pytz(self):
19211971
# GH5430
19221972
tm._skip_if_no_pytz()
@@ -2493,8 +2543,8 @@ def test_aggregate_with_nat(self):
24932543
# GH 9925
24942544
self.assertEqual(dt_result.index.name, 'key')
24952545

2496-
# if NaT is included, 'var', 'std', 'mean', 'first','last' and 'nth'
2497-
# doesn't work yet
2546+
# if NaT is included, 'var', 'std', 'mean', 'first','last'
2547+
# and 'nth' don't work yet
24982548

24992549

25002550
if __name__ == '__main__':

0 commit comments

Comments
 (0)