Skip to content

Commit a0fffb6

Browse files
committed
PeriodIndex count & resample-on-same-freq fix
1 parent 1c8816f commit a0fffb6

File tree

3 files changed

+83
-58
lines changed

3 files changed

+83
-58
lines changed

doc/source/whatsnew/v0.18.1.txt

+3
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,9 @@ Bug Fixes
215215
- ``usecols`` parameter in ``pd.read_csv`` is now respected even when the lines of a CSV file are not even (:issue:`12203`)
216216
- Bug in ``groupby.transform(..)`` when ``axis=1`` is specified with a non-monotonic ordered index (:issue:`12713`)
217217
- Bug in ``Period`` and ``PeriodIndex`` creation raising ``KeyError`` if ``freq="Minute"`` is specified. Note that the "Minute" freq was deprecated in v0.17.0; use ``freq="T"`` instead (:issue:`11854`)
218+
- Bug in ``PeriodIndex.resample(...).count()`` always raised a ``TypeError`` (:issue:`12774`)
219+
- Bug in ``PeriodIndex.resample`` casting to ``DatetimeIndex`` when empty (:issue:`12868`)
220+
- Bug in ``PeriodIndex.resample`` when resampling to existing frequency (:issue:`12770`)
218221
- Bug in printing data which contains ``Period`` with different ``freq`` raises ``ValueError`` (:issue:`12615`)
219222
- Bug in numpy compatibility of ``np.round()`` on a ``Series`` (:issue:`12600`)
220223
- Bug in ``Series`` construction with ``Categorical`` and ``dtype='category'`` is specified (:issue:`12574`)

pandas/tseries/resample.py

+19-22
Original file line numberDiff line numberDiff line change
@@ -653,9 +653,6 @@ def _convert_obj(self, obj):
653653
# Cannot have multiple of periods, convert to timestamp
654654
self.kind = 'timestamp'
655655

656-
if not len(obj):
657-
self.kind = 'timestamp'
658-
659656
# convert to timestamp
660657
if not (self.kind is None or self.kind == 'period'):
661658
obj = obj.to_timestamp(how=self.convention)
@@ -673,18 +670,15 @@ def aggregate(self, arg, *args, **kwargs):
673670
def _get_new_index(self):
674671
""" return our new index """
675672
ax = self.ax
676-
ax_attrs = ax._get_attributes_dict()
677-
ax_attrs['freq'] = self.freq
678-
obj = self._selected_obj
679673

680674
if len(ax) == 0:
681-
new_index = PeriodIndex(data=[], **ax_attrs)
682-
return obj.reindex(new_index)
683-
684-
start = ax[0].asfreq(self.freq, how=self.convention)
685-
end = ax[-1].asfreq(self.freq, how='end')
675+
values = []
676+
else:
677+
start = ax[0].asfreq(self.freq, how=self.convention)
678+
end = ax[-1].asfreq(self.freq, how='end')
679+
values = period_range(start, end, freq=self.freq).values
686680

687-
return period_range(start, end, **ax_attrs)
681+
return ax._shallow_copy(values, freq=self.freq)
688682

689683
def _downsample(self, how, **kwargs):
690684
"""
@@ -705,7 +699,7 @@ def _downsample(self, how, **kwargs):
705699

706700
new_index = self._get_new_index()
707701
if len(new_index) == 0:
708-
return self._wrap_result(new_index)
702+
return self._wrap_result(self._selected_obj.reindex(new_index))
709703

710704
# Start vs. end of period
711705
memb = ax.asfreq(self.freq, how=self.convention)
@@ -718,6 +712,8 @@ def _downsample(self, how, **kwargs):
718712
return self._groupby_and_aggregate(grouper, how)
719713
elif is_superperiod(ax.freq, self.freq):
720714
return self.asfreq()
715+
elif ax.freq == self.freq:
716+
return self.asfreq()
721717

722718
raise ValueError('Frequency {axfreq} cannot be '
723719
'resampled to {freq}'.format(
@@ -743,23 +739,24 @@ def _upsample(self, method, limit=None):
743739

744740
ax = self.ax
745741
obj = self.obj
746-
747742
new_index = self._get_new_index()
748-
if len(new_index) == 0:
749-
return self._wrap_result(new_index)
750743

751-
if not is_superperiod(ax.freq, self.freq):
752-
return self.asfreq()
744+
if len(new_index) == 0:
745+
return self._wrap_result(self._selected_obj.reindex(new_index))
753746

754747
# Start vs. end of period
755748
memb = ax.asfreq(self.freq, how=self.convention)
756749

757750
# Get the fill indexer
758751
indexer = memb.get_indexer(new_index, method=method, limit=limit)
759-
return self._wrap_result(_take_new_index(obj,
760-
indexer,
761-
new_index,
762-
axis=self.axis))
752+
return self._wrap_result(_take_new_index(
753+
obj, indexer, new_index, axis=self.axis))
754+
755+
def _groupby_and_aggregate(self, grouper, how, *args, **kwargs):
756+
if grouper is None:
757+
return self._downsample(how, **kwargs)
758+
return super(PeriodIndexResampler, self)._groupby_and_aggregate(
759+
grouper, how, *args, **kwargs)
763760

764761

765762
class TimedeltaResampler(DatetimeIndexResampler):

pandas/tseries/tests/test_resample.py

+61-36
Original file line numberDiff line numberDiff line change
@@ -3,33 +3,34 @@
33
from datetime import datetime, timedelta
44
from functools import partial
55

6-
from pandas.compat import range, lrange, zip, product, OrderedDict
6+
import nose
77
import numpy as np
88

9+
import pandas as pd
10+
import pandas.tseries.offsets as offsets
11+
import pandas.util.testing as tm
912
from pandas import (Series, DataFrame, Panel, Index, isnull,
1013
notnull, Timestamp)
11-
14+
from pandas.compat import range, lrange, zip, product, OrderedDict
15+
from pandas.core.base import SpecificationError
16+
from pandas.core.common import ABCSeries, ABCDataFrame
1217
from pandas.core.groupby import DataError
18+
from pandas.tseries.frequencies import MONTHS, DAYS
1319
from pandas.tseries.index import date_range
14-
from pandas.tseries.tdi import timedelta_range
1520
from pandas.tseries.offsets import Minute, BDay
1621
from pandas.tseries.period import period_range, PeriodIndex, Period
1722
from pandas.tseries.resample import (DatetimeIndex, TimeGrouper,
1823
DatetimeIndexResampler)
19-
from pandas.tseries.frequencies import MONTHS, DAYS
20-
from pandas.core.common import ABCSeries, ABCDataFrame
21-
from pandas.core.base import SpecificationError
22-
23-
import pandas.tseries.offsets as offsets
24-
import pandas as pd
25-
26-
import nose
27-
24+
from pandas.tseries.tdi import timedelta_range
2825
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
2926
assert_frame_equal)
30-
import pandas.util.testing as tm
3127

3228
bday = BDay()
29+
downsample_methods = ['min', 'max', 'first', 'last', 'sum', 'mean', 'sem',
30+
'median', 'prod', 'ohlc']
31+
upsample_methods = ['count', 'size']
32+
series_methods = ['nunique']
33+
resample_methods = downsample_methods + upsample_methods + series_methods
3334

3435

3536
class TestResampleAPI(tm.TestCase):
@@ -95,12 +96,13 @@ def test_api_changes_v018(self):
9596
self.assertRaises(ValueError, lambda: r.iat[0])
9697
self.assertRaises(ValueError, lambda: r.ix[0])
9798
self.assertRaises(ValueError, lambda: r.loc[
98-
Timestamp('2013-01-01 00:00:00', offset='H')])
99+
Timestamp('2013-01-01 00:00:00', offset='H')])
99100
self.assertRaises(ValueError, lambda: r.at[
100-
Timestamp('2013-01-01 00:00:00', offset='H')])
101+
Timestamp('2013-01-01 00:00:00', offset='H')])
101102

102103
def f():
103104
r[0] = 5
105+
104106
self.assertRaises(ValueError, f)
105107

106108
# str/repr
@@ -144,7 +146,6 @@ def f():
144146

145147
# comparison ops
146148
for op in ['__lt__', '__le__', '__gt__', '__ge__', '__eq__', '__ne__']:
147-
148149
r = self.series.resample('H')
149150

150151
with tm.assert_produces_warning(FutureWarning,
@@ -259,6 +260,7 @@ def test_attribute_access(self):
259260
# setting
260261
def f():
261262
r.F = 'bah'
263+
262264
self.assertRaises(ValueError, f)
263265

264266
def test_api_compat_before_use(self):
@@ -509,10 +511,10 @@ def test_agg_misc(self):
509511
# errors
510512
# invalid names in the agg specification
511513
for t in [r, g]:
512-
513514
def f():
514515
r[['A']].agg({'A': ['sum', 'std'],
515516
'B': ['mean', 'std']})
517+
516518
self.assertRaises(SpecificationError, f)
517519

518520
def test_agg_nested_dicts(self):
@@ -679,7 +681,7 @@ def _ohlc(group):
679681
assert_series_equal(result, expected)
680682
except BaseException as exc:
681683

682-
exc.args += ('how=%s' % arg, )
684+
exc.args += ('how=%s' % arg,)
683685
raise
684686

685687
def test_resample_how_callables(self):
@@ -692,7 +694,6 @@ def fn(x, a=1):
692694
return str(type(x))
693695

694696
class fn_class:
695-
696697
def __call__(self, x):
697698
return str(type(x))
698699

@@ -768,7 +769,7 @@ def test_resample_rounding(self):
768769

769770
from pandas.compat import StringIO
770771
df = pd.read_csv(StringIO(data), parse_dates={'timestamp': [
771-
'date', 'time']}, index_col='timestamp')
772+
'date', 'time']}, index_col='timestamp')
772773
df.index.name = None
773774
result = df.resample('6s').sum()
774775
expected = DataFrame({'value': [
@@ -1061,10 +1062,10 @@ def test_resample_ohlc_dataframe(self):
10611062

10621063
df.columns = [['a', 'b'], ['c', 'd']]
10631064
res = df.resample('H').ohlc()
1064-
exp.columns = pd.MultiIndex.from_tuples([('a', 'c', 'open'), (
1065-
'a', 'c', 'high'), ('a', 'c', 'low'), ('a', 'c', 'close'), (
1066-
'b', 'd', 'open'), ('b', 'd', 'high'), ('b', 'd', 'low'), (
1067-
'b', 'd', 'close')])
1065+
exp.columns = pd.MultiIndex.from_tuples([
1066+
('a', 'c', 'open'), ('a', 'c', 'high'), ('a', 'c', 'low'),
1067+
('a', 'c', 'close'), ('b', 'd', 'open'), ('b', 'd', 'high'),
1068+
('b', 'd', 'low'), ('b', 'd', 'close')])
10681069
assert_frame_equal(exp, res)
10691070

10701071
# dupe columns fail atm
@@ -1449,11 +1450,12 @@ def test_resample_anchored_multiday(self):
14491450
#
14501451
# See: https://github.com/pydata/pandas/issues/8683
14511452

1452-
s = pd.Series(np.random.randn(5),
1453-
index=pd.date_range('2014-10-14 23:06:23.206',
1454-
periods=3, freq='400L') |
1455-
pd.date_range('2014-10-15 23:00:00',
1456-
periods=2, freq='2200L'))
1453+
index = pd.date_range(
1454+
'2014-10-14 23:06:23.206', periods=3, freq='400L'
1455+
) | pd.date_range(
1456+
'2014-10-15 23:00:00', periods=2, freq='2200L')
1457+
1458+
s = pd.Series(np.random.randn(5), index=index)
14571459

14581460
# Ensure left closing works
14591461
result = s.resample('2200L').mean()
@@ -1763,7 +1765,6 @@ def _simple_pts(start, end, freq='D'):
17631765

17641766

17651767
class TestResamplePeriodIndex(tm.TestCase):
1766-
17671768
_multiprocess_can_split_ = True
17681769

17691770
def test_annual_upsample_D_s_f(self):
@@ -1907,16 +1908,40 @@ def test_resample_basic(self):
19071908

19081909
def test_resample_empty(self):
19091910

1910-
# GH12771
1911+
# GH12771 & GH12868
19111912
index = PeriodIndex(start='2000', periods=0, freq='D', name='idx')
19121913
s = Series(index=index)
1913-
result = s.resample('M').sum()
19141914

1915-
# after GH12774 is resolved, this should be a PeriodIndex
1916-
expected_index = DatetimeIndex([], name='idx')
1915+
expected_index = PeriodIndex([], name='idx', freq='M')
19171916
expected = Series(index=expected_index)
1917+
1918+
for method in resample_methods:
1919+
result = getattr(s.resample('M'), method)()
1920+
assert_series_equal(result, expected)
1921+
1922+
def test_resample_count(self):
1923+
1924+
# GH12774
1925+
series = pd.Series(1, index=pd.period_range(start='2000',
1926+
periods=100))
1927+
result = series.resample('M').count()
1928+
1929+
expected_index = pd.period_range(start='2000', freq='M', periods=4)
1930+
expected = pd.Series([31, 29, 31, 9], index=expected_index)
1931+
19181932
assert_series_equal(result, expected)
19191933

1934+
def test_resample_same_freq(self):
1935+
1936+
# GH12770
1937+
series = pd.Series(range(3), index=pd.period_range(
1938+
start='2000', periods=3, freq='M'))
1939+
expected = series
1940+
1941+
for method in resample_methods:
1942+
result = getattr(series.resample('M'), method)()
1943+
assert_series_equal(result, expected)
1944+
19201945
def test_with_local_timezone_pytz(self):
19211946
# GH5430
19221947
tm._skip_if_no_pytz()
@@ -2493,8 +2518,8 @@ def test_aggregate_with_nat(self):
24932518
# GH 9925
24942519
self.assertEqual(dt_result.index.name, 'key')
24952520

2496-
# if NaT is included, 'var', 'std', 'mean', 'first','last' and 'nth'
2497-
# doesn't work yet
2521+
# if NaT is included, 'var', 'std', 'mean', 'first','last'
2522+
# and 'nth' don't work yet
24982523

24992524

25002525
if __name__ == '__main__':

0 commit comments

Comments
 (0)