Skip to content

Commit 626f017

Browse files
committed
BUG: resampling PeriodIndex now returns PeriodIndex (GH 12884, 15944)
Exceptions:
- passing kind='timestamp' forces conversion to a DatetimeIndex
- if loffset is given, the index is converted to timestamps in any case
1 parent 44799ce commit 626f017

File tree

2 files changed

+109
-42
lines changed

2 files changed

+109
-42
lines changed

pandas/core/resample.py

+10-9
Original file line number | Diff line number | Diff line change
@@ -808,16 +808,15 @@ def _convert_obj(self, obj):
808808
" use .set_index(...) to explicitly set index")
809809
raise NotImplementedError(msg)
810810

811-
offset = to_offset(self.freq)
812-
if offset.n > 1:
813-
if self.kind == 'period': # pragma: no cover
814-
print('Warning: multiple of frequency -> timestamps')
815-
816-
# Cannot have multiple of periods, convert to timestamp
811+
if self.loffset is not None:
812+
if self.kind == 'period':
813+
print('Warning: loffset -> convert PeriodIndex to timestamps')
814+
# Cannot apply loffset/timedelta to PeriodIndex -> convert to
815+
# timestamps
817816
self.kind = 'timestamp'
818817

819818
# convert to timestamp
820-
if not (self.kind is None or self.kind == 'period'):
819+
if self.kind == 'timestamp':
821820
obj = obj.to_timestamp(how=self.convention)
822821

823822
return obj
@@ -1254,8 +1253,10 @@ def _get_period_bins(self, ax):
12541253

12551254
memb = ax.asfreq(self.freq, how=self.convention)
12561255
i8 = memb.asi8
1257-
rng = np.arange(i8[0], i8[-1] + 1)
1258-
bins = memb.searchsorted(rng, side='right')
1256+
freq_mult = self.freq.n
1257+
rng = np.arange(i8[0], i8[-1] + 1, freq_mult)
1258+
rng += freq_mult
1259+
bins = memb.searchsorted(rng, side='left')
12591260

12601261
return binner, bins, labels
12611262

pandas/tests/test_resample.py

+99-33
Original file line number | Diff line number | Diff line change
@@ -2179,49 +2179,75 @@ def create_series(self):
21792179
return Series(np.arange(len(i)), index=i, name='pi')
21802180

21812181
def test_asfreq_downsample(self):
2182+
# GH 12884, 15944
21822183

2183-
# series
21842184
s = self.create_series()
2185-
expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2)))
2186-
expected.index = expected.index.to_timestamp()
2187-
expected.index.freq = to_offset('2D')
2185+
start = s.index[0].to_timestamp(how='start')
2186+
end = (s.index[-1] + 1).to_timestamp(how='start')
21882187

2189-
# this is a bug, this *should* return a PeriodIndex
2190-
# directly
2191-
# GH 12884
2188+
new_index = date_range(start=start, end=end, freq='2D', closed='left')
2189+
# series
2190+
expected = s.to_timestamp().reindex(new_index).to_period('2D')
21922191
result = s.resample('2D').asfreq()
21932192
assert_series_equal(result, expected)
2193+
result_kind_period = s.resample('2D', kind='period').asfreq()
2194+
assert_series_equal(result_kind_period, expected)
21942195

21952196
# frame
21962197
frame = s.to_frame('value')
2197-
expected = frame.reindex(
2198-
frame.index.take(np.arange(0, len(frame.index), 2)))
2199-
expected.index = expected.index.to_timestamp()
2200-
expected.index.freq = to_offset('2D')
2198+
expected = frame.to_timestamp().reindex(new_index).to_period('2D')
22012199
result = frame.resample('2D').asfreq()
22022200
assert_frame_equal(result, expected)
2201+
result_kind_period = frame.resample('2D', kind='period').asfreq()
2202+
assert_frame_equal(result_kind_period, expected)
2203+
2204+
def test_asfreq_downsample_kind_timestamp(self):
2205+
# series
2206+
s = self.create_series()
2207+
expected = s.to_timestamp().resample('2D').asfreq()
2208+
result = s.resample('2D', kind='timestamp').asfreq()
2209+
assert_series_equal(result, expected)
2210+
2211+
# frame
2212+
frame = s.to_frame('value')
2213+
expected = frame.to_timestamp().resample('2D').asfreq()
2214+
result = frame.resample('2D', kind='timestamp').asfreq()
2215+
assert_frame_equal(result, expected)
22032216

22042217
def test_asfreq_upsample(self):
2218+
# GH 12884, 15944
22052219

2206-
# this is a bug, this *should* return a PeriodIndex
2207-
# directly
2208-
# GH 12884
22092220
s = self.create_series()
2210-
new_index = date_range(s.index[0].to_timestamp(how='start'),
2211-
(s.index[-1] + 1).to_timestamp(how='start'),
2212-
freq='1H',
2213-
closed='left')
2214-
expected = s.to_timestamp().reindex(new_index).to_period()
2215-
result = s.resample('1H').asfreq()
2221+
start = s.index[0].to_timestamp(how='start')
2222+
end = (s.index[-1] + 1).to_timestamp(how='start')
2223+
for freq in ['1H', '2H']:
2224+
# check base frequency and frequency multiple
2225+
new_index = date_range(start=start, end=end, freq=freq,
2226+
closed='left')
2227+
# series
2228+
expected = s.to_timestamp().reindex(new_index).to_period(freq)
2229+
result = s.resample(freq).asfreq()
2230+
assert_series_equal(result, expected)
2231+
result_kind_period = s.resample(freq, kind='period').asfreq()
2232+
assert_series_equal(result_kind_period, expected)
2233+
2234+
# frame
2235+
frame = s.to_frame('value')
2236+
expected = frame.to_timestamp().reindex(new_index).to_period(freq)
2237+
result = frame.resample(freq).asfreq()
2238+
assert_frame_equal(result, expected)
2239+
result_kind_period = frame.resample(freq, kind='period').asfreq()
2240+
assert_frame_equal(result_kind_period, expected)
2241+
2242+
def test_asfreq_upsample_kind_timestamp(self):
2243+
s = self.create_series()
2244+
expected = s.to_timestamp().resample('1H').asfreq()
2245+
result = s.resample('1H', kind='timestamp').asfreq()
22162246
assert_series_equal(result, expected)
22172247

22182248
frame = s.to_frame('value')
2219-
new_index = date_range(frame.index[0].to_timestamp(how='start'),
2220-
(frame.index[-1] + 1).to_timestamp(how='start'),
2221-
freq='1H',
2222-
closed='left')
2223-
expected = frame.to_timestamp().reindex(new_index).to_period()
2224-
result = frame.resample('1H').asfreq()
2249+
expected = frame.to_timestamp().resample('1H').asfreq()
2250+
result = frame.resample('1H', kind='timestamp').asfreq()
22252251
assert_frame_equal(result, expected)
22262252

22272253
def test_asfreq_fill_value(self):
@@ -2333,12 +2359,11 @@ def test_basic_upsample(self):
23332359
ts = _simple_pts('1/1/1990', '6/30/1995', freq='M')
23342360
result = ts.resample('a-dec').mean()
23352361

2336-
resampled = result.resample('D', convention='end').ffill()
2337-
2338-
expected = result.to_timestamp('D', how='end')
2339-
expected = expected.asfreq('D', 'ffill').to_period()
2340-
2341-
assert_series_equal(resampled, expected)
2362+
for freq in ['D', '2D']:
2363+
resampled = result.resample(freq, convention='end').ffill()
2364+
expected = result.to_timestamp(freq, how='end')
2365+
expected = expected.asfreq(freq, 'ffill').to_period(freq)
2366+
assert_series_equal(resampled, expected)
23422367

23432368
def test_upsample_with_limit(self):
23442369
rng = period_range('1/1/2000', periods=5, freq='A')
@@ -2409,10 +2434,13 @@ def test_resample_count(self):
24092434
series = pd.Series(1, index=pd.period_range(start='2000',
24102435
periods=100))
24112436
result = series.resample('M').count()
2412-
24132437
expected_index = pd.period_range(start='2000', freq='M', periods=4)
24142438
expected = pd.Series([31, 29, 31, 9], index=expected_index)
2439+
assert_series_equal(result, expected)
24152440

2441+
result = series.resample('2M').count()
2442+
expected_index = pd.period_range(start='2000', freq='2M', periods=2)
2443+
expected = pd.Series([31 + 29, 31 + 9], index=expected_index)
24162444
assert_series_equal(result, expected)
24172445

24182446
def test_resample_same_freq(self):
@@ -2560,7 +2588,17 @@ def test_resample_5minute(self):
25602588
rng = period_range('1/1/2000', '1/5/2000', freq='T')
25612589
ts = Series(np.random.randn(len(rng)), index=rng)
25622590

2591+
expected = ts.to_timestamp().resample('5min').mean().to_period('5min')
25632592
result = ts.resample('5min').mean()
2593+
assert_series_equal(result, expected)
2594+
result_kind_period = ts.resample('5min', kind='period').mean()
2595+
assert_series_equal(result_kind_period, expected)
2596+
2597+
def test_resample_5minute_kind_timestamp(self):
2598+
rng = period_range('1/1/2000', '1/5/2000', freq='T')
2599+
ts = Series(np.random.randn(len(rng)), index=rng)
2600+
2601+
result = ts.resample('5min', kind='timestamp').mean()
25642602
expected = ts.to_timestamp().resample('5min').mean()
25652603
assert_series_equal(result, expected)
25662604

@@ -2781,6 +2819,34 @@ def test_evenly_divisible_with_no_extra_bins(self):
27812819
result = df.resample('7D').sum()
27822820
assert_frame_equal(result, expected)
27832821

2822+
def test_loffset_returns_datetimeindex(self):
2823+
# make sure passing loffset returns DatetimeIndex in all cases
2824+
# basic method taken from Base.test_resample_loffset_arg_type()
2825+
df = self.create_series().to_frame('value')
2826+
expected_means = [df.values[i:i + 2].mean()
2827+
for i in range(0, len(df.values), 2)]
2828+
expected_index = self.create_index(df.index[0], periods=len(df.index) /
2829+
2, freq='2D')
2830+
2831+
# loffset coerces PeriodIndex to DatetimeIndex
2832+
expected_index = expected_index.to_timestamp()
2833+
expected_index += timedelta(hours=2)
2834+
expected = DataFrame({'value': expected_means}, index=expected_index)
2835+
2836+
for arg in ['mean', {'value': 'mean'}, ['mean']]:
2837+
for kind_param in [None, 'period', 'timestamp']:
2838+
result_agg = (df.resample('2D', loffset='2H', kind=kind_param)
2839+
.agg(arg))
2840+
with tm.assert_produces_warning(FutureWarning,
2841+
check_stacklevel=False):
2842+
result_how = df.resample('2D', how=arg, loffset='2H',
2843+
kind=kind_param)
2844+
if isinstance(arg, list):
2845+
expected.columns = (pd.MultiIndex
2846+
.from_tuples([('value', 'mean')]))
2847+
assert_frame_equal(result_agg, expected)
2848+
assert_frame_equal(result_how, expected)
2849+
27842850

27852851
class TestTimedeltaIndex(Base, tm.TestCase):
27862852
_index_factory = lambda x: timedelta_range

0 commit comments

Comments
 (0)