Skip to content

Commit a3c9052

Browse files
committed
BUG: .asfreq on resample on PeriodIndex/TimedeltaIndex are not
including the full range closes #12926
1 parent 1320ef7 commit a3c9052

File tree

5 files changed

+200
-29
lines changed

5 files changed

+200
-29
lines changed

doc/source/whatsnew/v0.18.1.txt

+1
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,7 @@ Bug Fixes
287287

288288

289289
- Bug in ``.loc`` with out-of-bounds in a large indexer would raise ``IndexError`` rather than ``KeyError`` (:issue:`12527`)
290+
- Bug in ``.resample(...).asfreq()`` on a ``TimedeltaIndex``, which would previously not include the final fencepost (:issue:`12926`)
290291
- Bug in equality testing with a ``Categorical`` in a ``DataFrame`` (:issue:`12564`)
291292
- Bug in ``GroupBy.first()``, ``.last()`` returns incorrect row when ``TimeGrouper`` is used (:issue:`7453`)
292293

pandas/tseries/frequencies.py

+28-13
Original file line numberDiff line numberDiff line change
@@ -1124,6 +1124,26 @@ def _maybe_add_count(base, count):
11241124
return base
11251125

11261126

1127+
def _maybe_coerce_freq(code):
1128+
""" we might need to coerce a code to a rule_code
1129+
and uppercase it
1130+
1131+
Parameters
1132+
----------
1133+
code : string or DateOffset
1134+
Frequency code (or offset) to coerce
1135+
1136+
Returns
1137+
-------
1138+
string code
1139+
"""
1140+
1141+
assert code is not None
1142+
if isinstance(code, offsets.DateOffset):
1143+
code = code.rule_code
1144+
return code.upper()
1145+
1146+
11271147
def is_subperiod(source, target):
11281148
"""
11291149
Returns True if downsampling is possible between source and target
@@ -1140,14 +1160,12 @@ def is_subperiod(source, target):
11401160
-------
11411161
is_subperiod : boolean
11421162
"""
1143-
if isinstance(source, offsets.DateOffset):
1144-
source = source.rule_code
11451163

1146-
if isinstance(target, offsets.DateOffset):
1147-
target = target.rule_code
1164+
if target is None or source is None:
1165+
return False
1166+
source = _maybe_coerce_freq(source)
1167+
target = _maybe_coerce_freq(target)
11481168

1149-
target = target.upper()
1150-
source = source.upper()
11511169
if _is_annual(target):
11521170
if _is_quarterly(source):
11531171
return _quarter_months_conform(_get_rule_month(source),
@@ -1195,14 +1213,11 @@ def is_superperiod(source, target):
11951213
-------
11961214
is_superperiod : boolean
11971215
"""
1198-
if isinstance(source, offsets.DateOffset):
1199-
source = source.rule_code
1200-
1201-
if isinstance(target, offsets.DateOffset):
1202-
target = target.rule_code
1216+
if target is None or source is None:
1217+
return False
1218+
source = _maybe_coerce_freq(source)
1219+
target = _maybe_coerce_freq(target)
12031220

1204-
target = target.upper()
1205-
source = source.upper()
12061221
if _is_annual(source):
12071222
if _is_annual(target):
12081223
return _get_rule_month(source) == _get_rule_month(target)

pandas/tseries/resample.py

+28-7
Original file line numberDiff line numberDiff line change
@@ -595,6 +595,14 @@ def _downsample(self, how, **kwargs):
595595

596596
return self._wrap_result(result)
597597

598+
def _adjust_binner_for_upsample(self, binner):
599+
""" adjust our binner when upsampling """
600+
if self.closed == 'right':
601+
binner = binner[1:]
602+
else:
603+
binner = binner[:-1]
604+
return binner
605+
598606
def _upsample(self, method, limit=None):
599607
"""
600608
method : string {'backfill', 'bfill', 'pad', 'ffill'}
@@ -614,11 +622,7 @@ def _upsample(self, method, limit=None):
614622
ax = self.ax
615623
obj = self._selected_obj
616624
binner = self.binner
617-
618-
if self.closed == 'right':
619-
res_index = binner[1:]
620-
else:
621-
res_index = binner[:-1]
625+
res_index = self._adjust_binner_for_upsample(binner)
622626

623627
# if we have the same frequency as our axis, then we are equal sampling
624628
if limit is None and to_offset(ax.inferred_freq) == self.freq:
@@ -764,6 +768,20 @@ class TimedeltaResampler(DatetimeIndexResampler):
764768
def _get_binner_for_time(self):
765769
return self.groupby._get_time_delta_bins(self.ax)
766770

771+
def _adjust_binner_for_upsample(self, binner):
772+
""" adjust our binner when upsampling """
773+
ax = self.ax
774+
775+
if is_subperiod(ax.freq, self.freq):
776+
# We are actually downsampling
777+
# but are in the asfreq path
778+
# GH 12926
779+
if self.closed == 'right':
780+
binner = binner[1:]
781+
else:
782+
binner = binner[:-1]
783+
return binner
784+
767785

768786
def resample(obj, kind=None, **kwds):
769787
""" create a TimeGrouper and return our resampler """
@@ -1004,8 +1022,11 @@ def _get_time_delta_bins(self, ax):
10041022
data=[], freq=self.freq, name=ax.name)
10051023
return binner, [], labels
10061024

1007-
labels = binner = TimedeltaIndex(start=ax[0],
1008-
end=ax[-1],
1025+
# we need 1 extra bin here to accommodate the self.closed
1026+
start = ax[0]
1027+
end = ax[-1]
1028+
labels = binner = TimedeltaIndex(start=start,
1029+
end=end,
10091030
freq=self.freq,
10101031
name=ax.name)
10111032

pandas/tseries/tests/test_frequencies.py

+9
Original file line numberDiff line numberDiff line change
@@ -670,6 +670,15 @@ def test_legacy_offset_warnings(self):
670670

671671

672672
def test_is_superperiod_subperiod():
673+
674+
# input validation
675+
assert not (frequencies.is_superperiod(offsets.YearEnd(), None))
676+
assert not (frequencies.is_subperiod(offsets.MonthEnd(), None))
677+
assert not (frequencies.is_superperiod(None, offsets.YearEnd()))
678+
assert not (frequencies.is_subperiod(None, offsets.MonthEnd()))
679+
assert not (frequencies.is_superperiod(None, None))
680+
assert not (frequencies.is_subperiod(None, None))
681+
673682
assert (frequencies.is_superperiod(offsets.YearEnd(), offsets.MonthEnd()))
674683
assert (frequencies.is_subperiod(offsets.MonthEnd(), offsets.YearEnd()))
675684

pandas/tseries/tests/test_resample.py

+134-9
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from pandas.tseries.period import period_range, PeriodIndex, Period
2222
from pandas.tseries.resample import (DatetimeIndex, TimeGrouper,
2323
DatetimeIndexResampler)
24+
from pandas.tseries.frequencies import to_offset
2425
from pandas.tseries.tdi import timedelta_range
2526
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
2627
assert_frame_equal)
@@ -35,6 +36,16 @@
3536
resample_methods = downsample_methods + upsample_methods + series_methods
3637

3738

39+
def _simple_ts(start, end, freq='D'):
40+
rng = date_range(start, end, freq=freq)
41+
return Series(np.random.randn(len(rng)), index=rng)
42+
43+
44+
def _simple_pts(start, end, freq='D'):
45+
rng = period_range(start, end, freq=freq)
46+
return Series(np.random.randn(len(rng)), index=rng)
47+
48+
3849
class TestResampleAPI(tm.TestCase):
3950
_multiprocess_can_split_ = True
4051

@@ -566,15 +577,63 @@ def test_agg_consistency(self):
566577
assert_frame_equal(result, expected)
567578

568579

569-
class TestResample(tm.TestCase):
580+
class Base(object):
581+
"""
582+
base class for resampling testing, calling
583+
.create_series() generates a series of each index type
584+
"""
585+
def create_index(self, *args, **kwargs):
586+
""" return the _index_factory created using the args, kwargs """
587+
factory = self._index_factory()
588+
return factory(*args, **kwargs)
589+
590+
def test_asfreq_downsample(self):
591+
s = self.create_series()
592+
593+
result = s.resample('2D').asfreq()
594+
expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2)))
595+
expected.index.freq = to_offset('2D')
596+
assert_series_equal(result, expected)
597+
598+
frame = s.to_frame('value')
599+
result = frame.resample('2D').asfreq()
600+
expected = frame.reindex(
601+
frame.index.take(np.arange(0, len(frame.index), 2)))
602+
expected.index.freq = to_offset('2D')
603+
assert_frame_equal(result, expected)
604+
605+
def test_asfreq_upsample(self):
606+
s = self.create_series()
607+
608+
result = s.resample('1H').asfreq()
609+
new_index = self.create_index(s.index[0], s.index[-1], freq='1H')
610+
expected = s.reindex(new_index)
611+
assert_series_equal(result, expected)
612+
613+
frame = s.to_frame('value')
614+
result = frame.resample('1H').asfreq()
615+
new_index = self.create_index(frame.index[0],
616+
frame.index[-1], freq='1H')
617+
expected = frame.reindex(new_index)
618+
assert_frame_equal(result, expected)
619+
620+
621+
class TestDatetimeIndex(Base, tm.TestCase):
570622
_multiprocess_can_split_ = True
623+
_index_factory = lambda x: date_range
571624

572625
def setUp(self):
573626
dti = DatetimeIndex(start=datetime(2005, 1, 1),
574627
end=datetime(2005, 1, 10), freq='Min')
575628

576629
self.series = Series(np.random.rand(len(dti)), dti)
577630

631+
def create_series(self):
632+
i = date_range(datetime(2005, 1, 1),
633+
datetime(2005, 1, 10), freq='D')
634+
635+
return Series(np.arange(len(i)), index=i, name='dti')
636+
578637
def test_custom_grouper(self):
579638

580639
dti = DatetimeIndex(freq='Min', start=datetime(2005, 1, 1),
@@ -1798,18 +1857,61 @@ def test_resmaple_dst_anchor(self):
17981857
'D Frequency')
17991858

18001859

1801-
def _simple_ts(start, end, freq='D'):
1802-
rng = date_range(start, end, freq=freq)
1803-
return Series(np.random.randn(len(rng)), index=rng)
1860+
class TestPeriodIndex(Base, tm.TestCase):
1861+
_multiprocess_can_split_ = True
1862+
_index_factory = lambda x: period_range
18041863

1864+
def create_series(self):
1865+
i = period_range(datetime(2005, 1, 1),
1866+
datetime(2005, 1, 10), freq='D')
18051867

1806-
def _simple_pts(start, end, freq='D'):
1807-
rng = period_range(start, end, freq=freq)
1808-
return Series(np.random.randn(len(rng)), index=rng)
1868+
return Series(np.arange(len(i)), index=i, name='pi')
18091869

1870+
def test_asfreq_downsample(self):
18101871

1811-
class TestResamplePeriodIndex(tm.TestCase):
1812-
_multiprocess_can_split_ = True
1872+
# series
1873+
s = self.create_series()
1874+
expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2)))
1875+
expected.index = expected.index.to_timestamp()
1876+
expected.index.freq = to_offset('2D')
1877+
1878+
# this is a bug, this *should* return a PeriodIndex
1879+
# directly
1880+
# GH 12884
1881+
result = s.resample('2D').asfreq()
1882+
assert_series_equal(result, expected)
1883+
1884+
# frame
1885+
frame = s.to_frame('value')
1886+
expected = frame.reindex(
1887+
frame.index.take(np.arange(0, len(frame.index), 2)))
1888+
expected.index = expected.index.to_timestamp()
1889+
expected.index.freq = to_offset('2D')
1890+
result = frame.resample('2D').asfreq()
1891+
assert_frame_equal(result, expected)
1892+
1893+
def test_asfreq_upsample(self):
1894+
1895+
# this is a bug, this *should* return a PeriodIndex
1896+
# directly
1897+
# GH 12884
1898+
s = self.create_series()
1899+
new_index = date_range(s.index[0].to_timestamp(how='start'),
1900+
(s.index[-1] + 1).to_timestamp(how='start'),
1901+
freq='1H',
1902+
closed='left')
1903+
expected = s.to_timestamp().reindex(new_index).to_period()
1904+
result = s.resample('1H').asfreq()
1905+
assert_series_equal(result, expected)
1906+
1907+
frame = s.to_frame('value')
1908+
new_index = date_range(frame.index[0].to_timestamp(how='start'),
1909+
(frame.index[-1] + 1).to_timestamp(how='start'),
1910+
freq='1H',
1911+
closed='left')
1912+
expected = frame.to_timestamp().reindex(new_index).to_period()
1913+
result = frame.resample('1H').asfreq()
1914+
assert_frame_equal(result, expected)
18131915

18141916
def test_annual_upsample_D_s_f(self):
18151917
self._check_annual_upsample_cases('D', 'start', 'ffill')
@@ -2336,6 +2438,29 @@ def test_evenly_divisible_with_no_extra_bins(self):
23362438
assert_frame_equal(result, expected)
23372439

23382440

2441+
class TestTimedeltaIndex(Base, tm.TestCase):
2442+
_multiprocess_can_split_ = True
2443+
_index_factory = lambda x: timedelta_range
2444+
2445+
def create_series(self):
2446+
i = timedelta_range('1 day',
2447+
'10 day', freq='D')
2448+
2449+
return Series(np.arange(len(i)), index=i, name='tdi')
2450+
2451+
def test_asfreq_bug(self):
2452+
2453+
import datetime as dt
2454+
df = DataFrame(data=[1, 3],
2455+
index=[dt.timedelta(), dt.timedelta(minutes=3)])
2456+
result = df.resample('1T').asfreq()
2457+
expected = DataFrame(data=[1, np.nan, np.nan, 3],
2458+
index=timedelta_range('0 day',
2459+
periods=4,
2460+
freq='1T'))
2461+
assert_frame_equal(result, expected)
2462+
2463+
23392464
class TestTimeGrouper(tm.TestCase):
23402465
def setUp(self):
23412466
self.ts = Series(np.random.randn(1000),

0 commit comments

Comments
 (0)