diff --git a/doc/source/whatsnew/v0.18.1.txt b/doc/source/whatsnew/v0.18.1.txt index f02feccbf941c..565b0229269f4 100644 --- a/doc/source/whatsnew/v0.18.1.txt +++ b/doc/source/whatsnew/v0.18.1.txt @@ -287,6 +287,7 @@ Bug Fixes - Bug in ``.loc`` with out-of-bounds in a large indexer would raise ``IndexError`` rather than ``KeyError`` (:issue:`12527`) +- Bug in resampling when using a ``TimedeltaIndex`` and ``.asfreq()``, would previously not include the final fencepost (:issue:`12926`) - Bug in equality testing with a ``Categorical`` in a ``DataFrame`` (:issue:`12564`) - Bug in ``GroupBy.first()``, ``.last()`` returns incorrect row when ``TimeGrouper`` is used (:issue:`7453`) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 058a8db9ead08..faa56132dc63f 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -1124,6 +1124,26 @@ def _maybe_add_count(base, count): return base +def _maybe_coerce_freq(code): + """ we might need to coerce a code to a rule_code + and uppercase it + + Parameters + ---------- + code : string or DateOffset + Frequency code, or an offset whose rule_code is used + + Returns + ------- + string code + """ + + assert code is not None + if isinstance(code, offsets.DateOffset): + code = code.rule_code + return code.upper() + + def is_subperiod(source, target): """ Returns True if downsampling is possible between source and target @@ -1140,14 +1160,12 @@ def is_subperiod(source, target): ------- is_subperiod : boolean """ - if isinstance(source, offsets.DateOffset): - source = source.rule_code - if isinstance(target, offsets.DateOffset): - target = target.rule_code + if target is None or source is None: + return False + source = _maybe_coerce_freq(source) + target = _maybe_coerce_freq(target) - target = target.upper() - source = source.upper() if _is_annual(target): if _is_quarterly(source): return _quarter_months_conform(_get_rule_month(source), @@ -1195,14 +1213,11 @@ def is_superperiod(source, target): ------- is_superperiod : 
boolean """ - if isinstance(source, offsets.DateOffset): - source = source.rule_code - - if isinstance(target, offsets.DateOffset): - target = target.rule_code + if target is None or source is None: + return False + source = _maybe_coerce_freq(source) + target = _maybe_coerce_freq(target) - target = target.upper() - source = source.upper() if _is_annual(source): if _is_annual(target): return _get_rule_month(source) == _get_rule_month(target) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index 409d104e5eb71..cb02197ca2150 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -595,6 +595,14 @@ def _downsample(self, how, **kwargs): return self._wrap_result(result) + def _adjust_binner_for_upsample(self, binner): + """ adjust our binner when upsampling """ + if self.closed == 'right': + binner = binner[1:] + else: + binner = binner[:-1] + return binner + def _upsample(self, method, limit=None): """ method : string {'backfill', 'bfill', 'pad', 'ffill'} @@ -614,11 +622,7 @@ def _upsample(self, method, limit=None): ax = self.ax obj = self._selected_obj binner = self.binner - - if self.closed == 'right': - res_index = binner[1:] - else: - res_index = binner[:-1] + res_index = self._adjust_binner_for_upsample(binner) # if we have the same frequency as our axis, then we are equal sampling if limit is None and to_offset(ax.inferred_freq) == self.freq: @@ -764,6 +768,20 @@ class TimedeltaResampler(DatetimeIndexResampler): def _get_binner_for_time(self): return self.groupby._get_time_delta_bins(self.ax) + def _adjust_binner_for_upsample(self, binner): + """ adjust our binner when upsampling """ + ax = self.ax + + if is_subperiod(ax.freq, self.freq): + # We are actually downsampling + # but are in the asfreq path + # GH 12926 + if self.closed == 'right': + binner = binner[1:] + else: + binner = binner[:-1] + return binner + def resample(obj, kind=None, **kwds): """ create a TimeGrouper and return our resampler """ @@ -1004,8 +1022,11 
@@ def _get_time_delta_bins(self, ax): data=[], freq=self.freq, name=ax.name) return binner, [], labels - labels = binner = TimedeltaIndex(start=ax[0], - end=ax[-1], + # we need 1 extra bin here to accommodate the self.closed + start = ax[0] + end = ax[-1] + labels = binner = TimedeltaIndex(start=start, + end=end, freq=self.freq, name=ax.name) diff --git a/pandas/tseries/tests/test_frequencies.py b/pandas/tseries/tests/test_frequencies.py index 876f95c1b27d7..528b9cc0b08a9 100644 --- a/pandas/tseries/tests/test_frequencies.py +++ b/pandas/tseries/tests/test_frequencies.py @@ -670,6 +670,15 @@ def test_legacy_offset_warnings(self): def test_is_superperiod_subperiod(): + + # input validation + assert not (frequencies.is_superperiod(offsets.YearEnd(), None)) + assert not (frequencies.is_subperiod(offsets.MonthEnd(), None)) + assert not (frequencies.is_superperiod(None, offsets.YearEnd())) + assert not (frequencies.is_subperiod(None, offsets.MonthEnd())) + assert not (frequencies.is_superperiod(None, None)) + assert not (frequencies.is_subperiod(None, None)) + assert (frequencies.is_superperiod(offsets.YearEnd(), offsets.MonthEnd())) assert (frequencies.is_subperiod(offsets.MonthEnd(), offsets.YearEnd())) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 40ecbbb4c147a..2efc9c9d97be7 100644 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -21,6 +21,7 @@ from pandas.tseries.period import period_range, PeriodIndex, Period from pandas.tseries.resample import (DatetimeIndex, TimeGrouper, DatetimeIndexResampler) +from pandas.tseries.frequencies import to_offset from pandas.tseries.tdi import timedelta_range from pandas.util.testing import (assert_series_equal, assert_almost_equal, assert_frame_equal) @@ -35,6 +36,16 @@ resample_methods = downsample_methods + upsample_methods + series_methods +def _simple_ts(start, end, freq='D'): + rng = date_range(start, end, freq=freq) + return 
Series(np.random.randn(len(rng)), index=rng) + + +def _simple_pts(start, end, freq='D'): + rng = period_range(start, end, freq=freq) + return Series(np.random.randn(len(rng)), index=rng) + + class TestResampleAPI(tm.TestCase): _multiprocess_can_split_ = True @@ -566,8 +577,50 @@ def test_agg_consistency(self): assert_frame_equal(result, expected) -class TestResample(tm.TestCase): +class Base(object): + """ + base class for resampling testing, calling + .create_series() generates a series of each index type + """ + def create_index(self, *args, **kwargs): + """ return the _index_factory created using the args, kwargs """ + factory = self._index_factory() + return factory(*args, **kwargs) + + def test_asfreq_downsample(self): + s = self.create_series() + + result = s.resample('2D').asfreq() + expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2))) + expected.index.freq = to_offset('2D') + assert_series_equal(result, expected) + + frame = s.to_frame('value') + result = frame.resample('2D').asfreq() + expected = frame.reindex( + frame.index.take(np.arange(0, len(frame.index), 2))) + expected.index.freq = to_offset('2D') + assert_frame_equal(result, expected) + + def test_asfreq_upsample(self): + s = self.create_series() + + result = s.resample('1H').asfreq() + new_index = self.create_index(s.index[0], s.index[-1], freq='1H') + expected = s.reindex(new_index) + assert_series_equal(result, expected) + + frame = s.to_frame('value') + result = frame.resample('1H').asfreq() + new_index = self.create_index(frame.index[0], + frame.index[-1], freq='1H') + expected = frame.reindex(new_index) + assert_frame_equal(result, expected) + + +class TestDatetimeIndex(Base, tm.TestCase): _multiprocess_can_split_ = True + _index_factory = lambda x: date_range def setUp(self): dti = DatetimeIndex(start=datetime(2005, 1, 1), @@ -575,6 +628,12 @@ def setUp(self): self.series = Series(np.random.rand(len(dti)), dti) + def create_series(self): + i = date_range(datetime(2005, 1, 1), + 
datetime(2005, 1, 10), freq='D') + + return Series(np.arange(len(i)), index=i, name='dti') + def test_custom_grouper(self): dti = DatetimeIndex(freq='Min', start=datetime(2005, 1, 1), @@ -1798,18 +1857,61 @@ def test_resmaple_dst_anchor(self): 'D Frequency') -def _simple_ts(start, end, freq='D'): - rng = date_range(start, end, freq=freq) - return Series(np.random.randn(len(rng)), index=rng) +class TestPeriodIndex(Base, tm.TestCase): + _multiprocess_can_split_ = True + _index_factory = lambda x: period_range + def create_series(self): + i = period_range(datetime(2005, 1, 1), + datetime(2005, 1, 10), freq='D') -def _simple_pts(start, end, freq='D'): - rng = period_range(start, end, freq=freq) - return Series(np.random.randn(len(rng)), index=rng) + return Series(np.arange(len(i)), index=i, name='pi') + def test_asfreq_downsample(self): -class TestResamplePeriodIndex(tm.TestCase): - _multiprocess_can_split_ = True + # series + s = self.create_series() + expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2))) + expected.index = expected.index.to_timestamp() + expected.index.freq = to_offset('2D') + + # this is a bug, this *should* return a PeriodIndex + # directly + # GH 12884 + result = s.resample('2D').asfreq() + assert_series_equal(result, expected) + + # frame + frame = s.to_frame('value') + expected = frame.reindex( + frame.index.take(np.arange(0, len(frame.index), 2))) + expected.index = expected.index.to_timestamp() + expected.index.freq = to_offset('2D') + result = frame.resample('2D').asfreq() + assert_frame_equal(result, expected) + + def test_asfreq_upsample(self): + + # this is a bug, this *should* return a PeriodIndex + # directly + # GH 12884 + s = self.create_series() + new_index = date_range(s.index[0].to_timestamp(how='start'), + (s.index[-1] + 1).to_timestamp(how='start'), + freq='1H', + closed='left') + expected = s.to_timestamp().reindex(new_index).to_period() + result = s.resample('1H').asfreq() + assert_series_equal(result, expected) + + 
frame = s.to_frame('value') + new_index = date_range(frame.index[0].to_timestamp(how='start'), + (frame.index[-1] + 1).to_timestamp(how='start'), + freq='1H', + closed='left') + expected = frame.to_timestamp().reindex(new_index).to_period() + result = frame.resample('1H').asfreq() + assert_frame_equal(result, expected) def test_annual_upsample_D_s_f(self): self._check_annual_upsample_cases('D', 'start', 'ffill') @@ -2336,6 +2438,29 @@ def test_evenly_divisible_with_no_extra_bins(self): assert_frame_equal(result, expected) +class TestTimedeltaIndex(Base, tm.TestCase): + _multiprocess_can_split_ = True + _index_factory = lambda x: timedelta_range + + def create_series(self): + i = timedelta_range('1 day', + '10 day', freq='D') + + return Series(np.arange(len(i)), index=i, name='tdi') + + def test_asfreq_bug(self): + + import datetime as dt + df = DataFrame(data=[1, 3], + index=[dt.timedelta(), dt.timedelta(minutes=3)]) + result = df.resample('1T').asfreq() + expected = DataFrame(data=[1, np.nan, np.nan, 3], + index=timedelta_range('0 day', + periods=4, + freq='1T')) + assert_frame_equal(result, expected) + + class TestTimeGrouper(tm.TestCase): def setUp(self): self.ts = Series(np.random.randn(1000),