Skip to content

BUG: .asfreq on resample on PeriodIndex/TimedeltaIndex are not #12928

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.18.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -287,6 +287,7 @@ Bug Fixes


- Bug in ``.loc`` with out-of-bounds in a large indexer would raise ``IndexError`` rather than ``KeyError`` (:issue:`12527`)
- Bug in resampling when using a ``TimedeltaIndex`` and ``.asfreq()``, would previously not include the final fencepost (:issue:`12926`)
- Bug in equality testing with a ``Categorical`` in a ``DataFrame`` (:issue:`12564`)
- Bug in ``GroupBy.first()``, ``.last()`` returns incorrect row when ``TimeGrouper`` is used (:issue:`7453`)

Expand Down
41 changes: 28 additions & 13 deletions pandas/tseries/frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -1124,6 +1124,26 @@ def _maybe_add_count(base, count):
return base


def _maybe_coerce_freq(code):
""" we might need to coerce a code to a rule_code
and uppercase it

Parameters
----------
source : string
Frequency converting from

Returns
-------
string code
"""

assert code is not None
if isinstance(code, offsets.DateOffset):
code = code.rule_code
return code.upper()


def is_subperiod(source, target):
"""
Returns True if downsampling is possible between source and target
Expand All @@ -1140,14 +1160,12 @@ def is_subperiod(source, target):
-------
is_subperiod : boolean
"""
if isinstance(source, offsets.DateOffset):
source = source.rule_code

if isinstance(target, offsets.DateOffset):
target = target.rule_code
if target is None or source is None:
return False
source = _maybe_coerce_freq(source)
target = _maybe_coerce_freq(target)

target = target.upper()
source = source.upper()
if _is_annual(target):
if _is_quarterly(source):
return _quarter_months_conform(_get_rule_month(source),
Expand Down Expand Up @@ -1195,14 +1213,11 @@ def is_superperiod(source, target):
-------
is_superperiod : boolean
"""
if isinstance(source, offsets.DateOffset):
source = source.rule_code

if isinstance(target, offsets.DateOffset):
target = target.rule_code
if target is None or source is None:
return False
source = _maybe_coerce_freq(source)
target = _maybe_coerce_freq(target)

target = target.upper()
source = source.upper()
if _is_annual(source):
if _is_annual(target):
return _get_rule_month(source) == _get_rule_month(target)
Expand Down
35 changes: 28 additions & 7 deletions pandas/tseries/resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -595,6 +595,14 @@ def _downsample(self, how, **kwargs):

return self._wrap_result(result)

def _adjust_binner_for_upsample(self, binner):
""" adjust our binner when upsampling """
if self.closed == 'right':
binner = binner[1:]
else:
binner = binner[:-1]
return binner

def _upsample(self, method, limit=None):
"""
method : string {'backfill', 'bfill', 'pad', 'ffill'}
Expand All @@ -614,11 +622,7 @@ def _upsample(self, method, limit=None):
ax = self.ax
obj = self._selected_obj
binner = self.binner

if self.closed == 'right':
res_index = binner[1:]
else:
res_index = binner[:-1]
res_index = self._adjust_binner_for_upsample(binner)

# if we have the same frequency as our axis, then we are equal sampling
if limit is None and to_offset(ax.inferred_freq) == self.freq:
Expand Down Expand Up @@ -764,6 +768,20 @@ class TimedeltaResampler(DatetimeIndexResampler):
def _get_binner_for_time(self):
return self.groupby._get_time_delta_bins(self.ax)

def _adjust_binner_for_upsample(self, binner):
""" adjust our binner when upsampling """
ax = self.ax

if is_subperiod(ax.freq, self.freq):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYI this could super up

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doesn't work. I think there is something going on w.r.t. how the values come back for DTI vs. TDI, meaning TDI always adds a stamp on (then removes it here). There is lots of internal logic, e.g. _adjust_bin_edges that is doing things.

# We are actually downsampling
# but are in the asfreq path
# GH 12926
if self.closed == 'right':
binner = binner[1:]
else:
binner = binner[:-1]
return binner


def resample(obj, kind=None, **kwds):
""" create a TimeGrouper and return our resampler """
Expand Down Expand Up @@ -1004,8 +1022,11 @@ def _get_time_delta_bins(self, ax):
data=[], freq=self.freq, name=ax.name)
return binner, [], labels

labels = binner = TimedeltaIndex(start=ax[0],
end=ax[-1],
# we need 1 extra bin here to accommodate the self.closed
start = ax[0]
end = ax[-1]
labels = binner = TimedeltaIndex(start=start,
end=end,
freq=self.freq,
name=ax.name)

Expand Down
9 changes: 9 additions & 0 deletions pandas/tseries/tests/test_frequencies.py
Original file line number Diff line number Diff line change
Expand Up @@ -670,6 +670,15 @@ def test_legacy_offset_warnings(self):


def test_is_superperiod_subperiod():

# input validation
assert not (frequencies.is_superperiod(offsets.YearEnd(), None))
assert not (frequencies.is_subperiod(offsets.MonthEnd(), None))
assert not (frequencies.is_superperiod(None, offsets.YearEnd()))
assert not (frequencies.is_subperiod(None, offsets.MonthEnd()))
assert not (frequencies.is_superperiod(None, None))
assert not (frequencies.is_subperiod(None, None))

assert (frequencies.is_superperiod(offsets.YearEnd(), offsets.MonthEnd()))
assert (frequencies.is_subperiod(offsets.MonthEnd(), offsets.YearEnd()))

Expand Down
143 changes: 134 additions & 9 deletions pandas/tseries/tests/test_resample.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from pandas.tseries.period import period_range, PeriodIndex, Period
from pandas.tseries.resample import (DatetimeIndex, TimeGrouper,
DatetimeIndexResampler)
from pandas.tseries.frequencies import to_offset
from pandas.tseries.tdi import timedelta_range
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
assert_frame_equal)
Expand All @@ -35,6 +36,16 @@
resample_methods = downsample_methods + upsample_methods + series_methods


def _simple_ts(start, end, freq='D'):
rng = date_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)


def _simple_pts(start, end, freq='D'):
rng = period_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)


class TestResampleAPI(tm.TestCase):
_multiprocess_can_split_ = True

Expand Down Expand Up @@ -566,15 +577,63 @@ def test_agg_consistency(self):
assert_frame_equal(result, expected)


class TestResample(tm.TestCase):
class Base(object):
"""
base class for resampling testing, calling
.create_series() generates a series of each index type
"""
def create_index(self, *args, **kwargs):
""" return the _index_factory created using the args, kwargs """
factory = self._index_factory()
return factory(*args, **kwargs)

def test_asfreq_downsample(self):
s = self.create_series()

result = s.resample('2D').asfreq()
expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2)))
expected.index.freq = to_offset('2D')
assert_series_equal(result, expected)

frame = s.to_frame('value')
result = frame.resample('2D').asfreq()
expected = frame.reindex(
frame.index.take(np.arange(0, len(frame.index), 2)))
expected.index.freq = to_offset('2D')
assert_frame_equal(result, expected)

def test_asfreq_upsample(self):
s = self.create_series()

result = s.resample('1H').asfreq()
new_index = self.create_index(s.index[0], s.index[-1], freq='1H')
expected = s.reindex(new_index)
assert_series_equal(result, expected)

frame = s.to_frame('value')
result = frame.resample('1H').asfreq()
new_index = self.create_index(frame.index[0],
frame.index[-1], freq='1H')
expected = frame.reindex(new_index)
assert_frame_equal(result, expected)


class TestDatetimeIndex(Base, tm.TestCase):
_multiprocess_can_split_ = True
_index_factory = lambda x: date_range

def setUp(self):
dti = DatetimeIndex(start=datetime(2005, 1, 1),
end=datetime(2005, 1, 10), freq='Min')

self.series = Series(np.random.rand(len(dti)), dti)

def create_series(self):
i = date_range(datetime(2005, 1, 1),
datetime(2005, 1, 10), freq='D')

return Series(np.arange(len(i)), index=i, name='dti')

def test_custom_grouper(self):

dti = DatetimeIndex(freq='Min', start=datetime(2005, 1, 1),
Expand Down Expand Up @@ -1798,18 +1857,61 @@ def test_resmaple_dst_anchor(self):
'D Frequency')


def _simple_ts(start, end, freq='D'):
rng = date_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)
class TestPeriodIndex(Base, tm.TestCase):
_multiprocess_can_split_ = True
_index_factory = lambda x: period_range

def create_series(self):
i = period_range(datetime(2005, 1, 1),
datetime(2005, 1, 10), freq='D')

def _simple_pts(start, end, freq='D'):
rng = period_range(start, end, freq=freq)
return Series(np.random.randn(len(rng)), index=rng)
return Series(np.arange(len(i)), index=i, name='pi')

def test_asfreq_downsample(self):

class TestResamplePeriodIndex(tm.TestCase):
_multiprocess_can_split_ = True
# series
s = self.create_series()
expected = s.reindex(s.index.take(np.arange(0, len(s.index), 2)))
expected.index = expected.index.to_timestamp()
expected.index.freq = to_offset('2D')

# this is a bug, this *should* return a PeriodIndex
# directly
# GH 12884
result = s.resample('2D').asfreq()
assert_series_equal(result, expected)

# frame
frame = s.to_frame('value')
expected = frame.reindex(
frame.index.take(np.arange(0, len(frame.index), 2)))
expected.index = expected.index.to_timestamp()
expected.index.freq = to_offset('2D')
result = frame.resample('2D').asfreq()
assert_frame_equal(result, expected)

def test_asfreq_upsample(self):

# this is a bug, this *should* return a PeriodIndex
# directly
# GH 12884
s = self.create_series()
new_index = date_range(s.index[0].to_timestamp(how='start'),
(s.index[-1] + 1).to_timestamp(how='start'),
freq='1H',
closed='left')
expected = s.to_timestamp().reindex(new_index).to_period()
result = s.resample('1H').asfreq()
assert_series_equal(result, expected)

frame = s.to_frame('value')
new_index = date_range(frame.index[0].to_timestamp(how='start'),
(frame.index[-1] + 1).to_timestamp(how='start'),
freq='1H',
closed='left')
expected = frame.to_timestamp().reindex(new_index).to_period()
result = frame.resample('1H').asfreq()
assert_frame_equal(result, expected)

def test_annual_upsample_D_s_f(self):
self._check_annual_upsample_cases('D', 'start', 'ffill')
Expand Down Expand Up @@ -2336,6 +2438,29 @@ def test_evenly_divisible_with_no_extra_bins(self):
assert_frame_equal(result, expected)


class TestTimedeltaIndex(Base, tm.TestCase):
_multiprocess_can_split_ = True
_index_factory = lambda x: timedelta_range

def create_series(self):
i = timedelta_range('1 day',
'10 day', freq='D')

return Series(np.arange(len(i)), index=i, name='tdi')

def test_asfreq_bug(self):

import datetime as dt
df = DataFrame(data=[1, 3],
index=[dt.timedelta(), dt.timedelta(minutes=3)])
result = df.resample('1T').asfreq()
expected = DataFrame(data=[1, np.nan, np.nan, 3],
index=timedelta_range('0 day',
periods=4,
freq='1T'))
assert_frame_equal(result, expected)


class TestTimeGrouper(tm.TestCase):
def setUp(self):
self.ts = Series(np.random.randn(1000),
Expand Down