Skip to content

Commit b2cdc02

Browse files
wcwagner authored and jreback committed
BUG: Non-string loffset not applied when resampling a timeseries
closes #13218 closes #15002 closes #14213
1 parent 5d4e92c commit b2cdc02

File tree

3 files changed

+63
-3
lines changed

3 files changed

+63
-3
lines changed

doc/source/whatsnew/v0.20.0.txt

+12
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,7 @@ Bug Fixes
328328

329329

330330

331+
- Bug in ``resample``, where a non-string ``loffset`` argument would not be applied when resampling a timeseries (:issue:`13218`)
331332

332333

333334

@@ -337,4 +338,15 @@ Bug Fixes
337338
- Require at least 0.23 version of cython to avoid problems with character encodings (:issue:`14699`)
338339
- Bug in converting object elements of array-like objects to unsigned 64-bit integers (:issue:`4471`, :issue:`14982`)
339340
- Bug in ``pd.pivot_table()`` where no error was raised when values argument was not in the columns (:issue:`14938`)
341+
342+
343+
344+
345+
346+
347+
348+
349+
350+
351+
340352
- Bug in ``pd.read_csv()`` for the C engine where ``usecols`` were being indexed incorrectly with ``parse_dates`` (:issue:`14792`)

pandas/tseries/resample.py

+20-2
Original file line numberDiff line numberDiff line change
@@ -323,6 +323,11 @@ def aggregate(self, arg, *args, **kwargs):
323323
*args,
324324
**kwargs)
325325

326+
# if arg was a string, _aggregate called resampler's _downsample or
327+
# _groupby_and_agg methods, which would've already applied the loffset
328+
if not isinstance(arg, compat.string_types):
329+
result = self._apply_loffset(result)
330+
326331
return result
327332

328333
agg = aggregate
@@ -381,7 +386,7 @@ def _gotitem(self, key, ndim, subset=None):
381386
return grouped
382387

383388
def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
384-
""" revaluate the obj with a groupby aggregation """
389+
""" re-evaluate the obj with a groupby aggregation """
385390

386391
if grouper is None:
387392
self._set_binner()
@@ -409,7 +414,14 @@ def _groupby_and_aggregate(self, how, grouper=None, *args, **kwargs):
409414
return self._wrap_result(result)
410415

411416
def _apply_loffset(self, result):
412-
"""if loffset if set, offset the result index"""
417+
"""
418+
if loffset is set, offset the result index
419+
420+
Parameters
421+
----------
422+
result : Series or DataFrame
423+
the result of resample
424+
"""
413425
loffset = self.loffset
414426
if isinstance(loffset, compat.string_types):
415427
loffset = to_offset(self.loffset)
@@ -419,6 +431,7 @@ def _apply_loffset(self, result):
419431
isinstance(result.index, DatetimeIndex) and
420432
len(result.index) > 0
421433
)
434+
422435
if needs_offset:
423436
result.index = result.index + loffset
424437

@@ -797,6 +810,11 @@ def aggregate(self, arg, *args, **kwargs):
797810
if result is None:
798811
result = self._downsample(arg, *args, **kwargs)
799812

813+
# if arg was a string, _aggregate called resampler's _downsample or
814+
# _groupby_and_agg methods, which would've already applied the loffset
815+
if not isinstance(arg, compat.string_types):
816+
result = self._apply_loffset(result)
817+
800818
return result
801819

802820
agg = aggregate

pandas/tseries/tests/test_resample.py

+31-1
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@
2424
from pandas.tseries.period import period_range, PeriodIndex, Period
2525
from pandas.tseries.resample import (DatetimeIndex, TimeGrouper,
2626
DatetimeIndexResampler)
27-
from pandas.tseries.tdi import timedelta_range
27+
from pandas.tseries.tdi import timedelta_range, TimedeltaIndex
2828
from pandas.util.testing import (assert_series_equal, assert_almost_equal,
2929
assert_frame_equal, assert_index_equal)
3030
from pandas._period import IncompatibleFrequency
@@ -769,6 +769,36 @@ def test_resample_empty_dtypes(self):
769769
# (ex: doing mean with dtype of np.object)
770770
pass
771771

772+
def test_resample_loffset_arg_type(self):
773+
# GH 13218, 15002
774+
df = self.create_series().to_frame('value')
775+
expected_means = [df.values[i:i + 2].mean()
776+
for i in range(0, len(df.values), 2)]
777+
expected_index = self.create_index(df.index[0],
778+
periods=len(df.index) / 2,
779+
freq='2D')
780+
# loffset coerces PeriodIndex to DatetimeIndex
781+
if isinstance(expected_index, PeriodIndex):
782+
expected_index = expected_index.to_timestamp()
783+
expected_index += timedelta(hours=2)
784+
expected = DataFrame({'value': expected_means}, index=expected_index)
785+
for arg in ['mean', {'value': 'mean'}, ['mean']]:
786+
result_agg = df.resample('2D', loffset='2H').agg(arg)
787+
with tm.assert_produces_warning(FutureWarning,
788+
check_stacklevel=False):
789+
result_how = df.resample('2D', how=arg, loffset='2H')
790+
if isinstance(arg, list):
791+
expected.columns = pd.MultiIndex.from_tuples([('value',
792+
'mean')])
793+
# GH 13022, 7687 - TODO: fix resample w/ TimedeltaIndex
794+
if isinstance(expected.index, TimedeltaIndex):
795+
with tm.assertRaises(AssertionError):
796+
assert_frame_equal(result_agg, expected)
797+
assert_frame_equal(result_how, expected)
798+
else:
799+
assert_frame_equal(result_agg, expected)
800+
assert_frame_equal(result_how, expected)
801+
772802

773803
class TestDatetimeIndex(Base, tm.TestCase):
774804
_multiprocess_can_split_ = True

0 commit comments

Comments
 (0)