Skip to content

Commit 5a2b19b

Browse files
committed
BUG: Resample across multiple days
Fixes an issue where resampling over multiple days causes a ValueError when the number of days between the normalized first and normalized last days is not a multiple of the frequency. Added test TestResample.test_resample_anchored_multiday. Closes pandas-dev#8683.
1 parent a3e478d commit 5a2b19b

File tree

3 files changed

+32
-4
lines changed

3 files changed

+32
-4
lines changed

doc/source/whatsnew/v0.15.2.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,8 @@ Bug Fixes
145145

146146

147147

148-
148+
- Bug in resample that causes a ValueError when resampling across multiple days
149+
and the last offset is not calculated from the start of the range (:issue:`8683`)
149150

150151

151152
- Bug in `pd.infer_freq`/`DataFrame.inferred_freq` that prevented proper sub-daily frequency inference

pandas/tseries/resample.py

+7-3
Original file line numberDiff line numberDiff line change
@@ -411,15 +411,19 @@ def _get_range_edges(first, last, offset, closed='left', base=0):
411411
def _adjust_dates_anchored(first, last, offset, closed='right', base=0):
412412
from pandas.tseries.tools import normalize_date
413413

414+
# First and last offsets should be calculated from the start day to fix an
415+
# error caused by resampling across multiple days when a one day period is
416+
# not a multiple of the frequency.
417+
#
418+
# See https://github.com/pydata/pandas/issues/8683
419+
414420
start_day_nanos = Timestamp(normalize_date(first)).value
415-
last_day_nanos = Timestamp(normalize_date(last)).value
416421

417422
base_nanos = (base % offset.n) * offset.nanos // offset.n
418423
start_day_nanos += base_nanos
419-
last_day_nanos += base_nanos
420424

421425
foffset = (first.value - start_day_nanos) % offset.nanos
422-
loffset = (last.value - last_day_nanos) % offset.nanos
426+
loffset = (last.value - start_day_nanos) % offset.nanos
423427

424428
if closed == 'right':
425429
if foffset > 0:

pandas/tseries/tests/test_resample.py

+23
Original file line numberDiff line numberDiff line change
@@ -705,6 +705,29 @@ def test_resample_anchored_monthstart(self):
705705
for freq in freqs:
706706
result = ts.resample(freq, how='mean')
707707

708+
def test_resample_anchored_multiday(self):
709+
# When resampling a range spanning multiple days, ensure that the
710+
# start date gets used to determine the offset. Fixes an issue where
711+
# a one day period is not a multiple of the frequency.
712+
#
713+
# See: https://github.com/pydata/pandas/issues/8683
714+
715+
s = pd.Series(np.random.randn(5),
716+
index=pd.date_range('2014-10-14 23:06:23.206',
717+
periods=3, freq='400L')
718+
| pd.date_range('2014-10-15 23:00:00',
719+
periods=2, freq='2200L'))
720+
721+
# Ensure left labeling works (no label argument is passed here)
722+
result = s.resample('2200L', 'mean')
723+
self.assertEqual(result.index[-1],
724+
pd.Timestamp('2014-10-15 23:00:02.000'))
725+
726+
# Ensure right labeling works (label='right')
727+
result = s.resample('2200L', 'mean', label='right')
728+
self.assertEqual(result.index[-1],
729+
pd.Timestamp('2014-10-15 23:00:04.200'))
730+
708731
def test_corner_cases(self):
709732
# miscellaneous test coverage
710733

0 commit comments

Comments
 (0)