From f6ada6e69600cbe838ff25d667d46a4c166ea357 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 22 Sep 2018 15:55:48 -0700 Subject: [PATCH 1/2] BUG: Avoid AmbiguousTime or NonExistentTime Error when resampling --- pandas/core/resample.py | 36 ++++++++++++++++------------------- pandas/tests/test_resample.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+), 20 deletions(-) diff --git a/pandas/core/resample.py b/pandas/core/resample.py index 1ef8a0854887b..878ac957a8557 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -1328,8 +1328,7 @@ def _get_time_bins(self, ax): data=[], freq=self.freq, name=ax.name) return binner, [], labels - first, last = ax.min(), ax.max() - first, last = _get_range_edges(first, last, self.freq, + first, last = _get_range_edges(ax.min(), ax.max(), self.freq, closed=self.closed, base=self.base) tz = ax.tz @@ -1519,9 +1518,6 @@ def _take_new_index(obj, indexer, new_index, axis=0): def _get_range_edges(first, last, offset, closed='left', base=0): - if isinstance(offset, compat.string_types): - offset = to_offset(offset) - if isinstance(offset, Tick): is_day = isinstance(offset, Day) day_nanos = delta_to_nanoseconds(timedelta(1)) @@ -1531,8 +1527,7 @@ def _get_range_edges(first, last, offset, closed='left', base=0): return _adjust_dates_anchored(first, last, offset, closed=closed, base=base) - if not isinstance(offset, Tick): # and first.time() != last.time(): - # hack! + else: first = first.normalize() last = last.normalize() @@ -1553,19 +1548,16 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): # # See https://github.com/pandas-dev/pandas/issues/8683 - # 14682 - Since we need to drop the TZ information to perform - # the adjustment in the presence of a DST change, - # save TZ Info and the DST state of the first and last parameters - # so that we can accurately rebuild them at the end. + # GH 10117 & GH 19375. 
If first and last contain timezone information, # perform the calculation in UTC in order to avoid localizing on an # ambiguous or nonexistent time. first_tzinfo = first.tzinfo last_tzinfo = last.tzinfo - first_dst = bool(first.dst()) - last_dst = bool(last.dst()) - - first = first.tz_localize(None) - last = last.tz_localize(None) - start_day_nanos = first.normalize().value + if first_tzinfo is not None: + first = first.tz_convert('UTC') + if last_tzinfo is not None: + last = last.tz_convert('UTC') base_nanos = (base % offset.n) * offset.nanos // offset.n start_day_nanos += base_nanos @@ -1598,9 +1590,13 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): lresult = last.value + (offset.nanos - loffset) else: lresult = last.value + offset.nanos - - return (Timestamp(fresult).tz_localize(first_tzinfo, ambiguous=first_dst), - Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst)) + fresult = Timestamp(fresult) + lresult = Timestamp(lresult) + if first_tzinfo is not None: + fresult = fresult.tz_localize('UTC').tz_convert(first_tzinfo) + if last_tzinfo is not None: + lresult = lresult.tz_localize('UTC').tz_convert(last_tzinfo) + return fresult, lresult def asfreq(obj, freq, method=None, how=None, normalize=False, fill_value=None): diff --git a/pandas/tests/test_resample.py b/pandas/tests/test_resample.py index 377253574d2c1..ccd2461d1512e 100644 --- a/pandas/tests/test_resample.py +++ b/pandas/tests/test_resample.py @@ -2485,6 +2485,22 @@ def test_with_local_timezone_dateutil(self): expected = Series(1, index=expected_index) assert_series_equal(result, expected) + def test_resample_nonexistent_time_bin_edge(self): + # GH 19375 + index = date_range('2017-03-12', '2017-03-12 1:45:00', freq='15T') + s = Series(np.zeros(len(index)), index=index) + expected = s.tz_localize('US/Pacific') + result = expected.resample('900S').mean() + tm.assert_series_equal(result, expected) + + def test_resample_ambiguous_time_bin_edge(self): + # GH 10117 
+ idx = pd.date_range("2014-10-25 22:00:00", "2014-10-26 00:30:00", + freq="30T", tz="Europe/London") + expected = Series(np.zeros(len(idx)), index=idx) + result = expected.resample('30T').mean() + tm.assert_series_equal(result, expected) + def test_fill_method_and_how_upsample(self): # GH2073 s = Series(np.arange(9, dtype='int64'), From acd9128d73334e7a8da00ff2963a443c17311791 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 22 Sep 2018 15:56:02 -0700 Subject: [PATCH 2/2] Add whatsnew --- doc/source/whatsnew/v0.24.0.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index ed1bf0a4f8394..31ef70703e2ca 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -679,6 +679,7 @@ Timezones - Bug when setting a new value with :meth:`DataFrame.loc` with a :class:`DatetimeIndex` with a DST transition (:issue:`18308`, :issue:`20724`) - Bug in :meth:`DataFrame.resample` with a DST transition (:issue:`18308`, :issue:`20724`) - Bug in :meth:`DatetimeIndex.unique` that did not re-localize tz-aware dates correctly (:issue:`21737`) - Bug when indexing a :class:`Series` with a DST transition (:issue:`21846`) +- Bug in :meth:`DataFrame.resample` and :meth:`Series.resample` where an ``AmbiguousTimeError`` or ``NonExistentTimeError`` would be raised if a timezone-aware timeseries ended on a DST transition (:issue:`19375`, :issue:`10117`) Offsets ^^^^^^^