From 940fb223864737f001c2820111dc72ca37646e0b Mon Sep 17 00:00:00 2001 From: Julian Santander Date: Thu, 17 Nov 2016 23:45:53 +0100 Subject: [PATCH 1/4] Avoid AmbiguousTimeError on groupby --- pandas/tseries/index.py | 2 +- pandas/tseries/resample.py | 9 +++------ 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/pandas/tseries/index.py b/pandas/tseries/index.py index 70e2d2c121773..024306edef2d8 100644 --- a/pandas/tseries/index.py +++ b/pandas/tseries/index.py @@ -439,7 +439,7 @@ def _generate(cls, start, end, periods, name, offset, tz = tz.localize(date.replace(tzinfo=None)).tzinfo if tz is not None and inferred_tz is not None: - if not inferred_tz == tz: + if not tslib.get_timezone(inferred_tz) == tslib.get_timezone(tz): raise AssertionError("Inferred time zone not equal to passed " "time zone") diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index d02c403cb3c66..c2df03b5a4316 100644 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -1284,8 +1284,7 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): # See https://github.com/pandas-dev/pandas/issues/8683 first_tzinfo = first.tzinfo - first = first.tz_localize(None) - last = last.tz_localize(None) + last_tzinfo = last.tzinfo start_day_nanos = first.normalize().value base_nanos = (base % offset.n) * offset.nanos // offset.n @@ -1320,11 +1319,9 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): else: lresult = last.value + offset.nanos -# return (Timestamp(fresult, tz=first.tz), -# Timestamp(lresult, tz=last.tz)) + return (Timestamp(fresult, tz=first_tzinfo), + Timestamp(lresult, tz=last_tzinfo)) - return (Timestamp(fresult).tz_localize(first_tzinfo), - Timestamp(lresult).tz_localize(first_tzinfo)) def asfreq(obj, freq, method=None, how=None, normalize=False): From 99a53675669d296bc28a8d70a2a520b51b69368d Mon Sep 17 00:00:00 2001 From: Julian Santander Date: Fri, 18 Nov 2016 12:41:28 +0100 Subject: [PATCH 2/4] Fix unittest error and lint warning --- pandas/tseries/resample.py | 12 +++++++++--- pandas/tseries/tests/test_resample.py | 13 ++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) mode change 100644 => 100755 pandas/tseries/resample.py mode change 100644 => 100755 pandas/tseries/tests/test_resample.py diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py old mode 100644 new mode 100755 index c2df03b5a4316..d17b555a35f04 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -5,6 +5,7 @@ import pandas as pd from pandas.core.base import AbstractMethodError, GroupByMixin +from pandas.core.config_init import pc_ambiguous_as_wide_doc from pandas.core.groupby import (BinGrouper, Grouper, _GroupBy, GroupBy, SeriesGroupBy, groupby, PanelGroupBy) @@ -1285,6 +1286,12 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): first_tzinfo = first.tzinfo last_tzinfo = last.tzinfo + first_dst = bool(first.dst()) + last_dst = bool(last.dst()) + + first = first.tz_localize(None) + last = last.tz_localize(None) + start_day_nanos = first.normalize().value base_nanos = (base % offset.n) * offset.nanos // offset.n @@ -1319,9 +1326,8 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): else: lresult = last.value + offset.nanos - return (Timestamp(fresult, tz=first_tzinfo), - Timestamp(lresult, tz=last_tzinfo)) - + return (Timestamp(fresult).tz_localize(first_tzinfo, ambiguous=first_dst), + Timestamp(lresult).tz_localize(last_tzinfo, ambiguous=last_dst)) def asfreq(obj, freq, method=None, how=None, normalize=False): diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py old mode 100644 new mode 100755 index 9d3d27f3224b4..671c542085159 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -1912,7 +1912,18 @@ def test_resample_size(self): right = Series(val, index=ix) assert_series_equal(left, right) - def test_resmaple_dst_anchor(self): + def test_resample_across_dst(self): + #14682 + df1 = DataFrame([1477786980, 1477790580], columns=['ts']) + df2 = DataFrame([1477785600, 1477789200], columns=['ts']) + dti1 = DatetimeIndex(pd.to_datetime(df1.ts, unit='s').dt.tz_localize('UTC').dt.tz_convert('Europe/Madrid')) + dti2 = DatetimeIndex(pd.to_datetime(df2.ts, unit='s').dt.tz_localize('UTC').dt.tz_convert('Europe/Madrid')) + df = DataFrame([5, 5], index=dti1) + assert_frame_equal( + df.resample(rule='H').sum(), + DataFrame([5, 5], index=dti2)) + + def test_resample_dst_anchor(self): # 5172 dti = DatetimeIndex([datetime(2012, 11, 4, 23)], tz='US/Eastern') df = DataFrame([5], index=dti) From 817ed97710e6c1e470219f876201764de4ef756a Mon Sep 17 00:00:00 2001 From: Julian Santander Date: Fri, 18 Nov 2016 21:07:33 +0100 Subject: [PATCH 3/4] Addressing code inspections comments --- doc/source/whatsnew/v0.19.2.txt | 1 + pandas/tseries/resample.py | 5 ++++- pandas/tseries/tests/test_resample.py | 19 ++++++++++++++++--- 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index f4a45a6938a95..820eff1fc719a 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -43,3 +43,4 @@ Bug Fixes - Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`) +- Fix AmbiguousTimeError exception when resampling a DatetimeIndex in local TZ, covering a DST change (:issue:`14682`) diff --git a/pandas/tseries/resample.py b/pandas/tseries/resample.py index d17b555a35f04..31781eb3fc131 100755 --- a/pandas/tseries/resample.py +++ b/pandas/tseries/resample.py @@ -5,7 +5,6 @@ import pandas as pd from pandas.core.base import AbstractMethodError, GroupByMixin -from pandas.core.config_init import pc_ambiguous_as_wide_doc from pandas.core.groupby import (BinGrouper, Grouper, _GroupBy, GroupBy, SeriesGroupBy, groupby, PanelGroupBy) @@ -1284,6 +1283,10 @@ def _adjust_dates_anchored(first, last, offset, closed='right', base=0): # # See https://github.com/pandas-dev/pandas/issues/8683 + # 14682 - Since we need to drop the TZ information to perform + # the adjustment in the presence of a DST change, + # save TZ Info and the DST state of the first and last parameters + # so that we can accurately rebuild them at the end. first_tzinfo = first.tzinfo last_tzinfo = last.tzinfo first_dst = bool(first.dst()) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 671c542085159..403910902fe80 100755 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -1913,11 +1913,24 @@ def test_resample_size(self): assert_series_equal(left, right) def test_resample_across_dst(self): - #14682 + # The test resamples a DatetimeIndex with values before and after a + # DST change + # Issue: 14682 + + # The DatetimeIndex we will start with + # (note that DST happens at 03:00+02:00 -> 02:00+01:00) + # 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00 df1 = DataFrame([1477786980, 1477790580], columns=['ts']) + dti1 = DatetimeIndex(pd.to_datetime(df1.ts, unit='s') + .dt.tz_localize('UTC') + .dt.tz_convert('Europe/Madrid')) + + # The expected DatetimeIndex after resampling. + # 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00 df2 = DataFrame([1477785600, 1477789200], columns=['ts']) - dti1 = DatetimeIndex(pd.to_datetime(df1.ts, unit='s').dt.tz_localize('UTC').dt.tz_convert('Europe/Madrid')) - dti2 = DatetimeIndex(pd.to_datetime(df2.ts, unit='s').dt.tz_localize('UTC').dt.tz_convert('Europe/Madrid')) + dti2 = DatetimeIndex(pd.to_datetime(df2.ts, unit='s') + .dt.tz_localize('UTC') + .dt.tz_convert('Europe/Madrid')) df = DataFrame([5, 5], index=dti1) assert_frame_equal( df.resample(rule='H').sum(), From d90afaf01a148a5cea15250dbe28617095a84163 Mon Sep 17 00:00:00 2001 From: Julian Santander Date: Mon, 21 Nov 2016 13:50:07 +0100 Subject: [PATCH 4/4] Addressing additional code inspection comments --- doc/source/whatsnew/v0.19.2.txt | 2 +- pandas/tseries/tests/test_resample.py | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/doc/source/whatsnew/v0.19.2.txt b/doc/source/whatsnew/v0.19.2.txt index 820eff1fc719a..7b21ad0186d4e 100644 --- a/doc/source/whatsnew/v0.19.2.txt +++ b/doc/source/whatsnew/v0.19.2.txt @@ -43,4 +43,4 @@ Bug Fixes - Explicit check in ``to_stata`` and ``StataWriter`` for out-of-range values when writing doubles (:issue:`14618`) -- Fix AmbiguousTimeError exception when resampling a DatetimeIndex in local TZ, covering a DST change (:issue:`14682`) +- Fix ``AmbiguousTimeError`` exception when resampling a ``DatetimeIndex`` in local TZ, covering a DST change (:issue:`14682`) diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py index 403910902fe80..b8c060c024867 100755 --- a/pandas/tseries/tests/test_resample.py +++ b/pandas/tseries/tests/test_resample.py @@ -1932,9 +1932,11 @@ def test_resample_across_dst(self): .dt.tz_localize('UTC') .dt.tz_convert('Europe/Madrid')) df = DataFrame([5, 5], index=dti1) - assert_frame_equal( - df.resample(rule='H').sum(), - DataFrame([5, 5], index=dti2)) + + result = df.resample(rule='H').sum() + expected = DataFrame([5, 5], index=dti2) + + assert_frame_equal(result, expected) def test_resample_dst_anchor(self): # 5172