From c5566d540799ab6d9b6b146e0584350d1e4fa46f Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 9 Jun 2018 14:48:49 -0700 Subject: [PATCH 1/7] BUG: Correctly localize Timestamp near DST --- pandas/_libs/tslibs/conversion.pyx | 4 ++++ .../tests/indexes/datetimes/test_construction.py | 9 +++++++++ pandas/tests/indexes/datetimes/test_date_range.py | 14 ++++++++++++++ pandas/tests/scalar/timestamp/test_timestamp.py | 8 ++++++++ 4 files changed, 35 insertions(+) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index f4841e6abb7e8..b346a9c224fe3 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -350,6 +350,10 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, # sort of a temporary hack if ts.tzinfo is not None: if hasattr(tz, 'normalize') and hasattr(ts.tzinfo, '_utcoffset'): + # tz.localize does not correctly localize Timestamps near DST + if hasattr(ts, 'to_pydatetime'): + nanos += ts.nanosecond + ts = ts.to_pydatetime() ts = tz.normalize(ts) obj.value = pydatetime_to_dt64(ts, &obj.dts) obj.tzinfo = ts.tzinfo diff --git a/pandas/tests/indexes/datetimes/test_construction.py b/pandas/tests/indexes/datetimes/test_construction.py index dae69a86910af..b138b79caac76 100644 --- a/pandas/tests/indexes/datetimes/test_construction.py +++ b/pandas/tests/indexes/datetimes/test_construction.py @@ -469,6 +469,15 @@ def test_constructor_with_non_normalized_pytz(self, tz): result = DatetimeIndex(['2010'], tz=non_norm_tz) assert pytz.timezone(tz) is result.tz + def test_constructor_timestamp_near_dst(self): + # GH 20854 + ts = [Timestamp('2016-10-30 03:00:00+0300', tz='Europe/Helsinki'), + Timestamp('2016-10-30 03:00:00+0200', tz='Europe/Helsinki')] + result = DatetimeIndex(ts) + expected = DatetimeIndex([ts[0].to_pydatetime(), + ts[1].to_pydatetime()]) + tm.assert_index_equal(result, expected) + class TestTimeSeries(object): diff --git a/pandas/tests/indexes/datetimes/test_date_range.py b/pandas/tests/indexes/datetimes/test_date_range.py index 193804b66395b..ec37bbbcb6c02 100644 --- a/pandas/tests/indexes/datetimes/test_date_range.py +++ b/pandas/tests/indexes/datetimes/test_date_range.py @@ -278,6 +278,20 @@ def test_wom_len(self, periods): res = date_range(start='20110101', periods=periods, freq='WOM-1MON') assert len(res) == periods + def test_construct_over_dst(self): + # GH 20854 + pre_dst = Timestamp('2010-11-07 01:00:00').tz_localize('US/Pacific', + ambiguous=True) + pst_dst = Timestamp('2010-11-07 01:00:00').tz_localize('US/Pacific', + ambiguous=False) + expect_data = [Timestamp('2010-11-07 00:00:00', tz='US/Pacific'), + pre_dst, + pst_dst] + expected = DatetimeIndex(expect_data) + result = date_range(start='2010-11-7', periods=3, + freq='H', tz='US/Pacific') + tm.assert_index_equal(result, expected) + class TestGenRangeGeneration(object): diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index ab87d98fca8eb..4689c7bea626f 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -528,6 +528,14 @@ def test_disallow_setting_tz(self, tz): with pytest.raises(AttributeError): ts.tz = tz + @pytest.mark.parametrize('offset', ['+0300', '+0200']) + def test_construct_timestamp_near_dst(self, offset): + # GH 20854 + expected = Timestamp('2016-10-30 03:00:00{}'.format(offset), + tz='Europe/Helsinki') + result = Timestamp(expected, tz='Europe/Helsinki') + assert result == expected + class TestTimestamp(object): From 5d207ea54716b30eb6e8649ef356fa4fc0772ff2 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 9 Jun 2018 15:24:20 -0700 Subject: [PATCH 2/7] fix #19970 --- pandas/tests/frame/test_operators.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 5df50f3d7835b..98a22dc90192d 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -992,6 +992,16 @@ def test_boolean_comparison(self): result = df == tup assert_frame_equal(result, expected) + @pytest.mark.parametrize('tz', [None, 'America/New_York']) + def test_boolean_compare_transpose_tzindex_with_dst(self, tz): + # GH 19970 + idx = date_range('20161101', '20161130', freq='4H', tz=tz) + df = DataFrame({'a':range(len(idx)), 'b':range(len(idx))}, + index=idx) + result = df.T == df.T + expected = DataFrame(True, index=list('ab'), columns=idx) + assert_frame_equal(result, expected) + def test_combine_generic(self): df1 = self.frame df2 = self.frame.loc[self.frame.index[:-5], ['A', 'B', 'C']] From f4eb827790f356dec7a7e382ee0642126a2403ed Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sat, 9 Jun 2018 21:59:42 -0700 Subject: [PATCH 3/7] Add whatsnew and more comments --- doc/source/whatsnew/v0.23.1.txt | 4 ++++ pandas/_libs/tslibs/conversion.pyx | 7 +++++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index db25bcf8113f5..aab2f21128ec4 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -129,6 +129,10 @@ Bug Fixes - Bug in :func:`concat` where error was raised in concatenating :class:`Series` with numpy scalar and tuple names (:issue:`21015`) - Bug in :func:`concat` warning message providing the wrong guidance for future behavior (:issue:`21101`) +**Timezones** +- Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`) +- Bug in comparing :class:`DataFrame`s with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`) + **Other** - Tab completion on :class:`Index` in IPython no longer outputs deprecation warnings (:issue:`21125`) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index b346a9c224fe3..7711bfeca5119 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1,6 +1,8 @@ # -*- coding: utf-8 -*- # cython: profile=False +from warnings import catch_warnings + cimport cython from cython cimport Py_ssize_t @@ -347,13 +349,14 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, if tz is not None: tz = maybe_get_tz(tz) - # sort of a temporary hack if ts.tzinfo is not None: if hasattr(tz, 'normalize') and hasattr(ts.tzinfo, '_utcoffset'): # tz.localize does not correctly localize Timestamps near DST + # but correctly localizes datetimes if hasattr(ts, 'to_pydatetime'): nanos += ts.nanosecond - ts = ts.to_pydatetime() + with catch_warnings(record=True): + ts = ts.to_pydatetime() ts = tz.normalize(ts) obj.value = pydatetime_to_dt64(ts, &obj.dts) obj.tzinfo = ts.tzinfo From c8dabe1cf0f6d03f700be9e1ac83daecf0324424 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 10 Jun 2018 09:51:08 -0700 Subject: [PATCH 4/7] pep8 --- pandas/tests/frame/test_operators.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index 98a22dc90192d..f420cafae7148 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -996,7 +996,7 @@ def test_boolean_comparison(self): def test_boolean_compare_transpose_tzindex_with_dst(self, tz): # GH 19970 idx = date_range('20161101', '20161130', freq='4H', tz=tz) - df = DataFrame({'a':range(len(idx)), 'b':range(len(idx))}, + df = DataFrame({'a': range(len(idx)), 'b': range(len(idx))}, index=idx) result = df.T == df.T expected = DataFrame(True, index=list('ab'), columns=idx) From 795cc39a65394e8f2a8d8cc5b28fd6ac7a3c22d4 Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Sun, 10 Jun 2018 21:47:28 -0700 Subject: [PATCH 5/7] Use pydatetime_checkexact --- pandas/_libs/tslibs/conversion.pyx | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 7711bfeca5119..83cd9d87d5bb6 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -1,8 +1,6 @@ # -*- coding: utf-8 -*- # cython: profile=False -from warnings import catch_warnings - cimport cython from cython cimport Py_ssize_t @@ -353,10 +351,9 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, if hasattr(tz, 'normalize') and hasattr(ts.tzinfo, '_utcoffset'): # tz.localize does not correctly localize Timestamps near DST # but correctly localizes datetimes - if hasattr(ts, 'to_pydatetime'): + if not PyDateTime_CheckExact(ts): nanos += ts.nanosecond - with catch_warnings(record=True): - ts = ts.to_pydatetime() + ts = ts.to_pydatetime() ts = tz.normalize(ts) obj.value = pydatetime_to_dt64(ts, &obj.dts) obj.tzinfo = ts.tzinfo From 7e183a859a1c84cc52ac1f1d9f40602adf026f6a Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Mon, 11 Jun 2018 20:56:42 -0700 Subject: [PATCH 6/7] use astimezone instead of normalize --- pandas/_libs/tslibs/conversion.pyx | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 83cd9d87d5bb6..51434a4ccfb2b 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -349,12 +349,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, if ts.tzinfo is not None: if hasattr(tz, 'normalize') and hasattr(ts.tzinfo, '_utcoffset'): - # tz.localize does not correctly localize Timestamps near DST - # but correctly localizes datetimes - if not PyDateTime_CheckExact(ts): - nanos += ts.nanosecond - ts = ts.to_pydatetime() - ts = tz.normalize(ts) + ts = ts.astimezone(tz) obj.value = pydatetime_to_dt64(ts, &obj.dts) obj.tzinfo = ts.tzinfo else: From ae9dc7bbda3644433e10c61ecdc31e8264f17cea Mon Sep 17 00:00:00 2001 From: Matt Roeschke Date: Tue, 12 Jun 2018 20:16:51 -0700 Subject: [PATCH 7/7] move whatsnew and simplify logic --- doc/source/whatsnew/v0.23.1.txt | 4 ---- doc/source/whatsnew/v0.23.2.txt | 4 ++++ pandas/_libs/tslibs/conversion.pyx | 21 ++++----------------- pandas/tests/frame/test_operators.py | 10 ---------- pandas/tests/frame/test_timezones.py | 10 ++++++++++ 5 files changed, 18 insertions(+), 31 deletions(-) diff --git a/doc/source/whatsnew/v0.23.1.txt b/doc/source/whatsnew/v0.23.1.txt index aab2f21128ec4..db25bcf8113f5 100644 --- a/doc/source/whatsnew/v0.23.1.txt +++ b/doc/source/whatsnew/v0.23.1.txt @@ -129,10 +129,6 @@ Bug Fixes - Bug in :func:`concat` where error was raised in concatenating :class:`Series` with numpy scalar and tuple names (:issue:`21015`) - Bug in :func:`concat` warning message providing the wrong guidance for future behavior (:issue:`21101`) -**Timezones** -- Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`) -- Bug in comparing :class:`DataFrame`s with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`) - **Other** - Tab completion on :class:`Index` in IPython no longer outputs deprecation warnings (:issue:`21125`) diff --git a/doc/source/whatsnew/v0.23.2.txt b/doc/source/whatsnew/v0.23.2.txt index c636e73fbd6c2..1de44ffeb4160 100644 --- a/doc/source/whatsnew/v0.23.2.txt +++ b/doc/source/whatsnew/v0.23.2.txt @@ -73,6 +73,10 @@ Bug Fixes - +**Timezones** +- Bug in :class:`Timestamp` and :class:`DatetimeIndex` where passing a :class:`Timestamp` localized after a DST transition would return a datetime before the DST transition (:issue:`20854`) +- Bug in comparing :class:`DataFrame`s with tz-aware :class:`DatetimeIndex` columns with a DST transition that raised a ``KeyError`` (:issue:`19970`) + **Other** - diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 51434a4ccfb2b..3cbef82437544 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -348,23 +348,10 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz, tz = maybe_get_tz(tz) if ts.tzinfo is not None: - if hasattr(tz, 'normalize') and hasattr(ts.tzinfo, '_utcoffset'): - ts = ts.astimezone(tz) - obj.value = pydatetime_to_dt64(ts, &obj.dts) - obj.tzinfo = ts.tzinfo - else: - # tzoffset - try: - tz = ts.astimezone(tz).tzinfo - except: - pass - obj.value = pydatetime_to_dt64(ts, &obj.dts) - ts_offset = get_utcoffset(ts.tzinfo, ts) - obj.value -= int(ts_offset.total_seconds() * 1e9) - tz_offset = get_utcoffset(tz, ts) - obj.value += int(tz_offset.total_seconds() * 1e9) - dt64_to_dtstruct(obj.value, &obj.dts) - obj.tzinfo = tz + # Convert the current timezone to the passed timezone + ts = ts.astimezone(tz) + obj.value = pydatetime_to_dt64(ts, &obj.dts) + obj.tzinfo = ts.tzinfo elif not is_utc(tz): ts = _localize_pydatetime(ts, tz) obj.value = pydatetime_to_dt64(ts, &obj.dts) diff --git a/pandas/tests/frame/test_operators.py b/pandas/tests/frame/test_operators.py index f420cafae7148..5df50f3d7835b 100644 --- a/pandas/tests/frame/test_operators.py +++ b/pandas/tests/frame/test_operators.py @@ -992,16 +992,6 @@ def test_boolean_comparison(self): result = df == tup assert_frame_equal(result, expected) - @pytest.mark.parametrize('tz', [None, 'America/New_York']) - def test_boolean_compare_transpose_tzindex_with_dst(self, tz): - # GH 19970 - idx = date_range('20161101', '20161130', freq='4H', tz=tz) - df = DataFrame({'a': range(len(idx)), 'b': range(len(idx))}, - index=idx) - result = df.T == df.T - expected = DataFrame(True, index=list('ab'), columns=idx) - assert_frame_equal(result, expected) - def test_combine_generic(self): df1 = self.frame df2 = self.frame.loc[self.frame.index[:-5], ['A', 'B', 'C']] diff --git a/pandas/tests/frame/test_timezones.py b/pandas/tests/frame/test_timezones.py index fa589a0aa4817..3956968173070 100644 --- a/pandas/tests/frame/test_timezones.py +++ b/pandas/tests/frame/test_timezones.py @@ -133,3 +133,13 @@ def test_frame_reset_index(self, tz): xp = df.index.tz rs = roundtripped.index.tz assert xp == rs + + @pytest.mark.parametrize('tz', [None, 'America/New_York']) + def test_boolean_compare_transpose_tzindex_with_dst(self, tz): + # GH 19970 + idx = date_range('20161101', '20161130', freq='4H', tz=tz) + df = DataFrame({'a': range(len(idx)), 'b': range(len(idx))}, + index=idx) + result = df.T == df.T + expected = DataFrame(True, index=list('ab'), columns=idx) + tm.assert_frame_equal(result, expected)