Skip to content

BUG: Correct Timestamp localization with tz near DST (#11481) #15934

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 8, 2017
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.20.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1124,6 +1124,7 @@ Conversion
- Bug in ``Timestamp.replace`` now raises ``TypeError`` when incorrect argument names are given; previously this raised ``ValueError`` (:issue:`15240`)
- Bug in ``Timestamp.replace`` with compat for passing long integers (:issue:`15030`)
- Bug in ``Timestamp`` returning UTC based time/date attributes when a timezone was provided (:issue:`13303`)
- Bug in ``Timestamp`` incorrectly localizing timezones during construction (:issue:`11481`, :issue:`15777`)
- Bug in ``TimedeltaIndex`` addition where overflow was being allowed without error (:issue:`14816`)
- Bug in ``TimedeltaIndex`` raising a ``ValueError`` when boolean indexing with ``loc`` (:issue:`14946`)
- Bug in catching an overflow in ``Timestamp`` + ``Timedelta/Offset`` operations (:issue:`15126`)
Expand Down
40 changes: 38 additions & 2 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1569,7 +1569,9 @@ cpdef convert_str_to_tsobject(object ts, object tz, object unit,
ts = obj.value
if tz is not None:
# shift for _localize_tso
ts = tz_convert_single(ts, tz, 'UTC')
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
ambiguous='raise',
errors='raise')[0]
except ValueError:
try:
ts = parse_datetime_string(
Expand Down Expand Up @@ -4073,7 +4075,23 @@ except:
have_pytz = False


@cython.boundscheck(False)
@cython.wraparound(False)
def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
"""
Convert the values (in i8) from timezone1 to timezone2

Parameters
----------
vals : int64 ndarray
tz1 : string / timezone object
tz2 : string / timezone object

Returns
-------
int64 ndarray of converted values
"""

cdef:
ndarray[int64_t] utc_dates, tt, result, trans, deltas
Py_ssize_t i, j, pos, n = len(vals)
Expand Down Expand Up @@ -4175,6 +4193,23 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):


def tz_convert_single(int64_t val, object tz1, object tz2):
"""
Convert the val (in i8) from timezone1 to timezone2

This is a single timezone version of tz_convert

Parameters
----------
val : int64
tz1 : string / timezone object
tz2 : string / timezone object

Returns
-------
int64 converted

"""

cdef:
ndarray[int64_t] trans, deltas
Py_ssize_t pos
Expand Down Expand Up @@ -4374,7 +4409,7 @@ cpdef ndarray _unbox_utcoffsets(object transinfo):
def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,
object errors='raise'):
"""
Localize tzinfo-naive DateRange to given time zone (using pytz). If
Localize tzinfo-naive i8 to given time zone (using pytz). If
there are ambiguities in the values, raise AmbiguousTimeError.

Returns
Expand Down Expand Up @@ -4546,6 +4581,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, object tz, object ambiguous=None,

return result


cdef inline bisect_right_i8(int64_t *data, int64_t val, Py_ssize_t n):
cdef Py_ssize_t pivot, left = 0, right = n

Expand Down
6 changes: 3 additions & 3 deletions pandas/tests/series/test_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -1024,9 +1024,9 @@ def test_setitem_with_tz_dst(self):
# scalar
s = orig.copy()
s[1] = pd.Timestamp('2011-01-01', tz=tz)
exp = pd.Series([pd.Timestamp('2016-11-06 00:00', tz=tz),
pd.Timestamp('2011-01-01 00:00', tz=tz),
pd.Timestamp('2016-11-06 02:00', tz=tz)])
exp = pd.Series([pd.Timestamp('2016-11-06 00:00-04:00', tz=tz),
pd.Timestamp('2011-01-01 00:00-05:00', tz=tz),
pd.Timestamp('2016-11-06 01:00-05:00', tz=tz)])
tm.assert_series_equal(s, exp)

s = orig.copy()
Expand Down
51 changes: 49 additions & 2 deletions pandas/tests/tseries/test_timezones.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# pylint: disable-msg=E1101,W0612
import pytest
import pytz
import numpy as np
from distutils.version import LooseVersion
Expand Down Expand Up @@ -159,6 +160,52 @@ def test_timestamp_constructed_by_date_and_tz_explicit(self):
self.assertEqual(result.hour, expected.hour)
self.assertEqual(result, expected)

def test_timestamp_constructor_near_dst_boundary(self):
# GH 11481 & 15777
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you give a one-sentence description of what the error was?

# Naive string timestamps were being localized incorrectly
# with tz_convert_single instead of tz_localize_to_utc

for tz in ['Europe/Brussels', 'Europe/Prague']:
result = Timestamp('2015-10-25 01:00', tz=tz)
expected = Timestamp('2015-10-25 01:00').tz_localize(tz)
assert result == expected

with pytest.raises(pytz.AmbiguousTimeError):
Timestamp('2015-10-25 02:00', tz=tz)

result = Timestamp('2017-03-26 01:00', tz='Europe/Paris')
expected = Timestamp('2017-03-26 01:00').tz_localize('Europe/Paris')
assert result == expected

with pytest.raises(pytz.NonExistentTimeError):
Timestamp('2017-03-26 02:00', tz='Europe/Paris')

# GH 11708
result = to_datetime("2015-11-18 15:30:00+05:30").tz_localize(
'UTC').tz_convert('Asia/Kolkata')
expected = Timestamp('2015-11-18 15:30:00+0530', tz='Asia/Kolkata')
assert result == expected

# GH 15823
result = Timestamp('2017-03-26 00:00', tz='Europe/Paris')
expected = Timestamp('2017-03-26 00:00:00+0100', tz='Europe/Paris')
assert result == expected

result = Timestamp('2017-03-26 01:00', tz='Europe/Paris')
expected = Timestamp('2017-03-26 01:00:00+0100', tz='Europe/Paris')
assert result == expected

with pytest.raises(pytz.NonExistentTimeError):
Timestamp('2017-03-26 02:00', tz='Europe/Paris')
result = Timestamp('2017-03-26 02:00:00+0100', tz='Europe/Paris')
expected = Timestamp(result.value).tz_localize(
'UTC').tz_convert('Europe/Paris')
assert result == expected

result = Timestamp('2017-03-26 03:00', tz='Europe/Paris')
expected = Timestamp('2017-03-26 03:00:00+0200', tz='Europe/Paris')
assert result == expected

def test_timestamp_to_datetime_tzoffset(self):
# tzoffset
from dateutil.tz import tzoffset
Expand Down Expand Up @@ -517,8 +564,8 @@ def f():
freq="H"))
if dateutil.__version__ != LooseVersion('2.6.0'):
# GH 14621
self.assertEqual(times[-1], Timestamp('2013-10-27 01:00', tz=tz,
freq="H"))
self.assertEqual(times[-1], Timestamp('2013-10-27 01:00:00+0000',
tz=tz, freq="H"))

def test_ambiguous_nat(self):
tz = self.tz('US/Eastern')
Expand Down