diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
index 083242cd69b74..0097847874616 100644
--- a/doc/source/whatsnew/v0.23.0.txt
+++ b/doc/source/whatsnew/v0.23.0.txt
@@ -663,6 +663,7 @@ Timezones
 - Bug in tz-aware :class:`DatetimeIndex` where addition/subtraction with a :class:`TimedeltaIndex` or array with ``dtype='timedelta64[ns]'`` was incorrect (:issue:`17558`)
 - Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`)
 - Bug in the :class:`DataFrame` constructor, where tz-aware Datetimeindex and a given column name will result in an empty ``DataFrame`` (:issue:`19157`)
+- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`)
 
 Offsets
 ^^^^^^^
diff --git a/pandas/_libs/tslibs/conversion.pxd b/pandas/_libs/tslibs/conversion.pxd
index 6e7df10e7c424..59c1cde11f925 100644
--- a/pandas/_libs/tslibs/conversion.pxd
+++ b/pandas/_libs/tslibs/conversion.pxd
@@ -21,8 +21,6 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
 cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
                                             int32_t nanos=*)
 
-cdef void _localize_tso(_TSObject obj, object tz)
-
 cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2)
 
 cdef int64_t get_datetime64_nanos(object val) except? -1
diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx
index 4f1a053da6f1d..f37d71af0bd4d 100644
--- a/pandas/_libs/tslibs/conversion.pyx
+++ b/pandas/_libs/tslibs/conversion.pyx
@@ -309,12 +309,13 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
         raise TypeError('Cannot convert input [{}] of type {} to '
                         'Timestamp'.format(ts, type(ts)))
 
-    if obj.value != NPY_NAT:
-        check_dts_bounds(&obj.dts)
-
     if tz is not None:
-        _localize_tso(obj, tz)
+        localize_tso(obj, tz)
 
+    if obj.value != NPY_NAT:
+        # check_overflows needs to run after localize_tso
+        check_dts_bounds(&obj.dts)
+        check_overflows(obj)
     return obj
 
 
@@ -391,6 +392,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
         obj.dts.ps = nanos * 1000
 
     check_dts_bounds(&obj.dts)
+    check_overflows(obj)
     return obj
 
 
@@ -454,6 +456,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
                 obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC')
                 if tz is None:
                     check_dts_bounds(&obj.dts)
+                    check_overflows(obj)
                     return obj
                 else:
                     # Keep the converter same as PyDateTime's
@@ -469,7 +472,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
                 else:
                     ts = obj.value
                     if tz is not None:
-                        # shift for _localize_tso
+                        # shift for localize_tso
                         ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
                                                 ambiguous='raise',
                                                 errors='raise')[0]
@@ -490,12 +493,51 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
     return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst)
 
 
+cdef inline check_overflows(_TSObject obj):
+    """
+    Check that we haven't silently overflowed in timezone conversion
+
+    Parameters
+    ----------
+    obj : _TSObject
+
+    Returns
+    -------
+    None
+
+    Raises
+    ------
+    OutOfBoundsDatetime
+    """
+    # GH#12677
+    if obj.dts.year == 1677:
+        if not (obj.value < 0):
+            raise OutOfBoundsDatetime
+    elif obj.dts.year == 2262:
+        if not (obj.value > 0):
+            raise OutOfBoundsDatetime
+
+
 # ----------------------------------------------------------------------
 # Localization
 
-cdef inline void _localize_tso(_TSObject obj, object tz):
+cdef inline void localize_tso(_TSObject obj, tzinfo tz):
     """
-    Take a TSObject in UTC and localizes to timezone tz.
+    Given the UTC nanosecond timestamp in obj.value, find the wall-clock
+    representation of that timestamp in the given timezone.
+
+    Parameters
+    ----------
+    obj : _TSObject
+    tz : tzinfo
+
+    Returns
+    -------
+    None
+
+    Notes
+    -----
+    Sets obj.tzinfo inplace, alters obj.dts inplace.
     """
     cdef:
         ndarray[int64_t] trans, deltas
diff --git a/pandas/tests/scalar/timestamp/test_timezones.py b/pandas/tests/scalar/timestamp/test_timezones.py
index 7a5c6feb8b651..f43651dc6f0db 100644
--- a/pandas/tests/scalar/timestamp/test_timezones.py
+++ b/pandas/tests/scalar/timestamp/test_timezones.py
@@ -15,12 +15,29 @@
 import pandas.util._test_decorators as td
 
 from pandas import Timestamp, NaT
+from pandas.errors import OutOfBoundsDatetime
 
 
 class TestTimestampTZOperations(object):
     # --------------------------------------------------------------
     # Timestamp.tz_localize
 
+    def test_tz_localize_pushes_out_of_bounds(self):
+        # GH#12677
+        # tz_localize that pushes away from the boundary is OK
+        pac = Timestamp.min.tz_localize('US/Pacific')
+        assert pac.value > Timestamp.min.value
+        pac.tz_convert('Asia/Tokyo')  # tz_convert doesn't change value
+        with pytest.raises(OutOfBoundsDatetime):
+            Timestamp.min.tz_localize('Asia/Tokyo')
+
+        # tz_localize that pushes away from the boundary is OK
+        tokyo = Timestamp.max.tz_localize('Asia/Tokyo')
+        assert tokyo.value < Timestamp.max.value
+        tokyo.tz_convert('US/Pacific')  # tz_convert doesn't change value
+        with pytest.raises(OutOfBoundsDatetime):
+            Timestamp.max.tz_localize('US/Pacific')
+
     def test_tz_localize_ambiguous_bool(self):
         # make sure that we are correctly accepting bool values as ambiguous
         # GH#14402