Skip to content

Commit c0e75a5

Browse files
jbrockmendel authored and jreback committed
fix overflows in Timestamp.tz_localize near boundaries (#19626)
1 parent 507a2a2 commit c0e75a5

File tree

4 files changed

+67
-9
lines changed

4 files changed

+67
-9
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line numberDiff line numberDiff line change
@@ -727,6 +727,7 @@ Timezones
727727
- Bug in tz-aware :class:`DatetimeIndex` where addition/subtraction with a :class:`TimedeltaIndex` or array with ``dtype='timedelta64[ns]'`` was incorrect (:issue:`17558`)
728728
- Bug in :func:`DatetimeIndex.insert` where inserting ``NaT`` into a timezone-aware index incorrectly raised (:issue:`16357`)
729729
- Bug in the :class:`DataFrame` constructor, where tz-aware :class:`DatetimeIndex` and a given column name will result in an empty ``DataFrame`` (:issue:`19157`)
730+
- Bug in :func:`Timestamp.tz_localize` where localizing a timestamp near the minimum or maximum valid values could overflow and return a timestamp with an incorrect nanosecond value (:issue:`12677`)
730731

731732
Offsets
732733
^^^^^^^

pandas/_libs/tslibs/conversion.pxd

-2
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
2121
cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
2222
int32_t nanos=*)
2323

24-
cdef void _localize_tso(_TSObject obj, object tz)
25-
2624
cpdef int64_t tz_convert_single(int64_t val, object tz1, object tz2)
2725

2826
cdef int64_t get_datetime64_nanos(object val) except? -1

pandas/_libs/tslibs/conversion.pyx

+49-7
Original file line numberDiff line numberDiff line change
@@ -309,12 +309,13 @@ cdef convert_to_tsobject(object ts, object tz, object unit,
309309
raise TypeError('Cannot convert input [{}] of type {} to '
310310
'Timestamp'.format(ts, type(ts)))
311311

312-
if obj.value != NPY_NAT:
313-
check_dts_bounds(&obj.dts)
314-
315312
if tz is not None:
316-
_localize_tso(obj, tz)
313+
localize_tso(obj, tz)
317314

315+
if obj.value != NPY_NAT:
316+
# check_overflows needs to run after localize_tso
317+
check_dts_bounds(&obj.dts)
318+
check_overflows(obj)
318319
return obj
319320

320321

@@ -391,6 +392,7 @@ cdef _TSObject convert_datetime_to_tsobject(datetime ts, object tz,
391392
obj.dts.ps = nanos * 1000
392393

393394
check_dts_bounds(&obj.dts)
395+
check_overflows(obj)
394396
return obj
395397

396398

@@ -454,6 +456,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
454456
obj.value = tz_convert_single(obj.value, obj.tzinfo, 'UTC')
455457
if tz is None:
456458
check_dts_bounds(&obj.dts)
459+
check_overflows(obj)
457460
return obj
458461
else:
459462
# Keep the converter same as PyDateTime's
@@ -469,7 +472,7 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
469472
else:
470473
ts = obj.value
471474
if tz is not None:
472-
# shift for _localize_tso
475+
# shift for localize_tso
473476
ts = tz_localize_to_utc(np.array([ts], dtype='i8'), tz,
474477
ambiguous='raise',
475478
errors='raise')[0]
@@ -490,12 +493,51 @@ cdef _TSObject convert_str_to_tsobject(object ts, object tz, object unit,
490493
return convert_to_tsobject(ts, tz, unit, dayfirst, yearfirst)
491494

492495

496+
cdef inline check_overflows(_TSObject obj):
    """
    Guard against a silent overflow introduced by timezone conversion.

    The broken-down year in ``obj.dts`` is compared against the sign of
    ``obj.value``; a mismatch in either boundary year is reported as an
    out-of-bounds timestamp.

    Parameters
    ----------
    obj : _TSObject

    Returns
    -------
    None

    Raises
    ------
    OutOfBoundsDatetime
        If ``obj.dts.year`` is 1677 while ``obj.value`` is not negative,
        or ``obj.dts.year`` is 2262 while ``obj.value`` is not positive.
    """
    # GH#12677
    if obj.dts.year == 1677 and obj.value >= 0:
        # year 1677 is only accepted together with a negative value
        raise OutOfBoundsDatetime
    elif obj.dts.year == 2262 and obj.value <= 0:
        # year 2262 is only accepted together with a positive value
        raise OutOfBoundsDatetime
519+
520+
493521
# ----------------------------------------------------------------------
494522
# Localization
495523

496-
cdef inline void _localize_tso(_TSObject obj, object tz):
524+
cdef inline void localize_tso(_TSObject obj, tzinfo tz):
497525
"""
498-
Take a TSObject in UTC and localizes to timezone tz.
526+
Given the UTC nanosecond timestamp in obj.value, find the wall-clock
527+
representation of that timestamp in the given timezone.
528+
529+
Parameters
530+
----------
531+
obj : _TSObject
532+
tz : tzinfo
533+
534+
Returns
535+
-------
536+
None
537+
538+
Notes
539+
-----
540+
Sets obj.tzinfo inplace, alters obj.dts inplace.
499541
"""
500542
cdef:
501543
ndarray[int64_t] trans, deltas

pandas/tests/scalar/timestamp/test_timezones.py

+17
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,29 @@
1515
import pandas.util._test_decorators as td
1616

1717
from pandas import Timestamp, NaT
18+
from pandas.errors import OutOfBoundsDatetime
1819

1920

2021
class TestTimestampTZOperations(object):
2122
# --------------------------------------------------------------
2223
# Timestamp.tz_localize
2324

25+
def test_tz_localize_pushes_out_of_bounds(self):
    # GH#12677
    lower_bound = Timestamp.min
    upper_bound = Timestamp.max

    # Minimum timestamp: localizing to US/Pacific moves the value away
    # from the boundary, so it is accepted
    pacific_min = lower_bound.tz_localize('US/Pacific')
    assert lower_bound.value < pacific_min.value
    # converting afterwards leaves the value unchanged and must not raise
    pacific_min.tz_convert('Asia/Tokyo')
    # localizing the minimum straight to Asia/Tokyo is expected to raise
    with pytest.raises(OutOfBoundsDatetime):
        lower_bound.tz_localize('Asia/Tokyo')

    # Maximum timestamp: localizing to Asia/Tokyo moves the value away
    # from the boundary, so it is accepted
    tokyo_max = upper_bound.tz_localize('Asia/Tokyo')
    assert upper_bound.value > tokyo_max.value
    # converting afterwards leaves the value unchanged and must not raise
    tokyo_max.tz_convert('US/Pacific')
    # localizing the maximum straight to US/Pacific is expected to raise
    with pytest.raises(OutOfBoundsDatetime):
        upper_bound.tz_localize('US/Pacific')
40+
2441
def test_tz_localize_ambiguous_bool(self):
2542
# make sure that we are correctly accepting bool values as ambiguous
2643
# GH#14402

0 commit comments

Comments
 (0)