Skip to content

Commit ce56542

Browse files
uwedeportivo authored and jreback committed
Fix #13306: Hour overflow in tz-aware datetime conversions.
closes #13306 Author: Uwe Hoffmann <[email protected]> Closes #13313 from uwedeportivo/master and squashes the following commits: be3ed90 [Uwe Hoffmann] whatsnew entry for issue #13306 1f5f7a5 [Uwe Hoffmann] Code Review jreback 82f263a [Uwe Hoffmann] Use vectorized searchsorted and tests. a1ed5a5 [Uwe Hoffmann] Fix #13306: Hour overflow in tz-aware datetime conversions.
1 parent 99e78da commit ce56542

File tree

3 files changed

+99
-20
lines changed

3 files changed

+99
-20
lines changed

doc/source/whatsnew/v0.18.2.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,7 @@ Bug Fixes
338338
- Bug in ``.resample(..)`` with a ``PeriodIndex`` not changing its ``freq`` appropriately when empty (:issue:`13067`)
339339
- Bug in ``.resample(..)`` with a ``PeriodIndex`` not retaining its type or name with an empty ``DataFrame`` appropriately when empty (:issue:`13212`)
340340
- Bug in ``groupby(..).resample(..)`` where passing some keywords would raise an exception (:issue:`13235`)
341-
341+
- Bug in ``.tz_convert`` on a tz-aware ``DatetimeIndex`` that relied on index being sorted for correct results (:issue:`13306`)
342342

343343

344344

pandas/tseries/tests/test_timezones.py

+82
Original file line numberDiff line numberDiff line change
@@ -902,6 +902,88 @@ def test_utc_with_system_utc(self):
902902
# check that the time hasn't changed.
903903
self.assertEqual(ts, ts.tz_convert(dateutil.tz.tzutc()))
904904

905+
def test_tz_convert_hour_overflow_dst(self):
906+
# Regression test for:
907+
# https://github.com/pydata/pandas/issues/13306
908+
909+
# sorted case US/Eastern -> UTC
910+
ts = ['2008-05-12 09:50:00',
911+
'2008-12-12 09:50:35',
912+
'2009-05-12 09:50:32']
913+
tt = to_datetime(ts).tz_localize('US/Eastern')
914+
ut = tt.tz_convert('UTC')
915+
expected = np.array([13, 14, 13], dtype=np.int32)
916+
self.assert_numpy_array_equal(ut.hour, expected)
917+
918+
# sorted case UTC -> US/Eastern
919+
ts = ['2008-05-12 13:50:00',
920+
'2008-12-12 14:50:35',
921+
'2009-05-12 13:50:32']
922+
tt = to_datetime(ts).tz_localize('UTC')
923+
ut = tt.tz_convert('US/Eastern')
924+
expected = np.array([9, 9, 9], dtype=np.int32)
925+
self.assert_numpy_array_equal(ut.hour, expected)
926+
927+
# unsorted case US/Eastern -> UTC
928+
ts = ['2008-05-12 09:50:00',
929+
'2008-12-12 09:50:35',
930+
'2008-05-12 09:50:32']
931+
tt = to_datetime(ts).tz_localize('US/Eastern')
932+
ut = tt.tz_convert('UTC')
933+
expected = np.array([13, 14, 13], dtype=np.int32)
934+
self.assert_numpy_array_equal(ut.hour, expected)
935+
936+
# unsorted case UTC -> US/Eastern
937+
ts = ['2008-05-12 13:50:00',
938+
'2008-12-12 14:50:35',
939+
'2008-05-12 13:50:32']
940+
tt = to_datetime(ts).tz_localize('UTC')
941+
ut = tt.tz_convert('US/Eastern')
942+
expected = np.array([9, 9, 9], dtype=np.int32)
943+
self.assert_numpy_array_equal(ut.hour, expected)
944+
945+
def test_tz_convert_hour_overflow_dst_timestamps(self):
946+
# Regression test for:
947+
# https://github.com/pydata/pandas/issues/13306
948+
949+
tz = self.tzstr('US/Eastern')
950+
951+
# sorted case US/Eastern -> UTC
952+
ts = [Timestamp('2008-05-12 09:50:00', tz=tz),
953+
Timestamp('2008-12-12 09:50:35', tz=tz),
954+
Timestamp('2009-05-12 09:50:32', tz=tz)]
955+
tt = to_datetime(ts)
956+
ut = tt.tz_convert('UTC')
957+
expected = np.array([13, 14, 13], dtype=np.int32)
958+
self.assert_numpy_array_equal(ut.hour, expected)
959+
960+
# sorted case UTC -> US/Eastern
961+
ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'),
962+
Timestamp('2008-12-12 14:50:35', tz='UTC'),
963+
Timestamp('2009-05-12 13:50:32', tz='UTC')]
964+
tt = to_datetime(ts)
965+
ut = tt.tz_convert('US/Eastern')
966+
expected = np.array([9, 9, 9], dtype=np.int32)
967+
self.assert_numpy_array_equal(ut.hour, expected)
968+
969+
# unsorted case US/Eastern -> UTC
970+
ts = [Timestamp('2008-05-12 09:50:00', tz=tz),
971+
Timestamp('2008-12-12 09:50:35', tz=tz),
972+
Timestamp('2008-05-12 09:50:32', tz=tz)]
973+
tt = to_datetime(ts)
974+
ut = tt.tz_convert('UTC')
975+
expected = np.array([13, 14, 13], dtype=np.int32)
976+
self.assert_numpy_array_equal(ut.hour, expected)
977+
978+
# unsorted case UTC -> US/Eastern
979+
ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'),
980+
Timestamp('2008-12-12 14:50:35', tz='UTC'),
981+
Timestamp('2008-05-12 13:50:32', tz='UTC')]
982+
tt = to_datetime(ts)
983+
ut = tt.tz_convert('US/Eastern')
984+
expected = np.array([9, 9, 9], dtype=np.int32)
985+
self.assert_numpy_array_equal(ut.hour, expected)
986+
905987
def test_tslib_tz_convert_trans_pos_plus_1__bug(self):
906988
# Regression test for tslib.tz_convert(vals, tz1, tz2).
907989
# See https://github.com/pydata/pandas/issues/4496 for details.

pandas/tslib.pyx

+16-19
Original file line numberDiff line numberDiff line change
@@ -3754,8 +3754,8 @@ except:
37543754

37553755
def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
37563756
cdef:
3757-
ndarray[int64_t] utc_dates, tt, result, trans, deltas
3758-
Py_ssize_t i, pos, n = len(vals)
3757+
ndarray[int64_t] utc_dates, tt, result, trans, deltas, posn
3758+
Py_ssize_t i, j, pos, n = len(vals)
37593759
int64_t v, offset
37603760
pandas_datetimestruct dts
37613761
Py_ssize_t trans_len
@@ -3791,19 +3791,18 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
37913791
return vals
37923792

37933793
trans_len = len(trans)
3794-
pos = trans.searchsorted(tt[0]) - 1
3795-
if pos < 0:
3796-
raise ValueError('First time before start of DST info')
3797-
3798-
offset = deltas[pos]
3794+
posn = trans.searchsorted(tt, side='right')
3795+
j = 0
37993796
for i in range(n):
38003797
v = vals[i]
38013798
if v == NPY_NAT:
38023799
utc_dates[i] = NPY_NAT
38033800
else:
3804-
while pos + 1 < trans_len and v >= trans[pos + 1]:
3805-
pos += 1
3806-
offset = deltas[pos]
3801+
pos = posn[j] - 1
3802+
j = j + 1
3803+
if pos < 0:
3804+
raise ValueError('First time before start of DST info')
3805+
offset = deltas[pos]
38073806
utc_dates[i] = v - offset
38083807
else:
38093808
utc_dates = vals
@@ -3838,20 +3837,18 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
38383837
if (result==NPY_NAT).all():
38393838
return result
38403839

3841-
pos = trans.searchsorted(utc_dates[utc_dates!=NPY_NAT][0]) - 1
3842-
if pos < 0:
3843-
raise ValueError('First time before start of DST info')
3844-
3845-
# TODO: this assumed sortedness :/
3846-
offset = deltas[pos]
3840+
posn = trans.searchsorted(utc_dates[utc_dates!=NPY_NAT], side='right')
3841+
j = 0
38473842
for i in range(n):
38483843
v = utc_dates[i]
38493844
if vals[i] == NPY_NAT:
38503845
result[i] = vals[i]
38513846
else:
3852-
while pos + 1 < trans_len and v >= trans[pos + 1]:
3853-
pos += 1
3854-
offset = deltas[pos]
3847+
pos = posn[j] - 1
3848+
j = j + 1
3849+
if pos < 0:
3850+
raise ValueError('First time before start of DST info')
3851+
offset = deltas[pos]
38553852
result[i] = v + offset
38563853
return result
38573854

0 commit comments

Comments
 (0)