diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 27540a9626398..4965dc01b9e88 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -338,7 +338,7 @@ Bug Fixes - Bug in ``.resample(..)`` with a ``PeriodIndex`` not changing its ``freq`` appropriately when empty (:issue:`13067`) - Bug in ``.resample(..)`` with a ``PeriodIndex`` not retaining its type or name with an empty ``DataFrame``appropriately when empty (:issue:`13212`) - Bug in ``groupby(..).resample(..)`` where passing some keywords would raise an exception (:issue:`13235`) - +- Bug in ``.tz_convert`` on a tz-aware ``DatetimeIndex`` that relied on the index being sorted for correct results (:issue:`13306`) diff --git a/pandas/tseries/tests/test_timezones.py b/pandas/tseries/tests/test_timezones.py index b80ee4c5c1e39..afe9d0652db19 100644 --- a/pandas/tseries/tests/test_timezones.py +++ b/pandas/tseries/tests/test_timezones.py @@ -902,6 +902,88 @@ def test_utc_with_system_utc(self): # check that the time hasn't changed. 
self.assertEqual(ts, ts.tz_convert(dateutil.tz.tzutc())) + def test_tz_convert_hour_overflow_dst(self): + # Regression test for: + # https://github.com/pydata/pandas/issues/13306 + + # sorted case US/Eastern -> UTC + ts = ['2008-05-12 09:50:00', + '2008-12-12 09:50:35', + '2009-05-12 09:50:32'] + tt = to_datetime(ts).tz_localize('US/Eastern') + ut = tt.tz_convert('UTC') + expected = np.array([13, 14, 13], dtype=np.int32) + self.assert_numpy_array_equal(ut.hour, expected) + + # sorted case UTC -> US/Eastern + ts = ['2008-05-12 13:50:00', + '2008-12-12 14:50:35', + '2009-05-12 13:50:32'] + tt = to_datetime(ts).tz_localize('UTC') + ut = tt.tz_convert('US/Eastern') + expected = np.array([9, 9, 9], dtype=np.int32) + self.assert_numpy_array_equal(ut.hour, expected) + + # unsorted case US/Eastern -> UTC + ts = ['2008-05-12 09:50:00', + '2008-12-12 09:50:35', + '2008-05-12 09:50:32'] + tt = to_datetime(ts).tz_localize('US/Eastern') + ut = tt.tz_convert('UTC') + expected = np.array([13, 14, 13], dtype=np.int32) + self.assert_numpy_array_equal(ut.hour, expected) + + # unsorted case UTC -> US/Eastern + ts = ['2008-05-12 13:50:00', + '2008-12-12 14:50:35', + '2008-05-12 13:50:32'] + tt = to_datetime(ts).tz_localize('UTC') + ut = tt.tz_convert('US/Eastern') + expected = np.array([9, 9, 9], dtype=np.int32) + self.assert_numpy_array_equal(ut.hour, expected) + + def test_tz_convert_hour_overflow_dst_timestamps(self): + # Regression test for: + # https://github.com/pydata/pandas/issues/13306 + + tz = self.tzstr('US/Eastern') + + # sorted case US/Eastern -> UTC + ts = [Timestamp('2008-05-12 09:50:00', tz=tz), + Timestamp('2008-12-12 09:50:35', tz=tz), + Timestamp('2009-05-12 09:50:32', tz=tz)] + tt = to_datetime(ts) + ut = tt.tz_convert('UTC') + expected = np.array([13, 14, 13], dtype=np.int32) + self.assert_numpy_array_equal(ut.hour, expected) + + # sorted case UTC -> US/Eastern + ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), + Timestamp('2008-12-12 14:50:35', tz='UTC'), + 
Timestamp('2009-05-12 13:50:32', tz='UTC')] + tt = to_datetime(ts) + ut = tt.tz_convert('US/Eastern') + expected = np.array([9, 9, 9], dtype=np.int32) + self.assert_numpy_array_equal(ut.hour, expected) + + # unsorted case US/Eastern -> UTC + ts = [Timestamp('2008-05-12 09:50:00', tz=tz), + Timestamp('2008-12-12 09:50:35', tz=tz), + Timestamp('2008-05-12 09:50:32', tz=tz)] + tt = to_datetime(ts) + ut = tt.tz_convert('UTC') + expected = np.array([13, 14, 13], dtype=np.int32) + self.assert_numpy_array_equal(ut.hour, expected) + + # unsorted case UTC -> US/Eastern + ts = [Timestamp('2008-05-12 13:50:00', tz='UTC'), + Timestamp('2008-12-12 14:50:35', tz='UTC'), + Timestamp('2008-05-12 13:50:32', tz='UTC')] + tt = to_datetime(ts) + ut = tt.tz_convert('US/Eastern') + expected = np.array([9, 9, 9], dtype=np.int32) + self.assert_numpy_array_equal(ut.hour, expected) + def test_tslib_tz_convert_trans_pos_plus_1__bug(self): # Regression test for tslib.tz_convert(vals, tz1, tz2). # See https://github.com/pydata/pandas/issues/4496 for details. 
diff --git a/pandas/tslib.pyx b/pandas/tslib.pyx index b3fb4989b2f23..6453e65ecdc81 100644 --- a/pandas/tslib.pyx +++ b/pandas/tslib.pyx @@ -3754,8 +3754,8 @@ except: def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): cdef: - ndarray[int64_t] utc_dates, tt, result, trans, deltas - Py_ssize_t i, pos, n = len(vals) + ndarray[int64_t] utc_dates, tt, result, trans, deltas, posn + Py_ssize_t i, j, pos, n = len(vals) int64_t v, offset pandas_datetimestruct dts Py_ssize_t trans_len @@ -3791,19 +3791,18 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): return vals trans_len = len(trans) - pos = trans.searchsorted(tt[0]) - 1 - if pos < 0: - raise ValueError('First time before start of DST info') - - offset = deltas[pos] + posn = trans.searchsorted(tt, side='right') + j = 0 for i in range(n): v = vals[i] if v == NPY_NAT: utc_dates[i] = NPY_NAT else: - while pos + 1 < trans_len and v >= trans[pos + 1]: - pos += 1 - offset = deltas[pos] + pos = posn[j] - 1 + j = j + 1 + if pos < 0: + raise ValueError('First time before start of DST info') + offset = deltas[pos] utc_dates[i] = v - offset else: utc_dates = vals @@ -3838,20 +3837,18 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2): if (result==NPY_NAT).all(): return result - pos = trans.searchsorted(utc_dates[utc_dates!=NPY_NAT][0]) - 1 - if pos < 0: - raise ValueError('First time before start of DST info') - - # TODO: this assumed sortedness :/ - offset = deltas[pos] + posn = trans.searchsorted(utc_dates[utc_dates!=NPY_NAT], side='right') + j = 0 for i in range(n): v = utc_dates[i] if vals[i] == NPY_NAT: result[i] = vals[i] else: - while pos + 1 < trans_len and v >= trans[pos + 1]: - pos += 1 - offset = deltas[pos] + pos = posn[j] - 1 + j = j + 1 + if pos < 0: + raise ValueError('First time before start of DST info') + offset = deltas[pos] result[i] = v + offset return result