Skip to content

Commit ce5a4ef

Browse files
committed
Merge pull request #7798 from sinhrks/tz_convert_bug
BUG: tslib.tz_convert and tslib.tz_convert_single may output different result in DST
2 parents a59a7ea + 655f7b1 commit ce5a4ef

File tree

8 files changed

+128
-65
lines changed

8 files changed

+128
-65
lines changed

doc/source/v0.15.0.txt

+2-2
Original file line numberDiff line numberDiff line change
@@ -233,8 +233,8 @@ Enhancements
233233

234234

235235

236-
237-
236+
- Bug where ``tslib.tz_convert`` and ``tslib.tz_convert_single`` could return different results around DST transitions (:issue:`7798`)
237+
- Bug in ``DatetimeIndex.intersection`` of non-overlapping timestamps with tz raises ``IndexError`` (:issue:`7880`)
238238

239239

240240

pandas/tests/test_frame.py

-17
Original file line numberDiff line numberDiff line change
@@ -12636,23 +12636,6 @@ def test_consolidate_datetime64(self):
1263612636
assert_array_equal(df.starting.values, ser_starting.index.values)
1263712637
assert_array_equal(df.ending.values, ser_ending.index.values)
1263812638

12639-
def test_tslib_tz_convert_trans_pos_plus_1__bug(self):
12640-
# Regression test for tslib.tz_convert(vals, tz1, tz2).
12641-
# See https://github.com/pydata/pandas/issues/4496 for details.
12642-
idx = pd.date_range(datetime(2011, 3, 26, 23), datetime(2011, 3, 27, 1), freq='1min')
12643-
idx = idx.tz_localize('UTC')
12644-
idx = idx.tz_convert('Europe/Moscow')
12645-
12646-
test_vector = pd.Series([3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
12647-
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
12648-
3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
12649-
4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
12650-
4, 4, 4, 4, 4, 4, 4, 4, 5], dtype=int)
12651-
12652-
hours = idx.hour
12653-
12654-
np.testing.assert_equal(hours, test_vector.values)
12655-
1265612639
def _check_bool_op(self, name, alternative, frame=None, has_skipna=True,
1265712640
has_bool_only=False):
1265812641
if frame is None:

pandas/tseries/frequencies.py

+1-20
Original file line numberDiff line numberDiff line change
@@ -683,25 +683,6 @@ def infer_freq(index, warn=True):
683683
_ONE_HOUR = 60 * _ONE_MINUTE
684684
_ONE_DAY = 24 * _ONE_HOUR
685685

686-
def _tz_convert_with_transitions(values, to_tz, from_tz):
687-
"""
688-
convert i8 values from the specified timezone to the to_tz zone, taking
689-
into account DST transitions
690-
"""
691-
692-
# vectorization is slow, so test whether we can do this via the faster tz_convert
693-
f = lambda x: tslib.tz_convert_single(x, to_tz, from_tz)
694-
695-
if len(values) > 2:
696-
first_slow, last_slow = f(values[0]),f(values[-1])
697-
698-
first_fast, last_fast = tslib.tz_convert(np.array([values[0],values[-1]],dtype='i8'),to_tz,from_tz)
699-
700-
# don't cross a DST, so ok
701-
if first_fast == first_slow and last_fast == last_slow:
702-
return tslib.tz_convert(values,to_tz,from_tz)
703-
704-
return np.vectorize(f)(values)
705686

706687
class _FrequencyInferer(object):
707688
"""
@@ -713,7 +694,7 @@ def __init__(self, index, warn=True):
713694
self.values = np.asarray(index).view('i8')
714695

715696
if index.tz is not None:
716-
self.values = _tz_convert_with_transitions(self.values,'UTC',index.tz)
697+
self.values = tslib.tz_convert(self.values, 'UTC', index.tz)
717698

718699
self.warn = warn
719700

pandas/tseries/index.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from pandas.compat import u
1515
from pandas.tseries.frequencies import (
1616
infer_freq, to_offset, get_period_alias,
17-
Resolution, _tz_convert_with_transitions)
17+
Resolution)
1818
from pandas.core.base import DatetimeIndexOpsMixin
1919
from pandas.tseries.offsets import DateOffset, generate_range, Tick, CDay
2020
from pandas.tseries.tools import parse_time_string, normalize_date
@@ -1569,7 +1569,7 @@ def insert(self, loc, item):
15691569
new_dates = np.concatenate((self[:loc].asi8, [item.view(np.int64)],
15701570
self[loc:].asi8))
15711571
if self.tz is not None:
1572-
new_dates = _tz_convert_with_transitions(new_dates,'UTC',self.tz)
1572+
new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz)
15731573
return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)
15741574

15751575
except (AttributeError, TypeError):
@@ -1606,7 +1606,7 @@ def delete(self, loc):
16061606
freq = self.freq
16071607

16081608
if self.tz is not None:
1609-
new_dates = _tz_convert_with_transitions(new_dates, 'UTC', self.tz)
1609+
new_dates = tslib.tz_convert(new_dates, 'UTC', self.tz)
16101610
return DatetimeIndex(new_dates, name=self.name, freq=freq, tz=self.tz)
16111611

16121612
def _view_like(self, ndarray):

pandas/tseries/tests/test_timeseries.py

+14-10
Original file line numberDiff line numberDiff line change
@@ -3203,8 +3203,8 @@ def test_union(self):
32033203

32043204
def test_intersection(self):
32053205
# GH 4690 (with tz)
3206-
for tz in [None, 'Asia/Tokyo']:
3207-
rng = date_range('6/1/2000', '6/30/2000', freq='D', name='idx')
3206+
for tz in [None, 'Asia/Tokyo', 'US/Eastern', 'dateutil/US/Pacific']:
3207+
base = date_range('6/1/2000', '6/30/2000', freq='D', name='idx')
32083208

32093209
# if target has the same name, it is preserved
32103210
rng2 = date_range('5/15/2000', '6/20/2000', freq='D', name='idx')
@@ -3214,16 +3214,18 @@ def test_intersection(self):
32143214
rng3 = date_range('5/15/2000', '6/20/2000', freq='D', name='other')
32153215
expected3 = date_range('6/1/2000', '6/20/2000', freq='D', name=None)
32163216

3217-
result2 = rng.intersection(rng2)
3218-
result3 = rng.intersection(rng3)
3219-
for (result, expected) in [(result2, expected2), (result3, expected3)]:
3217+
rng4 = date_range('7/1/2000', '7/31/2000', freq='D', name='idx')
3218+
expected4 = DatetimeIndex([], name='idx')
3219+
3220+
for (rng, expected) in [(rng2, expected2), (rng3, expected3), (rng4, expected4)]:
3221+
result = base.intersection(rng)
32203222
self.assertTrue(result.equals(expected))
32213223
self.assertEqual(result.name, expected.name)
32223224
self.assertEqual(result.freq, expected.freq)
32233225
self.assertEqual(result.tz, expected.tz)
32243226

32253227
# non-monotonic
3226-
rng = DatetimeIndex(['2011-01-05', '2011-01-04', '2011-01-02', '2011-01-03'],
3228+
base = DatetimeIndex(['2011-01-05', '2011-01-04', '2011-01-02', '2011-01-03'],
32273229
tz=tz, name='idx')
32283230

32293231
rng2 = DatetimeIndex(['2011-01-04', '2011-01-02', '2011-02-02', '2011-02-03'],
@@ -3234,10 +3236,12 @@ def test_intersection(self):
32343236
tz=tz, name='other')
32353237
expected3 = DatetimeIndex(['2011-01-04', '2011-01-02'], tz=tz, name=None)
32363238

3237-
result2 = rng.intersection(rng2)
3238-
result3 = rng.intersection(rng3)
3239-
for (result, expected) in [(result2, expected2), (result3, expected3)]:
3240-
print(result, expected)
3239+
# GH 7880
3240+
rng4 = date_range('7/1/2000', '7/31/2000', freq='D', tz=tz, name='idx')
3241+
expected4 = DatetimeIndex([], tz=tz, name='idx')
3242+
3243+
for (rng, expected) in [(rng2, expected2), (rng3, expected3), (rng4, expected4)]:
3244+
result = base.intersection(rng)
32413245
self.assertTrue(result.equals(expected))
32423246
self.assertEqual(result.name, expected.name)
32433247
self.assertIsNone(result.freq)

pandas/tseries/tests/test_timezones.py

+58
Original file line numberDiff line numberDiff line change
@@ -787,6 +787,64 @@ def test_utc_with_system_utc(self):
787787
# check that the time hasn't changed.
788788
self.assertEqual(ts, ts.tz_convert(dateutil.tz.tzutc()))
789789

790+
def test_tslib_tz_convert_trans_pos_plus_1__bug(self):
791+
# Regression test for tslib.tz_convert(vals, tz1, tz2).
792+
# See https://github.com/pydata/pandas/issues/4496 for details.
793+
for freq, n in [('H', 1), ('T', 60), ('S', 3600)]:
794+
idx = date_range(datetime(2011, 3, 26, 23), datetime(2011, 3, 27, 1), freq=freq)
795+
idx = idx.tz_localize('UTC')
796+
idx = idx.tz_convert('Europe/Moscow')
797+
798+
expected = np.repeat(np.array([3, 4, 5]), np.array([n, n, 1]))
799+
self.assert_numpy_array_equal(idx.hour, expected)
800+
801+
def test_tslib_tz_convert_dst(self):
802+
for freq, n in [('H', 1), ('T', 60), ('S', 3600)]:
803+
# Start DST
804+
idx = date_range('2014-03-08 23:00', '2014-03-09 09:00', freq=freq, tz='UTC')
805+
idx = idx.tz_convert('US/Eastern')
806+
expected = np.repeat(np.array([18, 19, 20, 21, 22, 23, 0, 1, 3, 4, 5]),
807+
np.array([n, n, n, n, n, n, n, n, n, n, 1]))
808+
self.assert_numpy_array_equal(idx.hour, expected)
809+
810+
idx = date_range('2014-03-08 18:00', '2014-03-09 05:00', freq=freq, tz='US/Eastern')
811+
idx = idx.tz_convert('UTC')
812+
expected = np.repeat(np.array([23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
813+
np.array([n, n, n, n, n, n, n, n, n, n, 1]))
814+
self.assert_numpy_array_equal(idx.hour, expected)
815+
816+
# End DST
817+
idx = date_range('2014-11-01 23:00', '2014-11-02 09:00', freq=freq, tz='UTC')
818+
idx = idx.tz_convert('US/Eastern')
819+
expected = np.repeat(np.array([19, 20, 21, 22, 23, 0, 1, 1, 2, 3, 4]),
820+
np.array([n, n, n, n, n, n, n, n, n, n, 1]))
821+
self.assert_numpy_array_equal(idx.hour, expected)
822+
823+
idx = date_range('2014-11-01 18:00', '2014-11-02 05:00', freq=freq, tz='US/Eastern')
824+
idx = idx.tz_convert('UTC')
825+
expected = np.repeat(np.array([22, 23, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
826+
np.array([n, n, n, n, n, n, n, n, n, n, n, n, 1]))
827+
self.assert_numpy_array_equal(idx.hour, expected)
828+
829+
# daily
830+
# Start DST
831+
idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', tz='UTC')
832+
idx = idx.tz_convert('US/Eastern')
833+
self.assert_numpy_array_equal(idx.hour, np.array([19, 19]))
834+
835+
idx = date_range('2014-03-08 00:00', '2014-03-09 00:00', freq='D', tz='US/Eastern')
836+
idx = idx.tz_convert('UTC')
837+
self.assert_numpy_array_equal(idx.hour, np.array([5, 5]))
838+
839+
# End DST
840+
idx = date_range('2014-11-01 00:00', '2014-11-02 00:00', freq='D', tz='UTC')
841+
idx = idx.tz_convert('US/Eastern')
842+
self.assert_numpy_array_equal(idx.hour, np.array([20, 20]))
843+
844+
idx = date_range('2014-11-01 00:00', '2014-11-02 000:00', freq='D', tz='US/Eastern')
845+
idx = idx.tz_convert('UTC')
846+
self.assert_numpy_array_equal(idx.hour, np.array([4, 4]))
847+
790848

791849
class TestTimeZoneCacheKey(tm.TestCase):
792850
def test_cache_keys_are_distinct_for_pytz_vs_dateutil(self):

pandas/tseries/tests/test_tslib.py

+38
Original file line numberDiff line numberDiff line change
@@ -425,6 +425,44 @@ def test_period_ordinal_business_day(self):
425425
# Tuesday
426426
self.assertEqual(11418, period_ordinal(2013, 10, 8, 0, 0, 0, 0, 0, get_freq('B')))
427427

428+
def test_tslib_tz_convert(self):
429+
def compare_utc_to_local(tz_didx, utc_didx):
430+
f = lambda x: tslib.tz_convert_single(x, 'UTC', tz_didx.tz)
431+
result = tslib.tz_convert(tz_didx.asi8, 'UTC', tz_didx.tz)
432+
result_single = np.vectorize(f)(tz_didx.asi8)
433+
self.assert_numpy_array_equal(result, result_single)
434+
435+
def compare_local_to_utc(tz_didx, utc_didx):
436+
f = lambda x: tslib.tz_convert_single(x, tz_didx.tz, 'UTC')
437+
result = tslib.tz_convert(utc_didx.asi8, tz_didx.tz, 'UTC')
438+
result_single = np.vectorize(f)(utc_didx.asi8)
439+
self.assert_numpy_array_equal(result, result_single)
440+
441+
for tz in ['UTC', 'Asia/Tokyo', 'US/Eastern', 'Europe/Moscow']:
442+
# US: 2014-03-09 - 2014-11-02
443+
# MOSCOW: 2014-10-26 / 2014-12-31
444+
tz_didx = date_range('2014-03-01', '2015-01-10', freq='H', tz=tz)
445+
utc_didx = date_range('2014-03-01', '2015-01-10', freq='H')
446+
compare_utc_to_local(tz_didx, utc_didx)
447+
# local tz to UTC can differ at hourly (or higher) freqs because of DST
448+
compare_local_to_utc(tz_didx, utc_didx)
449+
450+
tz_didx = date_range('2000-01-01', '2020-01-01', freq='D', tz=tz)
451+
utc_didx = date_range('2000-01-01', '2020-01-01', freq='D')
452+
compare_utc_to_local(tz_didx, utc_didx)
453+
compare_local_to_utc(tz_didx, utc_didx)
454+
455+
tz_didx = date_range('2000-01-01', '2100-01-01', freq='A', tz=tz)
456+
utc_didx = date_range('2000-01-01', '2100-01-01', freq='A')
457+
compare_utc_to_local(tz_didx, utc_didx)
458+
compare_local_to_utc(tz_didx, utc_didx)
459+
460+
# Check empty array
461+
result = tslib.tz_convert(np.array([], dtype=np.int64),
462+
tslib.maybe_get_tz('US/Eastern'),
463+
tslib.maybe_get_tz('Asia/Tokyo'))
464+
self.assert_numpy_array_equal(result, np.array([], dtype=np.int64))
465+
428466
class TestTimestampOps(tm.TestCase):
429467
def test_timestamp_and_datetime(self):
430468
self.assertEqual((Timestamp(datetime.datetime(2013, 10, 13)) - datetime.datetime(2013, 10, 12)).days, 1)

pandas/tslib.pyx

+12-13
Original file line numberDiff line numberDiff line change
@@ -1907,10 +1907,14 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
19071907
Py_ssize_t i, pos, n = len(vals)
19081908
int64_t v, offset
19091909
pandas_datetimestruct dts
1910+
Py_ssize_t trans_len
19101911

19111912
if not have_pytz:
19121913
import pytz
19131914

1915+
if len(vals) == 0:
1916+
return np.array([], dtype=np.int64)
1917+
19141918
# Convert to UTC
19151919

19161920
if _get_zone(tz1) != 'UTC':
@@ -1927,14 +1931,15 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
19271931
else:
19281932
deltas = _get_deltas(tz1)
19291933
trans = _get_transitions(tz1)
1934+
trans_len = len(trans)
19301935
pos = trans.searchsorted(vals[0]) - 1
19311936
if pos < 0:
19321937
raise ValueError('First time before start of DST info')
19331938

19341939
offset = deltas[pos]
19351940
for i in range(n):
19361941
v = vals[i]
1937-
if v >= [pos + 1]:
1942+
while pos + 1 < trans_len and v >= trans[pos + 1]:
19381943
pos += 1
19391944
offset = deltas[pos]
19401945
utc_dates[i] = v - offset
@@ -1957,29 +1962,23 @@ def tz_convert(ndarray[int64_t] vals, object tz1, object tz2):
19571962

19581963
# Convert UTC to other timezone
19591964
trans = _get_transitions(tz2)
1965+
trans_len = len(trans)
19601966
deltas = _get_deltas(tz2)
1961-
pos = trans.searchsorted(utc_dates[0])
1962-
if pos == 0:
1967+
pos = trans.searchsorted(utc_dates[0]) - 1
1968+
if pos < 0:
19631969
raise ValueError('First time before start of DST info')
1964-
elif pos == len(trans):
1965-
return utc_dates + deltas[-1]
19661970

19671971
# TODO: this assumed sortedness :/
1968-
pos -= 1
1969-
19701972
offset = deltas[pos]
1971-
cdef Py_ssize_t trans_len = len(trans)
1972-
19731973
for i in range(n):
19741974
v = utc_dates[i]
19751975
if vals[i] == NPY_NAT:
19761976
result[i] = vals[i]
19771977
else:
1978-
if (pos + 1) < trans_len and v >= trans[pos + 1]:
1978+
while pos + 1 < trans_len and v >= trans[pos + 1]:
19791979
pos += 1
19801980
offset = deltas[pos]
19811981
result[i] = v + offset
1982-
19831982
return result
19841983

19851984
def tz_convert_single(int64_t val, object tz1, object tz2):
@@ -2005,7 +2004,7 @@ def tz_convert_single(int64_t val, object tz1, object tz2):
20052004
elif _get_zone(tz1) != 'UTC':
20062005
deltas = _get_deltas(tz1)
20072006
trans = _get_transitions(tz1)
2008-
pos = trans.searchsorted(val) - 1
2007+
pos = trans.searchsorted(val, side='right') - 1
20092008
if pos < 0:
20102009
raise ValueError('First time before start of DST info')
20112010
offset = deltas[pos]
@@ -2024,7 +2023,7 @@ def tz_convert_single(int64_t val, object tz1, object tz2):
20242023
# Convert UTC to other timezone
20252024
trans = _get_transitions(tz2)
20262025
deltas = _get_deltas(tz2)
2027-
pos = trans.searchsorted(utc_date) - 1
2026+
pos = trans.searchsorted(utc_date, side='right') - 1
20282027
if pos < 0:
20292028
raise ValueError('First time before start of DST info')
20302029

0 commit comments

Comments
 (0)