Skip to content

Commit b80b5c7

Browse files
sinhrks authored and jreback committed
BUG/API: Index creation with different tz coerces DatetimeIndex, #11488
1 parent fbb09f4 commit b80b5c7

File tree

5 files changed

+227
-7
lines changed

5 files changed

+227
-7
lines changed

doc/source/whatsnew/v0.18.0.txt

+2-3
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,8 @@ Bug Fixes
185185

186186

187187
- Bug where timezone info was lost when broadcasting a scalar datetime to ``DataFrame`` (:issue:`11682`)
188-
189-
188+
- Bug in ``Index`` creation where ``Timestamp`` values with mixed tz were coerced to UTC (:issue:`11488`)
189+
- Bug in ``to_numeric`` where it does not raise if input is more than one dimension (:issue:`11776`)
190190

191191
- Bug in parsing timezone offset strings with non-zero minutes (:issue:`11708`)
192192

@@ -197,4 +197,3 @@ Bug Fixes
197197
- Bug in ``pd.rolling_median`` where memory allocation failed even with sufficient memory (:issue:`11696`)
198198

199199
- Bug in ``df.replace`` while replacing value in mixed dtype ``DataFrame`` (:issue:`11698`)
200-
- Bug in ``to_numeric`` where it does not raise if input is more than one dimension (:issue:`11776`)

pandas/core/index.py

+7-2
Original file line numberDiff line numberDiff line change
@@ -179,8 +179,13 @@ def __new__(cls, data=None, dtype=None, copy=False, name=None, fastpath=False,
179179
elif inferred != 'string':
180180
if (inferred.startswith('datetime') or
181181
tslib.is_timestamp_array(subarr)):
182-
from pandas.tseries.index import DatetimeIndex
183-
return DatetimeIndex(subarr, copy=copy, name=name, **kwargs)
182+
183+
if (lib.is_datetime_with_singletz_array(subarr) or
184+
'tz' in kwargs):
185+
# only when subarr has the same tz
186+
from pandas.tseries.index import DatetimeIndex
187+
return DatetimeIndex(subarr, copy=copy, name=name, **kwargs)
188+
184189
elif (inferred.startswith('timedelta') or
185190
lib.is_timedelta_array(subarr)):
186191
from pandas.tseries.tdi import TimedeltaIndex

pandas/src/inference.pyx

+30-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import sys
22
cimport util
3-
from tslib import NaT
3+
from tslib import NaT, get_timezone
44
from datetime import datetime, timedelta
55
iNaT = util.get_nat()
66

@@ -431,6 +431,35 @@ def is_datetime64_array(ndarray values):
431431
return False
432432
return null_count != n
433433

434+
435+
cpdef is_datetime_with_singletz_array(ndarray[object] values):
436+
"""
437+
Check values have the same tzinfo attribute.
438+
Doesn't check values are datetime-like types.
439+
"""
440+
441+
cdef Py_ssize_t i, j, n = len(values)
442+
cdef object base_val, base_tz, val, tz
443+
444+
if n == 0:
445+
return False
446+
447+
for i in range(n):
448+
base_val = values[i]
449+
if base_val is not NaT:
450+
base_tz = get_timezone(getattr(base_val, 'tzinfo', None))
451+
452+
for j in range(i, n):
453+
val = values[j]
454+
if val is not NaT:
455+
tz = getattr(val, 'tzinfo', None)
456+
if base_tz != tz and base_tz != get_timezone(tz):
457+
return False
458+
break
459+
460+
return True
461+
462+
434463
def is_timedelta_array(ndarray values):
435464
cdef Py_ssize_t i, null_count = 0, n = len(values)
436465
cdef object v

pandas/tests/test_index.py

+183
Original file line numberDiff line numberDiff line change
@@ -3493,6 +3493,189 @@ def test_construction_with_alt(self):
34933493
def test_pickle_compat_construction(self):
34943494
pass
34953495

3496+
def test_construction_index_with_mixed_timezones(self):
3497+
# GH 11488
3498+
# no tz results in DatetimeIndex
3499+
result = Index([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
3500+
exp = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
3501+
self.assert_index_equal(result, exp, exact=True)
3502+
self.assertTrue(isinstance(result, DatetimeIndex))
3503+
self.assertIsNone(result.tz)
3504+
3505+
# same tz results in DatetimeIndex
3506+
result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
3507+
Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], name='idx')
3508+
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx')
3509+
self.assert_index_equal(result, exp, exact=True)
3510+
self.assertTrue(isinstance(result, DatetimeIndex))
3511+
self.assertIsNotNone(result.tz)
3512+
self.assertEqual(result.tz, exp.tz)
3513+
3514+
# same tz results in DatetimeIndex (DST)
3515+
result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
3516+
Timestamp('2011-08-01 10:00', tz='US/Eastern')], name='idx')
3517+
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')],
3518+
tz='US/Eastern', name='idx')
3519+
self.assert_index_equal(result, exp, exact=True)
3520+
self.assertTrue(isinstance(result, DatetimeIndex))
3521+
self.assertIsNotNone(result.tz)
3522+
self.assertEqual(result.tz, exp.tz)
3523+
3524+
# different tz results in Index(dtype=object)
3525+
result = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx')
3526+
exp = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')],
3527+
dtype='object', name='idx')
3528+
self.assert_index_equal(result, exp, exact=True)
3529+
self.assertFalse(isinstance(result, DatetimeIndex))
3530+
3531+
result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
3532+
Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx')
3533+
exp = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
3534+
Timestamp('2011-01-02 10:00', tz='US/Eastern')],
3535+
dtype='object', name='idx')
3536+
self.assert_index_equal(result, exp, exact=True)
3537+
self.assertFalse(isinstance(result, DatetimeIndex))
3538+
3539+
# passing tz results in DatetimeIndex
3540+
result = Index([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')],
3541+
tz='Asia/Tokyo', name='idx')
3542+
exp = DatetimeIndex([Timestamp('2011-01-01 19:00'), Timestamp('2011-01-03 00:00')],
3543+
tz='Asia/Tokyo', name='idx')
3544+
self.assert_index_equal(result, exp, exact=True)
3545+
self.assertTrue(isinstance(result, DatetimeIndex))
3546+
3547+
# length = 1
3548+
result = Index([Timestamp('2011-01-01')], name='idx')
3549+
exp = DatetimeIndex([Timestamp('2011-01-01')], name='idx')
3550+
self.assert_index_equal(result, exp, exact=True)
3551+
self.assertTrue(isinstance(result, DatetimeIndex))
3552+
self.assertIsNone(result.tz)
3553+
3554+
# length = 1 with tz
3555+
result = Index([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo')], name='idx')
3556+
exp = DatetimeIndex([Timestamp('2011-01-01 10:00')], tz='Asia/Tokyo', name='idx')
3557+
self.assert_index_equal(result, exp, exact=True)
3558+
self.assertTrue(isinstance(result, DatetimeIndex))
3559+
self.assertIsNotNone(result.tz)
3560+
self.assertEqual(result.tz, exp.tz)
3561+
3562+
def test_construction_index_with_mixed_timezones_with_NaT(self):
3563+
# GH 11488
3564+
result = Index([pd.NaT, Timestamp('2011-01-01'),
3565+
pd.NaT, Timestamp('2011-01-02')], name='idx')
3566+
exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01'),
3567+
pd.NaT, Timestamp('2011-01-02')], name='idx')
3568+
self.assert_index_equal(result, exp, exact=True)
3569+
self.assertTrue(isinstance(result, DatetimeIndex))
3570+
self.assertIsNone(result.tz)
3571+
3572+
# same tz results in DatetimeIndex
3573+
result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
3574+
pd.NaT, Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], name='idx')
3575+
exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 10:00'),
3576+
pd.NaT, Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx')
3577+
self.assert_index_equal(result, exp, exact=True)
3578+
self.assertTrue(isinstance(result, DatetimeIndex))
3579+
self.assertIsNotNone(result.tz)
3580+
self.assertEqual(result.tz, exp.tz)
3581+
3582+
# same tz results in DatetimeIndex (DST)
3583+
result = Index([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
3584+
pd.NaT, Timestamp('2011-08-01 10:00', tz='US/Eastern')], name='idx')
3585+
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), pd.NaT, Timestamp('2011-08-01 10:00')],
3586+
tz='US/Eastern', name='idx')
3587+
self.assert_index_equal(result, exp, exact=True)
3588+
self.assertTrue(isinstance(result, DatetimeIndex))
3589+
self.assertIsNotNone(result.tz)
3590+
self.assertEqual(result.tz, exp.tz)
3591+
3592+
# different tz results in Index(dtype=object)
3593+
result = Index([pd.NaT, Timestamp('2011-01-01 10:00'),
3594+
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx')
3595+
exp = Index([pd.NaT, Timestamp('2011-01-01 10:00'),
3596+
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
3597+
dtype='object', name='idx')
3598+
self.assert_index_equal(result, exp, exact=True)
3599+
self.assertFalse(isinstance(result, DatetimeIndex))
3600+
3601+
result = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
3602+
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx')
3603+
exp = Index([pd.NaT, Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
3604+
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
3605+
dtype='object', name='idx')
3606+
self.assert_index_equal(result, exp, exact=True)
3607+
self.assertFalse(isinstance(result, DatetimeIndex))
3608+
3609+
# passing tz results in DatetimeIndex
3610+
result = Index([pd.NaT, Timestamp('2011-01-01 10:00'),
3611+
pd.NaT, Timestamp('2011-01-02 10:00', tz='US/Eastern')],
3612+
tz='Asia/Tokyo', name='idx')
3613+
exp = DatetimeIndex([pd.NaT, Timestamp('2011-01-01 19:00'),
3614+
pd.NaT, Timestamp('2011-01-03 00:00')],
3615+
tz='Asia/Tokyo', name='idx')
3616+
self.assert_index_equal(result, exp, exact=True)
3617+
self.assertTrue(isinstance(result, DatetimeIndex))
3618+
3619+
# all NaT
3620+
result = Index([pd.NaT, pd.NaT], name='idx')
3621+
exp = DatetimeIndex([pd.NaT, pd.NaT], name='idx')
3622+
self.assert_index_equal(result, exp, exact=True)
3623+
self.assertTrue(isinstance(result, DatetimeIndex))
3624+
self.assertIsNone(result.tz)
3625+
3626+
# all NaT with tz
3627+
result = Index([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
3628+
exp = DatetimeIndex([pd.NaT, pd.NaT], tz='Asia/Tokyo', name='idx')
3629+
self.assert_index_equal(result, exp, exact=True)
3630+
self.assertTrue(isinstance(result, DatetimeIndex))
3631+
self.assertIsNotNone(result.tz)
3632+
self.assertEqual(result.tz, exp.tz)
3633+
3634+
def test_construction_dti_with_mixed_timezones(self):
3635+
# GH 11488 (not changed, added explicit tests)
3636+
3637+
# no tz results in DatetimeIndex
3638+
result = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
3639+
exp = DatetimeIndex([Timestamp('2011-01-01'), Timestamp('2011-01-02')], name='idx')
3640+
self.assert_index_equal(result, exp, exact=True)
3641+
self.assertTrue(isinstance(result, DatetimeIndex))
3642+
3643+
# same tz results in DatetimeIndex
3644+
result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
3645+
Timestamp('2011-01-02 10:00', tz='Asia/Tokyo')], name='idx')
3646+
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00')], tz='Asia/Tokyo', name='idx')
3647+
self.assert_index_equal(result, exp, exact=True)
3648+
self.assertTrue(isinstance(result, DatetimeIndex))
3649+
3650+
# same tz results in DatetimeIndex (DST)
3651+
result = DatetimeIndex([Timestamp('2011-01-01 10:00', tz='US/Eastern'),
3652+
Timestamp('2011-08-01 10:00', tz='US/Eastern')], name='idx')
3653+
exp = DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-08-01 10:00')],
3654+
tz='US/Eastern', name='idx')
3655+
self.assert_index_equal(result, exp, exact=True)
3656+
self.assertTrue(isinstance(result, DatetimeIndex))
3657+
3658+
# mixing tz-naive and tz-aware coerces the tz-naive value to the tz-aware zone (still a DatetimeIndex)
3659+
result = DatetimeIndex([Timestamp('2011-01-01 10:00'),
3660+
Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx')
3661+
exp = DatetimeIndex([Timestamp('2011-01-01 05:00'), Timestamp('2011-01-02 10:00')],
3662+
tz='US/Eastern', name='idx')
3663+
self.assert_index_equal(result, exp, exact=True)
3664+
self.assertTrue(isinstance(result, DatetimeIndex))
3665+
3666+
# tz mismatch between tz-aware values (or with an explicit tz) raises TypeError/ValueError
3667+
with tm.assertRaises(ValueError):
3668+
DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
3669+
Timestamp('2011-01-02 10:00', tz='US/Eastern')], name='idx')
3670+
3671+
with tm.assertRaises(TypeError):
3672+
DatetimeIndex([Timestamp('2011-01-01 10:00'), Timestamp('2011-01-02 10:00', tz='US/Eastern')],
3673+
tz='Asia/Tokyo', name='idx')
3674+
3675+
with tm.assertRaises(ValueError):
3676+
DatetimeIndex([Timestamp('2011-01-01 10:00', tz='Asia/Tokyo'),
3677+
Timestamp('2011-01-02 10:00', tz='US/Eastern')], tz='US/Eastern', name='idx')
3678+
34963679
def test_get_loc(self):
34973680
idx = pd.date_range('2000-01-01', periods=3)
34983681

pandas/util/testing.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -718,7 +718,11 @@ def assert_attr_equal(attr, left, right, obj='Attributes'):
718718
# np.nan
719719
return True
720720

721-
result = left_attr == right_attr
721+
try:
722+
result = left_attr == right_attr
723+
except TypeError:
724+
# datetimetz on rhs may raise TypeError
725+
result = False
722726
if not isinstance(result, bool):
723727
result = result.all()
724728

0 commit comments

Comments
 (0)