Skip to content

Commit 5cedf87

Browse files
authored
BUG: mixed-type mixed-timezone/awareness (pandas-dev#55793)
* BUG: mixed-type mixed-timezone/awareness
* more GH refs
* un-xfail
* comments, test
1 parent 0d786bf commit 5cedf87

File tree

6 files changed

+186
-0
lines changed

6 files changed

+186
-0
lines changed

doc/source/whatsnew/v2.2.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ Datetimelike
338338
- Bug in :func:`concat` raising ``AttributeError`` when concatenating all-NA DataFrame with :class:`DatetimeTZDtype` dtype DataFrame. (:issue:`52093`)
339339
- Bug in :func:`testing.assert_extension_array_equal` that could use the wrong unit when comparing resolutions (:issue:`55730`)
340340
- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing a list of mixed-string-and-numeric types incorrectly raising (:issue:`55780`)
341+
- Bug in :func:`to_datetime` and :class:`DatetimeIndex` when passing mixed-type objects with a mix of timezones or mix of timezone-awareness failing to raise ``ValueError`` (:issue:`55693`)
341342
- Bug in :meth:`DatetimeIndex.union` returning object dtype for tz-aware indexes with the same timezone but different units (:issue:`55238`)
342343
- Bug in :meth:`Index.is_monotonic_increasing` and :meth:`Index.is_monotonic_decreasing` always caching :meth:`Index.is_unique` as ``True`` when first value in index is ``NaT`` (:issue:`55755`)
343344
- Bug in :meth:`Index.view` to a datetime64 dtype with non-supported resolution incorrectly raising (:issue:`55710`)

pandas/_libs/tslib.pyx

+25
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ from pandas._libs.tslibs.nattype cimport (
7272
c_nat_strings as nat_strings,
7373
)
7474
from pandas._libs.tslibs.timestamps cimport _Timestamp
75+
from pandas._libs.tslibs.timezones cimport tz_compare
7576

7677
from pandas._libs.tslibs import (
7778
Resolution,
@@ -488,9 +489,11 @@ cpdef array_to_datetime(
488489
elif PyDate_Check(val):
489490
iresult[i] = pydate_to_dt64(val, &dts, reso=creso)
490491
check_dts_bounds(&dts, creso)
492+
state.found_other = True
491493

492494
elif is_datetime64_object(val):
493495
iresult[i] = get_datetime64_nanos(val, creso)
496+
state.found_other = True
494497

495498
elif is_integer_object(val) or is_float_object(val):
496499
# these must be ns unit by-definition
@@ -500,6 +503,7 @@ cpdef array_to_datetime(
500503
else:
501504
# we now need to parse this as if unit='ns'
502505
iresult[i] = cast_from_unit(val, "ns", out_reso=creso)
506+
state.found_other = True
503507

504508
elif isinstance(val, str):
505509
# string
@@ -535,6 +539,7 @@ cpdef array_to_datetime(
535539
# Add a marker for naive string, to track if we are
536540
# parsing mixed naive and aware strings
537541
out_tzoffset_vals.add("naive")
542+
state.found_naive_str = True
538543

539544
else:
540545
raise TypeError(f"{type(val)} is not convertible to datetime")
@@ -558,9 +563,29 @@ cpdef array_to_datetime(
558563
is_same_offsets = len(out_tzoffset_vals) == 1
559564
if not is_same_offsets:
560565
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
566+
elif state.found_naive or state.found_other:
567+
# e.g. test_to_datetime_mixed_awareness_mixed_types
568+
raise ValueError("Cannot mix tz-aware with tz-naive values")
569+
elif tz_out is not None:
570+
# GH#55693
571+
tz_offset = out_tzoffset_vals.pop()
572+
tz_out2 = timezone(timedelta(seconds=tz_offset))
573+
if not tz_compare(tz_out, tz_out2):
574+
# e.g. test_to_datetime_mixed_tzs_mixed_types
575+
raise ValueError(
576+
"Mixed timezones detected. pass utc=True in to_datetime "
577+
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
578+
)
579+
# e.g. test_to_datetime_mixed_types_matching_tzs
561580
else:
562581
tz_offset = out_tzoffset_vals.pop()
563582
tz_out = timezone(timedelta(seconds=tz_offset))
583+
elif not utc_convert:
584+
if tz_out and (state.found_other or state.found_naive_str):
585+
# found_other indicates a tz-naive int, float, dt64, or date
586+
# e.g. test_to_datetime_mixed_awareness_mixed_types
587+
raise ValueError("Cannot mix tz-aware with tz-naive values")
588+
564589
return result, tz_out
565590

566591

pandas/_libs/tslibs/strptime.pxd

+3
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@ cdef bint parse_today_now(
1414

1515
cdef class DatetimeParseState:
1616
cdef:
17+
# See comments describing these attributes in the __cinit__ method
1718
bint found_tz
1819
bint found_naive
20+
bint found_naive_str
21+
bint found_other
1922
bint creso_ever_changed
2023
NPY_DATETIMEUNIT creso
2124

pandas/_libs/tslibs/strptime.pyx

+7
Original file line numberDiff line numberDiff line change
@@ -242,8 +242,15 @@ cdef _get_format_regex(str fmt):
242242

243243
cdef class DatetimeParseState:
244244
def __cinit__(self, NPY_DATETIMEUNIT creso=NPY_DATETIMEUNIT.NPY_FR_ns):
245+
# found_tz and found_naive are specifically about datetime/Timestamp
246+
# objects with and without tzinfos attached.
245247
self.found_tz = False
246248
self.found_naive = False
249+
# found_naive_str refers to a string that was parsed to a timezone-naive
250+
# datetime.
251+
self.found_naive_str = False
252+
self.found_other = False
253+
247254
self.creso = creso
248255
self.creso_ever_changed = False
249256

pandas/tests/indexes/test_index_new.py

+12
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,15 @@
44
from datetime import (
55
datetime,
66
timedelta,
7+
timezone,
78
)
89
from decimal import Decimal
910

1011
import numpy as np
1112
import pytest
1213

14+
from pandas._libs.tslibs.timezones import maybe_get_tz
15+
1316
from pandas import (
1417
NA,
1518
Categorical,
@@ -183,6 +186,15 @@ def test_constructor_datetime_and_datetime64(self, swap_objs):
183186
tm.assert_index_equal(Index(data), expected)
184187
tm.assert_index_equal(Index(np.array(data, dtype=object)), expected)
185188

189+
def test_constructor_datetimes_mixed_tzs(self):
190+
# https://github.com/pandas-dev/pandas/pull/55793/files#r1383719998
191+
tz = maybe_get_tz("US/Central")
192+
dt1 = datetime(2020, 1, 1, tzinfo=tz)
193+
dt2 = datetime(2020, 1, 1, tzinfo=timezone.utc)
194+
result = Index([dt1, dt2])
195+
expected = Index([dt1, dt2], dtype=object)
196+
tm.assert_index_equal(result, expected)
197+
186198

187199
class TestDtypeEnforced:
188200
# check we don't silently ignore the dtype keyword

pandas/tests/tools/test_to_datetime.py

+138
Original file line numberDiff line numberDiff line change
@@ -3727,3 +3727,141 @@ def test_to_datetime_with_empty_str_utc_false_offsets_and_format_mixed():
37273727
to_datetime(
37283728
["2020-01-01 00:00+00:00", "2020-01-01 00:00+02:00", ""], format="mixed"
37293729
)
3730+
3731+
3732+
def test_to_datetime_mixed_tzs_mixed_types():
3733+
# GH#55793, GH#55693 mismatched tzs but one is str and other is
3734+
# datetime object
3735+
ts = Timestamp("2016-01-02 03:04:05", tz="US/Pacific")
3736+
dtstr = "2023-10-30 15:06+01"
3737+
arr = [ts, dtstr]
3738+
3739+
msg = (
3740+
"Mixed timezones detected. pass utc=True in to_datetime or tz='UTC' "
3741+
"in DatetimeIndex to convert to a common timezone"
3742+
)
3743+
with pytest.raises(ValueError, match=msg):
3744+
to_datetime(arr)
3745+
with pytest.raises(ValueError, match=msg):
3746+
to_datetime(arr, format="mixed")
3747+
with pytest.raises(ValueError, match=msg):
3748+
DatetimeIndex(arr)
3749+
3750+
3751+
def test_to_datetime_mixed_types_matching_tzs():
3752+
# GH#55793
3753+
dtstr = "2023-11-01 09:22:03-07:00"
3754+
ts = Timestamp(dtstr)
3755+
arr = [ts, dtstr]
3756+
res1 = to_datetime(arr)
3757+
res2 = to_datetime(arr[::-1])[::-1]
3758+
res3 = to_datetime(arr, format="mixed")
3759+
res4 = DatetimeIndex(arr)
3760+
3761+
expected = DatetimeIndex([ts, ts])
3762+
tm.assert_index_equal(res1, expected)
3763+
tm.assert_index_equal(res2, expected)
3764+
tm.assert_index_equal(res3, expected)
3765+
tm.assert_index_equal(res4, expected)
3766+
3767+
3768+
dtstr = "2020-01-01 00:00+00:00"
3769+
ts = Timestamp(dtstr)
3770+
3771+
3772+
@pytest.mark.filterwarnings("ignore:Could not infer format:UserWarning")
3773+
@pytest.mark.parametrize(
3774+
"aware_val",
3775+
[dtstr, Timestamp(dtstr)],
3776+
ids=lambda x: type(x).__name__,
3777+
)
3778+
@pytest.mark.parametrize(
3779+
"naive_val",
3780+
[dtstr[:-6], ts.tz_localize(None), ts.date(), ts.asm8, ts.value, float(ts.value)],
3781+
ids=lambda x: type(x).__name__,
3782+
)
3783+
@pytest.mark.parametrize("naive_first", [True, False])
3784+
def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_first):
3785+
# GH#55793, GH#55693
3786+
# Empty string parses to NaT
3787+
vals = [aware_val, naive_val, ""]
3788+
3789+
vec = vals
3790+
if naive_first:
3791+
# alas, the behavior is order-dependent, so we test both ways
3792+
vec = [naive_val, aware_val, ""]
3793+
3794+
# both_strs-> paths that were previously already deprecated with warning
3795+
# issued in _array_to_datetime_object
3796+
both_strs = isinstance(aware_val, str) and isinstance(naive_val, str)
3797+
has_numeric = isinstance(naive_val, (int, float))
3798+
3799+
depr_msg = "In a future version of pandas, parsing datetimes with mixed time zones"
3800+
3801+
first_non_null = next(x for x in vec if x != "")
3802+
# if first_non_null is not a string, _guess_datetime_format_for_array
3803+
# doesn't guess a format so we don't go through array_strptime
3804+
if not isinstance(first_non_null, str):
3805+
# (a string first_non_null instead goes through array_strptime, which has different behavior)
3806+
msg = "Cannot mix tz-aware with tz-naive values"
3807+
if naive_first and isinstance(aware_val, Timestamp):
3808+
if isinstance(naive_val, Timestamp):
3809+
msg = "Tz-aware datetime.datetime cannot be converted to datetime64"
3810+
with pytest.raises(ValueError, match=msg):
3811+
to_datetime(vec)
3812+
else:
3813+
with pytest.raises(ValueError, match=msg):
3814+
to_datetime(vec)
3815+
3816+
# No warning/error with utc=True
3817+
to_datetime(vec, utc=True)
3818+
3819+
elif has_numeric and vec.index(aware_val) < vec.index(naive_val):
3820+
msg = "time data .* doesn't match format"
3821+
with pytest.raises(ValueError, match=msg):
3822+
to_datetime(vec)
3823+
with pytest.raises(ValueError, match=msg):
3824+
to_datetime(vec, utc=True)
3825+
3826+
elif both_strs and vec.index(aware_val) < vec.index(naive_val):
3827+
msg = r"time data \"2020-01-01 00:00\" doesn't match format"
3828+
with pytest.raises(ValueError, match=msg):
3829+
to_datetime(vec)
3830+
with pytest.raises(ValueError, match=msg):
3831+
to_datetime(vec, utc=True)
3832+
3833+
elif both_strs and vec.index(naive_val) < vec.index(aware_val):
3834+
msg = "unconverted data remains when parsing with format"
3835+
with pytest.raises(ValueError, match=msg):
3836+
to_datetime(vec)
3837+
with pytest.raises(ValueError, match=msg):
3838+
to_datetime(vec, utc=True)
3839+
3840+
else:
3841+
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
3842+
to_datetime(vec)
3843+
3844+
# No warning/error with utc=True
3845+
to_datetime(vec, utc=True)
3846+
3847+
if both_strs:
3848+
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
3849+
to_datetime(vec, format="mixed")
3850+
with tm.assert_produces_warning(FutureWarning, match=depr_msg):
3851+
msg = "DatetimeIndex has mixed timezones"
3852+
with pytest.raises(TypeError, match=msg):
3853+
DatetimeIndex(vec)
3854+
else:
3855+
msg = "Cannot mix tz-aware with tz-naive values"
3856+
if naive_first and isinstance(aware_val, Timestamp):
3857+
if isinstance(naive_val, Timestamp):
3858+
msg = "Tz-aware datetime.datetime cannot be converted to datetime64"
3859+
with pytest.raises(ValueError, match=msg):
3860+
to_datetime(vec, format="mixed")
3861+
with pytest.raises(ValueError, match=msg):
3862+
DatetimeIndex(vec)
3863+
else:
3864+
with pytest.raises(ValueError, match=msg):
3865+
to_datetime(vec, format="mixed")
3866+
with pytest.raises(ValueError, match=msg):
3867+
DatetimeIndex(vec)

0 commit comments

Comments
 (0)