Skip to content

REF: de-duplicate tzinfo-awareness mismatch checks #58171

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Apr 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 4 additions & 43 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,6 @@ from pandas._libs.tslibs.nattype cimport (
c_nat_strings as nat_strings,
)
from pandas._libs.tslibs.timestamps cimport _Timestamp
from pandas._libs.tslibs.timezones cimport tz_compare

from pandas._libs.tslibs import (
Resolution,
Expand Down Expand Up @@ -452,13 +451,9 @@ cpdef array_to_datetime(
ndarray[int64_t] iresult
npy_datetimestruct dts
bint utc_convert = bool(utc)
bint seen_datetime_offset = False
bint is_raise = errors == "raise"
bint is_coerce = errors == "coerce"
bint is_same_offsets
_TSObject tsobj
float tz_offset
set out_tzoffset_vals = set()
tzinfo tz, tz_out = None
cnp.flatiter it = cnp.PyArray_IterNew(values)
NPY_DATETIMEUNIT item_reso
Expand Down Expand Up @@ -568,12 +563,12 @@ cpdef array_to_datetime(
# dateutil timezone objects cannot be hashed, so
# store the UTC offsets in seconds instead
nsecs = tz.utcoffset(None).total_seconds()
out_tzoffset_vals.add(nsecs)
seen_datetime_offset = True
state.out_tzoffset_vals.add(nsecs)
state.found_aware_str = True
else:
# Add a marker for naive string, to track if we are
# parsing mixed naive and aware strings
out_tzoffset_vals.add("naive")
state.out_tzoffset_vals.add("naive")
state.found_naive_str = True

else:
Expand All @@ -588,41 +583,7 @@ cpdef array_to_datetime(
raise
return values, None

if seen_datetime_offset and not utc_convert:
# GH#17697, GH#57275
# 1) If all the offsets are equal, return one offset for
# the parsed dates to (maybe) pass to DatetimeIndex
# 2) If the offsets are different, then do not force the parsing
# and raise a ValueError: "cannot parse datetimes with
# mixed time zones unless `utc=True`" instead
is_same_offsets = len(out_tzoffset_vals) == 1
if not is_same_offsets:
raise ValueError(
"Mixed timezones detected. Pass utc=True in to_datetime "
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
)
elif state.found_naive or state.found_other:
# e.g. test_to_datetime_mixed_awareness_mixed_types
raise ValueError("Cannot mix tz-aware with tz-naive values")
elif tz_out is not None:
# GH#55693
tz_offset = out_tzoffset_vals.pop()
tz_out2 = timezone(timedelta(seconds=tz_offset))
if not tz_compare(tz_out, tz_out2):
# e.g. test_to_datetime_mixed_tzs_mixed_types
raise ValueError(
"Mixed timezones detected. Pass utc=True in to_datetime "
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
)
# e.g. test_to_datetime_mixed_types_matching_tzs
else:
tz_offset = out_tzoffset_vals.pop()
tz_out = timezone(timedelta(seconds=tz_offset))
elif not utc_convert:
if tz_out and (state.found_other or state.found_naive_str):
# found_other indicates a tz-naive int, float, dt64, or date
# e.g. test_to_datetime_mixed_awareness_mixed_types
raise ValueError("Cannot mix tz-aware with tz-naive values")
tz_out = state.check_for_mixed_inputs(tz_out, utc)

if infer_reso:
if state.creso_ever_changed:
Expand Down
3 changes: 3 additions & 0 deletions pandas/_libs/tslibs/strptime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,12 @@ cdef class DatetimeParseState:
bint found_tz
bint found_naive
bint found_naive_str
bint found_aware_str
bint found_other
bint creso_ever_changed
NPY_DATETIMEUNIT creso
set out_tzoffset_vals

cdef tzinfo process_datetime(self, datetime dt, tzinfo tz, bint utc_convert)
cdef bint update_creso(self, NPY_DATETIMEUNIT item_reso) noexcept
cdef tzinfo check_for_mixed_inputs(self, tzinfo tz_out, bint utc)
100 changes: 62 additions & 38 deletions pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -252,8 +252,11 @@ cdef class DatetimeParseState:
# found_naive_str refers to a string that was parsed to a timezone-naive
# datetime.
self.found_naive_str = False
self.found_aware_str = False
self.found_other = False

self.out_tzoffset_vals = set()

self.creso = creso
self.creso_ever_changed = False

Expand Down Expand Up @@ -292,6 +295,58 @@ cdef class DatetimeParseState:
"tz-naive values")
return tz

cdef tzinfo check_for_mixed_inputs(
    self,
    tzinfo tz_out,
    bint utc,
):
    """
    Validate that the parsed inputs do not mix timezone awareness or offsets.

    Shared post-loop check used by both array_to_datetime and array_strptime.

    Parameters
    ----------
    tz_out : tzinfo or None
        Timezone the caller has settled on so far.
        NOTE(review): presumably derived from non-string datetime-like
        inputs -- confirm against the callers.
    utc : bool
        If True, no checks are performed and ``tz_out`` is returned as-is.

    Returns
    -------
    tzinfo or None
        ``tz_out`` unchanged, or, when ``tz_out`` is None and tz-aware
        strings were seen, a fixed-offset ``timezone`` built from the single
        offset recorded in ``self.out_tzoffset_vals``.

    Raises
    ------
    ValueError
        If ``utc`` is False and the recorded state shows more than one
        distinct offset, a mix of aware strings with naive/other inputs, or
        an aware-string offset that does not compare equal to ``tz_out``.

    Notes
    -----
    When the aware-string path passes validation, the single entry is
    popped from ``self.out_tzoffset_vals``, leaving the set empty.
    """
    cdef:
        bint single_offset
        float offset_secs

    mixed_msg = (
        "Mixed timezones detected. Pass utc=True in to_datetime "
        "or tz='UTC' in DatetimeIndex to convert to a common timezone."
    )

    if utc:
        # Neither validation branch applies when utc is requested.
        return tz_out

    if not self.found_aware_str:
        # No tz-aware strings were parsed; the only possible conflict is a
        # non-None tz_out combined with naive inputs.
        # found_other indicates a tz-naive int, float, dt64, or date
        # e.g. test_to_datetime_mixed_awareness_mixed_types (array_to_datetime)
        if tz_out and (self.found_other or self.found_naive_str):
            raise ValueError(mixed_msg)
        return tz_out

    # GH#17697, GH#57275
    # At least one tz-aware string was parsed. Exactly one distinct entry in
    # out_tzoffset_vals means every parsed string shared the same offset
    # (naive strings add a "naive" marker, which also counts as an entry).
    single_offset = len(self.out_tzoffset_vals) == 1
    if not single_offset or self.found_naive or self.found_other:
        # e.g. test_to_datetime_mixed_awareness_mixed_types (array_to_datetime)
        raise ValueError(mixed_msg)

    # Offsets are recorded in seconds; rebuild a fixed-offset timezone.
    offset_secs = self.out_tzoffset_vals.pop()
    str_tz = timezone(timedelta(seconds=offset_secs))

    if tz_out is None:
        # e.g. test_to_datetime_iso8601_with_timezone_valid (array_strptime)
        return str_tz

    # GH#55693
    if not tz_compare(tz_out, str_tz):
        # e.g. (array_strptime)
        #  test_to_datetime_mixed_offsets_with_utc_false_removed
        # e.g. test_to_datetime_mixed_tzs_mixed_types (array_to_datetime)
        raise ValueError(mixed_msg)

    # e.g. (array_strptime)
    #  test_guess_datetime_format_with_parseable_formats
    # e.g. test_to_datetime_mixed_types_matching_tzs (array_to_datetime)
    return tz_out


def array_strptime(
ndarray[object] values,
Expand Down Expand Up @@ -319,11 +374,8 @@ def array_strptime(
npy_datetimestruct dts
int64_t[::1] iresult
object val
bint seen_datetime_offset = False
bint is_raise = errors=="raise"
bint is_coerce = errors=="coerce"
bint is_same_offsets
set out_tzoffset_vals = set()
tzinfo tz, tz_out = None
bint iso_format = format_is_iso(fmt)
NPY_DATETIMEUNIT out_bestunit, item_reso
Expand Down Expand Up @@ -418,15 +470,15 @@ def array_strptime(
) from err
if out_local == 1:
nsecs = out_tzoffset * 60
out_tzoffset_vals.add(nsecs)
seen_datetime_offset = True
state.out_tzoffset_vals.add(nsecs)
state.found_aware_str = True
tz = timezone(timedelta(minutes=out_tzoffset))
value = tz_localize_to_utc_single(
value, tz, ambiguous="raise", nonexistent=None, creso=creso
)
else:
tz = None
out_tzoffset_vals.add("naive")
state.out_tzoffset_vals.add("naive")
state.found_naive_str = True
iresult[i] = value
continue
Expand Down Expand Up @@ -475,12 +527,12 @@ def array_strptime(
elif creso == NPY_DATETIMEUNIT.NPY_FR_ms:
nsecs = nsecs // 10**3

out_tzoffset_vals.add(nsecs)
seen_datetime_offset = True
state.out_tzoffset_vals.add(nsecs)
state.found_aware_str = True
else:
state.found_naive_str = True
tz = None
out_tzoffset_vals.add("naive")
state.out_tzoffset_vals.add("naive")

except ValueError as ex:
ex.args = (
Expand All @@ -499,35 +551,7 @@ def array_strptime(
raise
return values, None

if seen_datetime_offset and not utc:
is_same_offsets = len(out_tzoffset_vals) == 1
if not is_same_offsets or (state.found_naive or state.found_other):
raise ValueError(
"Mixed timezones detected. Pass utc=True in to_datetime "
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
)
elif tz_out is not None:
# GH#55693
tz_offset = out_tzoffset_vals.pop()
tz_out2 = timezone(timedelta(seconds=tz_offset))
if not tz_compare(tz_out, tz_out2):
# e.g. test_to_datetime_mixed_offsets_with_utc_false_removed
raise ValueError(
"Mixed timezones detected. Pass utc=True in to_datetime "
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
)
# e.g. test_guess_datetime_format_with_parseable_formats
else:
# e.g. test_to_datetime_iso8601_with_timezone_valid
tz_offset = out_tzoffset_vals.pop()
tz_out = timezone(timedelta(seconds=tz_offset))
elif not utc:
if tz_out and (state.found_other or state.found_naive_str):
# found_other indicates a tz-naive int, float, dt64, or date
raise ValueError(
"Mixed timezones detected. Pass utc=True in to_datetime "
"or tz='UTC' in DatetimeIndex to convert to a common timezone."
)
tz_out = state.check_for_mixed_inputs(tz_out, utc)

if infer_reso:
if state.creso_ever_changed:
Expand Down
18 changes: 14 additions & 4 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -3545,19 +3545,27 @@ def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_fir
# issued in _array_to_datetime_object
both_strs = isinstance(aware_val, str) and isinstance(naive_val, str)
has_numeric = isinstance(naive_val, (int, float))
both_datetime = isinstance(naive_val, datetime) and isinstance(aware_val, datetime)

mixed_msg = (
"Mixed timezones detected. Pass utc=True in to_datetime or tz='UTC' "
"in DatetimeIndex to convert to a common timezone"
)

first_non_null = next(x for x in vec if x != "")
# if first_non_null is not a string, _guess_datetime_format_for_array
# doesn't guess a format so we don't go through array_strptime
if not isinstance(first_non_null, str):
# that case goes through array_strptime which has different behavior
msg = "Cannot mix tz-aware with tz-naive values"
msg = mixed_msg
if naive_first and isinstance(aware_val, Timestamp):
if isinstance(naive_val, Timestamp):
msg = "Tz-aware datetime.datetime cannot be converted to datetime64"
with pytest.raises(ValueError, match=msg):
to_datetime(vec)
else:
if not naive_first and both_datetime:
msg = "Cannot mix tz-aware with tz-naive values"
with pytest.raises(ValueError, match=msg):
to_datetime(vec)

Expand Down Expand Up @@ -3586,21 +3594,21 @@ def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_fir
to_datetime(vec, utc=True)

else:
msg = "Mixed timezones detected. Pass utc=True in to_datetime"
msg = mixed_msg
with pytest.raises(ValueError, match=msg):
to_datetime(vec)

# No warning/error with utc=True
to_datetime(vec, utc=True)

if both_strs:
msg = "Mixed timezones detected. Pass utc=True in to_datetime"
msg = mixed_msg
with pytest.raises(ValueError, match=msg):
to_datetime(vec, format="mixed")
with pytest.raises(ValueError, match=msg):
DatetimeIndex(vec)
else:
msg = "Cannot mix tz-aware with tz-naive values"
msg = mixed_msg
if naive_first and isinstance(aware_val, Timestamp):
if isinstance(naive_val, Timestamp):
msg = "Tz-aware datetime.datetime cannot be converted to datetime64"
Expand All @@ -3609,6 +3617,8 @@ def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_fir
with pytest.raises(ValueError, match=msg):
DatetimeIndex(vec)
else:
if not naive_first and both_datetime:
msg = "Cannot mix tz-aware with tz-naive values"
with pytest.raises(ValueError, match=msg):
to_datetime(vec, format="mixed")
with pytest.raises(ValueError, match=msg):
Expand Down
Loading