Skip to content

Commit 5914d8e

Browse files
natmokval authored and pmhatre1 committed
DEPR: disallow parsing datetimes with mixed time zones unless utc=True (pandas-dev#57275)
* correct def _array_to_datetime_object, _array_strptime_object_fallback, fix tests
* fix tests
* correct to_datetime docs, add a note to v3.0.0
* correct to_datetime docs
* fix failures in benchmarks/inference.py
* fix pre-commit error
* correct examples in to_datetime docs
* correct to_datetime docs
* delete time_different_offset from benchmarks/inference.py as redundant
* correct v3.0.0
* removed _array_to_datetime_object and _array_strptime_object_fallback
* correct to_datetime docstring, roll back changes in test_suppress_error_output
* fix pre-commit error
* correct test_to_datetime_mixed_awareness_mixed_types, and a comment in array_to_datetime
1 parent e4d0796 commit 5914d8e

File tree

11 files changed, with 124 additions and 485 deletions.

asv_bench/benchmarks/inference.py

+1 -4
Original file line number | Diff line number | Diff line change
@@ -200,7 +200,7 @@ def time_same_offset(self):
200200
to_datetime(self.same_offset)
201201

202202
def time_different_offset(self):
203-
to_datetime(self.diff_offset)
203+
to_datetime(self.diff_offset, utc=True)
204204

205205

206206
class ToDatetimeFormatQuarters:
@@ -231,9 +231,6 @@ def time_no_exact(self):
231231
def time_same_offset(self):
232232
to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z")
233233

234-
def time_different_offset(self):
235-
to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z")
236-
237234
def time_same_offset_to_utc(self):
238235
to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True)
239236

doc/source/whatsnew/v3.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -117,6 +117,7 @@ Removal of prior version deprecations/changes
117117
- All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`)
118118
- All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`)
119119
- Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`)
120+
- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`)
120121
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
121122
- In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`)
122123
- Removed :meth:`DateOffset.is_anchored` and :meth:`offsets.Tick.is_anchored` (:issue:`56594`)

pandas/_libs/tslib.pyx

+7 -115
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@ from datetime import timezone
99
from cpython.datetime cimport (
1010
PyDate_Check,
1111
PyDateTime_Check,
12-
datetime,
1312
import_datetime,
1413
timedelta,
1514
tzinfo,
@@ -590,15 +589,17 @@ cpdef array_to_datetime(
590589
return values, None
591590

592591
if seen_datetime_offset and not utc_convert:
593-
# GH#17697
592+
# GH#17697, GH#57275
594593
# 1) If all the offsets are equal, return one offset for
595594
# the parsed dates to (maybe) pass to DatetimeIndex
596-
# 2) If the offsets are different, then force the parsing down the
597-
# object path where an array of datetimes
598-
# (with individual dateutil.tzoffsets) are returned
595+
# 2) If the offsets are different, then do not force the parsing
596+
# and raise a ValueError: "cannot parse datetimes with
597+
# mixed time zones unless `utc=True`" instead
599598
is_same_offsets = len(out_tzoffset_vals) == 1
600599
if not is_same_offsets:
601-
return _array_to_datetime_object(values, errors, dayfirst, yearfirst)
600+
raise ValueError(
601+
"cannot parse datetimes with mixed time zones unless `utc=True`"
602+
)
602603
elif state.found_naive or state.found_other:
603604
# e.g. test_to_datetime_mixed_awareness_mixed_types
604605
raise ValueError("Cannot mix tz-aware with tz-naive values")
@@ -647,115 +648,6 @@ cpdef array_to_datetime(
647648
return result, tz_out
648649

649650

650-
@cython.wraparound(False)
651-
@cython.boundscheck(False)
652-
cdef _array_to_datetime_object(
653-
ndarray[object] values,
654-
str errors,
655-
bint dayfirst=False,
656-
bint yearfirst=False,
657-
):
658-
"""
659-
Fall back function for array_to_datetime
660-
661-
Attempts to parse datetime strings with dateutil to return an array
662-
of datetime objects
663-
664-
Parameters
665-
----------
666-
values : ndarray[object]
667-
date-like objects to convert
668-
errors : str
669-
error behavior when parsing
670-
dayfirst : bool, default False
671-
dayfirst parsing behavior when encountering datetime strings
672-
yearfirst : bool, default False
673-
yearfirst parsing behavior when encountering datetime strings
674-
675-
Returns
676-
-------
677-
np.ndarray[object]
678-
Literal[None]
679-
"""
680-
cdef:
681-
Py_ssize_t i, n = values.size
682-
object val
683-
bint is_coerce = errors == "coerce"
684-
bint is_raise = errors == "raise"
685-
ndarray oresult_nd
686-
ndarray[object] oresult
687-
npy_datetimestruct dts
688-
cnp.broadcast mi
689-
_TSObject tsobj
690-
691-
assert is_raise or is_coerce
692-
693-
oresult_nd = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0)
694-
mi = cnp.PyArray_MultiIterNew2(oresult_nd, values)
695-
oresult = oresult_nd.ravel()
696-
697-
# We return an object array and only attempt to parse:
698-
# 1) NaT or NaT-like values
699-
# 2) datetime strings, which we return as datetime.datetime
700-
# 3) special strings - "now" & "today"
701-
for i in range(n):
702-
# Analogous to: val = values[i]
703-
val = <object>(<PyObject**>cnp.PyArray_MultiIter_DATA(mi, 1))[0]
704-
705-
if checknull_with_nat_and_na(val) or PyDateTime_Check(val):
706-
# GH 25978. No need to parse NaT-like or datetime-like vals
707-
oresult[i] = val
708-
elif isinstance(val, str):
709-
if type(val) is not str:
710-
# GH#32264 np.str_ objects
711-
val = str(val)
712-
713-
if len(val) == 0 or val in nat_strings:
714-
oresult[i] = "NaT"
715-
cnp.PyArray_MultiIter_NEXT(mi)
716-
continue
717-
718-
try:
719-
tsobj = convert_str_to_tsobject(
720-
val, None, dayfirst=dayfirst, yearfirst=yearfirst
721-
)
722-
tsobj.ensure_reso(NPY_FR_ns, val)
723-
724-
dts = tsobj.dts
725-
oresult[i] = datetime(
726-
dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us,
727-
tzinfo=tsobj.tzinfo,
728-
fold=tsobj.fold,
729-
)
730-
731-
except (ValueError, OverflowError) as ex:
732-
ex.args = (f"{ex}, at position {i}", )
733-
if is_coerce:
734-
oresult[i] = <object>NaT
735-
cnp.PyArray_MultiIter_NEXT(mi)
736-
continue
737-
if is_raise:
738-
raise
739-
return values, None
740-
else:
741-
if is_raise:
742-
raise
743-
return values, None
744-
745-
cnp.PyArray_MultiIter_NEXT(mi)
746-
747-
warnings.warn(
748-
"In a future version of pandas, parsing datetimes with mixed time "
749-
"zones will raise an error unless `utc=True`. "
750-
"Please specify `utc=True` to opt in to the new behaviour "
751-
"and silence this warning. To create a `Series` with mixed offsets and "
752-
"`object` dtype, please use `apply` and `datetime.datetime.strptime`",
753-
FutureWarning,
754-
stacklevel=find_stack_level(),
755-
)
756-
return oresult_nd, None
757-
758-
759651
def array_to_datetime_with_tz(
760652
ndarray values, tzinfo tz, bint dayfirst, bint yearfirst, NPY_DATETIMEUNIT creso
761653
):

pandas/_libs/tslibs/strptime.pyx

+7 -160
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,6 @@ from pandas._libs.tslibs.dtypes cimport (
5858
)
5959
from pandas._libs.tslibs.nattype cimport (
6060
NPY_NAT,
61-
c_NaT as NaT,
6261
c_nat_strings as nat_strings,
6362
)
6463
from pandas._libs.tslibs.np_datetime cimport (
@@ -503,20 +502,18 @@ def array_strptime(
503502
if seen_datetime_offset and not utc:
504503
is_same_offsets = len(out_tzoffset_vals) == 1
505504
if not is_same_offsets or (state.found_naive or state.found_other):
506-
result2 = _array_strptime_object_fallback(
507-
values, fmt=fmt, exact=exact, errors=errors, utc=utc
505+
raise ValueError(
506+
"cannot parse datetimes with mixed time zones unless `utc=True`"
508507
)
509-
return result2, None
510508
elif tz_out is not None:
511509
# GH#55693
512510
tz_offset = out_tzoffset_vals.pop()
513511
tz_out2 = timezone(timedelta(seconds=tz_offset))
514512
if not tz_compare(tz_out, tz_out2):
515-
# e.g. test_to_datetime_mixed_offsets_with_utc_false_deprecated
516-
result2 = _array_strptime_object_fallback(
517-
values, fmt=fmt, exact=exact, errors=errors, utc=utc
513+
# e.g. test_to_datetime_mixed_offsets_with_utc_false_removed
514+
raise ValueError(
515+
"cannot parse datetimes with mixed time zones unless `utc=True`"
518516
)
519-
return result2, None
520517
# e.g. test_guess_datetime_format_with_parseable_formats
521518
else:
522519
# e.g. test_to_datetime_iso8601_with_timezone_valid
@@ -525,10 +522,9 @@ def array_strptime(
525522
elif not utc:
526523
if tz_out and (state.found_other or state.found_naive_str):
527524
# found_other indicates a tz-naive int, float, dt64, or date
528-
result2 = _array_strptime_object_fallback(
529-
values, fmt=fmt, exact=exact, errors=errors, utc=utc
525+
raise ValueError(
526+
"cannot parse datetimes with mixed time zones unless `utc=True`"
530527
)
531-
return result2, None
532528

533529
if infer_reso:
534530
if state.creso_ever_changed:
@@ -790,155 +786,6 @@ cdef tzinfo _parse_with_format(
790786
return tz
791787

792788

793-
def _array_strptime_object_fallback(
794-
ndarray[object] values,
795-
str fmt,
796-
bint exact=True,
797-
errors="raise",
798-
bint utc=False,
799-
):
800-
801-
cdef:
802-
Py_ssize_t i, n = len(values)
803-
npy_datetimestruct dts
804-
int64_t iresult
805-
object val
806-
tzinfo tz
807-
bint is_raise = errors=="raise"
808-
bint is_coerce = errors=="coerce"
809-
bint iso_format = format_is_iso(fmt)
810-
NPY_DATETIMEUNIT creso, out_bestunit, item_reso
811-
int out_local = 0, out_tzoffset = 0
812-
bint string_to_dts_succeeded = 0
813-
814-
assert is_raise or is_coerce
815-
816-
item_reso = NPY_DATETIMEUNIT.NPY_FR_GENERIC
817-
format_regex, locale_time = _get_format_regex(fmt)
818-
819-
result = np.empty(n, dtype=object)
820-
821-
dts.us = dts.ps = dts.as = 0
822-
823-
for i in range(n):
824-
val = values[i]
825-
try:
826-
if isinstance(val, str):
827-
if len(val) == 0 or val in nat_strings:
828-
result[i] = NaT
829-
continue
830-
elif checknull_with_nat_and_na(val):
831-
result[i] = NaT
832-
continue
833-
elif PyDateTime_Check(val):
834-
result[i] = Timestamp(val)
835-
continue
836-
elif PyDate_Check(val):
837-
result[i] = Timestamp(val)
838-
continue
839-
elif cnp.is_datetime64_object(val):
840-
result[i] = Timestamp(val)
841-
continue
842-
elif (
843-
(is_integer_object(val) or is_float_object(val))
844-
and (val != val or val == NPY_NAT)
845-
):
846-
result[i] = NaT
847-
continue
848-
else:
849-
val = str(val)
850-
851-
if fmt == "ISO8601":
852-
string_to_dts_succeeded = not string_to_dts(
853-
val, &dts, &out_bestunit, &out_local,
854-
&out_tzoffset, False, None, False
855-
)
856-
elif iso_format:
857-
string_to_dts_succeeded = not string_to_dts(
858-
val, &dts, &out_bestunit, &out_local,
859-
&out_tzoffset, False, fmt, exact
860-
)
861-
if string_to_dts_succeeded:
862-
# No error reported by string_to_dts, pick back up
863-
# where we left off
864-
creso = get_supported_reso(out_bestunit)
865-
try:
866-
value = npy_datetimestruct_to_datetime(creso, &dts)
867-
except OverflowError as err:
868-
raise OutOfBoundsDatetime(
869-
f"Out of bounds nanosecond timestamp: {val}"
870-
) from err
871-
if out_local == 1:
872-
tz = timezone(timedelta(minutes=out_tzoffset))
873-
value = tz_localize_to_utc_single(
874-
value, tz, ambiguous="raise", nonexistent=None, creso=creso
875-
)
876-
else:
877-
tz = None
878-
ts = Timestamp._from_value_and_reso(value, creso, tz)
879-
result[i] = ts
880-
continue
881-
882-
if parse_today_now(val, &iresult, utc, NPY_FR_ns):
883-
result[i] = Timestamp(val)
884-
continue
885-
886-
# Some ISO formats can't be parsed by string_to_dts
887-
# For example, 6-digit YYYYMD. So, if there's an error, and a format
888-
# was specified, then try the string-matching code below. If the format
889-
# specified was 'ISO8601', then we need to error, because
890-
# only string_to_dts handles mixed ISO8601 formats.
891-
if not string_to_dts_succeeded and fmt == "ISO8601":
892-
raise ValueError(f"Time data {val} is not ISO8601 format")
893-
894-
tz = _parse_with_format(
895-
val, fmt, exact, format_regex, locale_time, &dts, &item_reso
896-
)
897-
try:
898-
iresult = npy_datetimestruct_to_datetime(item_reso, &dts)
899-
except OverflowError as err:
900-
raise OutOfBoundsDatetime(
901-
f"Out of bounds nanosecond timestamp: {val}"
902-
) from err
903-
if tz is not None:
904-
iresult = tz_localize_to_utc_single(
905-
iresult, tz, ambiguous="raise", nonexistent=None, creso=item_reso
906-
)
907-
ts = Timestamp._from_value_and_reso(iresult, item_reso, tz)
908-
result[i] = ts
909-
910-
except (ValueError, OutOfBoundsDatetime) as ex:
911-
ex.args = (
912-
f"{str(ex)}, at position {i}. You might want to try:\n"
913-
" - passing `format` if your strings have a consistent format;\n"
914-
" - passing `format='ISO8601'` if your strings are "
915-
"all ISO8601 but not necessarily in exactly the same format;\n"
916-
" - passing `format='mixed'`, and the format will be "
917-
"inferred for each element individually. "
918-
"You might want to use `dayfirst` alongside this.",
919-
)
920-
if is_coerce:
921-
result[i] = NaT
922-
continue
923-
else:
924-
raise
925-
926-
import warnings
927-
928-
from pandas.util._exceptions import find_stack_level
929-
warnings.warn(
930-
"In a future version of pandas, parsing datetimes with mixed time "
931-
"zones will raise an error unless `utc=True`. Please specify `utc=True` "
932-
"to opt in to the new behaviour and silence this warning. "
933-
"To create a `Series` with mixed offsets and `object` dtype, "
934-
"please use `apply` and `datetime.datetime.strptime`",
935-
FutureWarning,
936-
stacklevel=find_stack_level(),
937-
)
938-
939-
return result
940-
941-
942789
class TimeRE(_TimeRE):
943790
"""
944791
Handle conversion from format directives to regexes.

0 commit comments

Comments
 (0)