diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index 30cab00e4d3f9..ce3935d2cd0ac 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -200,7 +200,7 @@ def time_same_offset(self): to_datetime(self.same_offset) def time_different_offset(self): - to_datetime(self.diff_offset) + to_datetime(self.diff_offset, utc=True) class ToDatetimeFormatQuarters: @@ -231,9 +231,6 @@ def time_no_exact(self): def time_same_offset(self): to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z") - def time_different_offset(self): - to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z") - def time_same_offset_to_utc(self): to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 35769d9c5f0d8..cdc1420192e57 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -113,6 +113,7 @@ Removal of prior version deprecations/changes - All arguments in :meth:`Index.sort_values` are now keyword only (:issue:`56493`) - All arguments in :meth:`Series.to_dict` are now keyword only (:issue:`56493`) - Changed the default value of ``observed`` in :meth:`DataFrame.groupby` and :meth:`Series.groupby` to ``True`` (:issue:`51811`) +- Enforced deprecation disallowing parsing datetimes with mixed time zones unless user passes ``utc=True`` to :func:`to_datetime` (:issue:`57275`) - Enforced silent-downcasting deprecation for :ref:`all relevant methods ` (:issue:`54710`) - In :meth:`DataFrame.stack`, the default value of ``future_stack`` is now ``True``; specifying ``False`` will raise a ``FutureWarning`` (:issue:`55448`) - Removed ``DataFrame.applymap``, ``Styler.applymap`` and ``Styler.applymap_index`` (:issue:`52364`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 4f70b77780c2b..a09f4321c0d3c 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -9,7 +9,6 @@ from datetime import timezone from cpython.datetime cimport ( PyDate_Check, PyDateTime_Check, - datetime, import_datetime, timedelta, tzinfo, @@ -590,15 +589,17 @@ cpdef array_to_datetime( return values, None if seen_datetime_offset and not utc_convert: - # GH#17697 + # GH#17697, GH#57275 # 1) If all the offsets are equal, return one offset for # the parsed dates to (maybe) pass to DatetimeIndex - # 2) If the offsets are different, then force the parsing down the - # object path where an array of datetimes - # (with individual dateutil.tzoffsets) are returned + # 2) If the offsets are different, then do not force the parsing + # and raise a ValueError: "cannot parse datetimes with + # mixed time zones unless `utc=True`" instead is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets: - return _array_to_datetime_object(values, errors, dayfirst, yearfirst) + raise ValueError( + "cannot parse datetimes with mixed time zones unless `utc=True`" + ) elif state.found_naive or state.found_other: # e.g. test_to_datetime_mixed_awareness_mixed_types raise ValueError("Cannot mix tz-aware with tz-naive values") @@ -647,115 +648,6 @@ cpdef array_to_datetime( return result, tz_out -@cython.wraparound(False) -@cython.boundscheck(False) -cdef _array_to_datetime_object( - ndarray[object] values, - str errors, - bint dayfirst=False, - bint yearfirst=False, -): - """ - Fall back function for array_to_datetime - - Attempts to parse datetime strings with dateutil to return an array - of datetime objects - - Parameters - ---------- - values : ndarray[object] - date-like objects to convert - errors : str - error behavior when parsing - dayfirst : bool, default False - dayfirst parsing behavior when encountering datetime strings - yearfirst : bool, default False - yearfirst parsing behavior when encountering datetime strings - - Returns - ------- - np.ndarray[object] - Literal[None] - """ - cdef: - Py_ssize_t i, n = values.size - object val - bint is_coerce = errors == "coerce" - bint is_raise = errors == "raise" - ndarray oresult_nd - ndarray[object] oresult - npy_datetimestruct dts - cnp.broadcast mi - _TSObject tsobj - - assert is_raise or is_coerce - - oresult_nd = cnp.PyArray_EMPTY(values.ndim, values.shape, cnp.NPY_OBJECT, 0) - mi = cnp.PyArray_MultiIterNew2(oresult_nd, values) - oresult = oresult_nd.ravel() - - # We return an object array and only attempt to parse: - # 1) NaT or NaT-like values - # 2) datetime strings, which we return as datetime.datetime - # 3) special strings - "now" & "today" - for i in range(n): - # Analogous to: val = values[i] - val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] - - if checknull_with_nat_and_na(val) or PyDateTime_Check(val): - # GH 25978. No need to parse NaT-like or datetime-like vals - oresult[i] = val - elif isinstance(val, str): - if type(val) is not str: - # GH#32264 np.str_ objects - val = str(val) - - if len(val) == 0 or val in nat_strings: - oresult[i] = "NaT" - cnp.PyArray_MultiIter_NEXT(mi) - continue - - try: - tsobj = convert_str_to_tsobject( - val, None, dayfirst=dayfirst, yearfirst=yearfirst - ) - tsobj.ensure_reso(NPY_FR_ns, val) - - dts = tsobj.dts - oresult[i] = datetime( - dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us, - tzinfo=tsobj.tzinfo, - fold=tsobj.fold, - ) - - except (ValueError, OverflowError) as ex: - ex.args = (f"{ex}, at position {i}", ) - if is_coerce: - oresult[i] = NaT - cnp.PyArray_MultiIter_NEXT(mi) - continue - if is_raise: - raise - return values, None - else: - if is_raise: - raise - return values, None - - cnp.PyArray_MultiIter_NEXT(mi) - - warnings.warn( - "In a future version of pandas, parsing datetimes with mixed time " - "zones will raise an error unless `utc=True`. " - "Please specify `utc=True` to opt in to the new behaviour " - "and silence this warning. To create a `Series` with mixed offsets and " - "`object` dtype, please use `apply` and `datetime.datetime.strptime`", - FutureWarning, - stacklevel=find_stack_level(), - ) - return oresult_nd, None - - def array_to_datetime_with_tz( ndarray values, tzinfo tz, bint dayfirst, bint yearfirst, NPY_DATETIMEUNIT creso ): diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index cfced4ab44aa0..cd2475830b013 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -58,7 +58,6 @@ from pandas._libs.tslibs.dtypes cimport ( ) from pandas._libs.tslibs.nattype cimport ( NPY_NAT, - c_NaT as NaT, c_nat_strings as nat_strings, ) from pandas._libs.tslibs.np_datetime cimport ( @@ -503,20 +502,18 @@ def array_strptime( if seen_datetime_offset and not utc: is_same_offsets = len(out_tzoffset_vals) == 1 if not is_same_offsets or (state.found_naive or state.found_other): - result2 = _array_strptime_object_fallback( - values, fmt=fmt, exact=exact, errors=errors, utc=utc + raise ValueError( + "cannot parse datetimes with mixed time zones unless `utc=True`" ) - return result2, None elif tz_out is not None: # GH#55693 tz_offset = out_tzoffset_vals.pop() tz_out2 = timezone(timedelta(seconds=tz_offset)) if not tz_compare(tz_out, tz_out2): - # e.g. test_to_datetime_mixed_offsets_with_utc_false_deprecated - result2 = _array_strptime_object_fallback( - values, fmt=fmt, exact=exact, errors=errors, utc=utc + # e.g. test_to_datetime_mixed_offsets_with_utc_false_removed + raise ValueError( + "cannot parse datetimes with mixed time zones unless `utc=True`" ) - return result2, None # e.g. test_guess_datetime_format_with_parseable_formats else: # e.g. test_to_datetime_iso8601_with_timezone_valid @@ -525,10 +522,9 @@ def array_strptime( elif not utc: if tz_out and (state.found_other or state.found_naive_str): # found_other indicates a tz-naive int, float, dt64, or date - result2 = _array_strptime_object_fallback( - values, fmt=fmt, exact=exact, errors=errors, utc=utc + raise ValueError( + "cannot parse datetimes with mixed time zones unless `utc=True`" ) - return result2, None if infer_reso: if state.creso_ever_changed: @@ -790,155 +786,6 @@ cdef tzinfo _parse_with_format( return tz -def _array_strptime_object_fallback( - ndarray[object] values, - str fmt, - bint exact=True, - errors="raise", - bint utc=False, -): - - cdef: - Py_ssize_t i, n = len(values) - npy_datetimestruct dts - int64_t iresult - object val - tzinfo tz - bint is_raise = errors=="raise" - bint is_coerce = errors=="coerce" - bint iso_format = format_is_iso(fmt) - NPY_DATETIMEUNIT creso, out_bestunit, item_reso - int out_local = 0, out_tzoffset = 0 - bint string_to_dts_succeeded = 0 - - assert is_raise or is_coerce - - item_reso = NPY_DATETIMEUNIT.NPY_FR_GENERIC - format_regex, locale_time = _get_format_regex(fmt) - - result = np.empty(n, dtype=object) - - dts.us = dts.ps = dts.as = 0 - - for i in range(n): - val = values[i] - try: - if isinstance(val, str): - if len(val) == 0 or val in nat_strings: - result[i] = NaT - continue - elif checknull_with_nat_and_na(val): - result[i] = NaT - continue - elif PyDateTime_Check(val): - result[i] = Timestamp(val) - continue - elif PyDate_Check(val): - result[i] = Timestamp(val) - continue - elif cnp.is_datetime64_object(val): - result[i] = Timestamp(val) - continue - elif ( - (is_integer_object(val) or is_float_object(val)) - and (val != val or val == NPY_NAT) - ): - result[i] = NaT - continue - else: - val = str(val) - - if fmt == "ISO8601": - string_to_dts_succeeded = not string_to_dts( - val, &dts, &out_bestunit, &out_local, - &out_tzoffset, False, None, False - ) - elif iso_format: - string_to_dts_succeeded = not string_to_dts( - val, &dts, &out_bestunit, &out_local, - &out_tzoffset, False, fmt, exact - ) - if string_to_dts_succeeded: - # No error reported by string_to_dts, pick back up - # where we left off - creso = get_supported_reso(out_bestunit) - try: - value = npy_datetimestruct_to_datetime(creso, &dts) - except OverflowError as err: - raise OutOfBoundsDatetime( - f"Out of bounds nanosecond timestamp: {val}" - ) from err - if out_local == 1: - tz = timezone(timedelta(minutes=out_tzoffset)) - value = tz_localize_to_utc_single( - value, tz, ambiguous="raise", nonexistent=None, creso=creso - ) - else: - tz = None - ts = Timestamp._from_value_and_reso(value, creso, tz) - result[i] = ts - continue - - if parse_today_now(val, &iresult, utc, NPY_FR_ns): - result[i] = Timestamp(val) - continue - - # Some ISO formats can't be parsed by string_to_dts - # For example, 6-digit YYYYMD. So, if there's an error, and a format - # was specified, then try the string-matching code below. If the format - # specified was 'ISO8601', then we need to error, because - # only string_to_dts handles mixed ISO8601 formats. - if not string_to_dts_succeeded and fmt == "ISO8601": - raise ValueError(f"Time data {val} is not ISO8601 format") - - tz = _parse_with_format( - val, fmt, exact, format_regex, locale_time, &dts, &item_reso - ) - try: - iresult = npy_datetimestruct_to_datetime(item_reso, &dts) - except OverflowError as err: - raise OutOfBoundsDatetime( - f"Out of bounds nanosecond timestamp: {val}" - ) from err - if tz is not None: - iresult = tz_localize_to_utc_single( - iresult, tz, ambiguous="raise", nonexistent=None, creso=item_reso - ) - ts = Timestamp._from_value_and_reso(iresult, item_reso, tz) - result[i] = ts - - except (ValueError, OutOfBoundsDatetime) as ex: - ex.args = ( - f"{str(ex)}, at position {i}. You might want to try:\n" - " - passing `format` if your strings have a consistent format;\n" - " - passing `format='ISO8601'` if your strings are " - "all ISO8601 but not necessarily in exactly the same format;\n" - " - passing `format='mixed'`, and the format will be " - "inferred for each element individually. " - "You might want to use `dayfirst` alongside this.", - ) - if is_coerce: - result[i] = NaT - continue - else: - raise - - import warnings - - from pandas.util._exceptions import find_stack_level - warnings.warn( - "In a future version of pandas, parsing datetimes with mixed time " - "zones will raise an error unless `utc=True`. Please specify `utc=True` " - "to opt in to the new behaviour and silence this warning. " - "To create a `Series` with mixed offsets and `object` dtype, " - "please use `apply` and `datetime.datetime.strptime`", - FutureWarning, - stacklevel=find_stack_level(), - ) - - return result - - class TimeRE(_TimeRE): """ Handle conversion from format directives to regexes. diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 325ba90c21c29..c785f0c3a6985 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -723,14 +723,6 @@ def to_datetime( offsets (typically, daylight savings), see :ref:`Examples ` section for details. - .. warning:: - - In a future version of pandas, parsing datetimes with mixed time - zones will raise an error unless `utc=True`. - Please specify `utc=True` to opt in to the new behaviour - and silence this warning. To create a `Series` with mixed offsets and - `object` dtype, please use `apply` and `datetime.datetime.strptime`. - See also: pandas general documentation about `timezone conversion and localization >> pd.to_datetime( ... ["2020-10-25 02:00 +0200", "2020-10-25 04:00 +0100"] ... ) # doctest: +SKIP - FutureWarning: In a future version of pandas, parsing datetimes with mixed - time zones will raise an error unless `utc=True`. Please specify `utc=True` - to opt in to the new behaviour and silence this warning. To create a `Series` - with mixed offsets and `object` dtype, please use `apply` and - `datetime.datetime.strptime`. - Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00], - dtype='object') + ValueError: cannot parse datetimes with mixed time zones unless `utc=True` + + - To create a :class:`Series` with mixed offsets and ``object`` dtype, please use + :meth:`Series.apply` and :func:`datetime.datetime.strptime`: + + >>> import datetime as dt + >>> ser = pd.Series(["2020-10-25 02:00 +0200", "2020-10-25 04:00 +0100"]) + >>> ser.apply(lambda x: dt.datetime.strptime(x, "%Y-%m-%d %H:%M %z")) + 0 2020-10-25 02:00:00+02:00 + 1 2020-10-25 04:00:00+01:00 + dtype: object - - A mix of timezone-aware and timezone-naive inputs is also converted to - a simple :class:`Index` containing :class:`datetime.datetime` objects: + - A mix of timezone-aware and timezone-naive inputs will also raise a ValueError + unless ``utc=True``: >>> from datetime import datetime >>> pd.to_datetime( ... ["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)] ... ) # doctest: +SKIP - FutureWarning: In a future version of pandas, parsing datetimes with mixed - time zones will raise an error unless `utc=True`. Please specify `utc=True` - to opt in to the new behaviour and silence this warning. To create a `Series` - with mixed offsets and `object` dtype, please use `apply` and - `datetime.datetime.strptime`. - Index([2020-01-01 01:00:00-01:00, 2020-01-01 03:00:00], dtype='object') + ValueError: cannot parse datetimes with mixed time zones unless `utc=True` | diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index de246a2757409..3643cac04e0ca 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1292,14 +1292,7 @@ def _try_convert_to_date(self, data: Series) -> tuple[Series, bool]: date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS for date_unit in date_units: try: - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - ".*parsing datetimes with mixed time " - "zones will raise an error", - category=FutureWarning, - ) - new_data = to_datetime(new_data, errors="raise", unit=date_unit) + new_data = to_datetime(new_data, errors="raise", unit=date_unit) except (ValueError, OverflowError, TypeError): continue return new_data, True diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 6a3834be20d39..70a90a3e37d62 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1158,24 +1158,18 @@ def converter(*date_cols, col: Hashable): date_format.get(col) if isinstance(date_format, dict) else date_format ) - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - ".*parsing datetimes with mixed time zones will raise an error", - category=FutureWarning, + str_objs = ensure_object(strs) + try: + result = tools.to_datetime( + str_objs, + format=date_fmt, + utc=False, + dayfirst=dayfirst, + cache=cache_dates, ) - str_objs = ensure_object(strs) - try: - result = tools.to_datetime( - str_objs, - format=date_fmt, - utc=False, - dayfirst=dayfirst, - cache=cache_dates, - ) - except (ValueError, TypeError): - # test_usecols_with_parse_dates4 - return str_objs + except (ValueError, TypeError): + # test_usecols_with_parse_dates4 + return str_objs if isinstance(result, DatetimeIndex): arr = result.to_numpy() @@ -1184,45 +1178,31 @@ def converter(*date_cols, col: Hashable): return result._values else: try: - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - ".*parsing datetimes with mixed time zones " - "will raise an error", - category=FutureWarning, - ) - pre_parsed = date_parser( - *(unpack_if_single_element(arg) for arg in date_cols) + pre_parsed = date_parser( + *(unpack_if_single_element(arg) for arg in date_cols) + ) + try: + result = tools.to_datetime( + pre_parsed, + cache=cache_dates, ) - try: - result = tools.to_datetime( - pre_parsed, - cache=cache_dates, - ) - except (ValueError, TypeError): - # test_read_csv_with_custom_date_parser - result = pre_parsed + except (ValueError, TypeError): + # test_read_csv_with_custom_date_parser + result = pre_parsed if isinstance(result, datetime.datetime): raise Exception("scalar parser") return result except Exception: # e.g. test_datetime_fractional_seconds - with warnings.catch_warnings(): - warnings.filterwarnings( - "ignore", - ".*parsing datetimes with mixed time zones " - "will raise an error", - category=FutureWarning, - ) - pre_parsed = parsing.try_parse_dates( - parsing.concat_date_cols(date_cols), - parser=date_parser, - ) - try: - return tools.to_datetime(pre_parsed) - except (ValueError, TypeError): - # TODO: not reached in tests 2023-10-27; needed? - return pre_parsed + pre_parsed = parsing.try_parse_dates( + parsing.concat_date_cols(date_cols), + parser=date_parser, + ) + try: + return tools.to_datetime(pre_parsed) + except (ValueError, TypeError): + # TODO: not reached in tests 2023-10-27; needed? + return pre_parsed return converter diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 97e768b348d55..0756d490cd4fa 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -296,11 +296,9 @@ def test_construction_index_with_mixed_timezones(self): tm.assert_index_equal(result, exp, exact=True) assert not isinstance(result, DatetimeIndex) - msg = "DatetimeIndex has mixed timezones" - msg_depr = "parsing datetimes with mixed time zones will raise an error" - with pytest.raises(TypeError, match=msg): - with tm.assert_produces_warning(FutureWarning, match=msg_depr): - DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"]) + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"]) # length = 1 result = Index([Timestamp("2011-01-01")], name="idx") diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index f02dd997e62d0..0bc0c3e744db7 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -2330,8 +2330,8 @@ def test_from_csv_with_mixed_offsets(all_parsers): result = parser.read_csv(StringIO(data), parse_dates=["a"])["a"] expected = Series( [ - Timestamp("2020-01-01 00:00:00+01:00"), - Timestamp("2020-01-01 00:00:00+00:00"), + "2020-01-01T00:00:00+01:00", + "2020-01-01T00:00:00+00:00", ], name="a", index=[0, 1], diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 0ed61fdd0ce45..3e617138c4a6a 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -12,7 +12,6 @@ import locale from dateutil.parser import parse -from dateutil.tz.tz import tzoffset import numpy as np import pytest import pytz @@ -460,15 +459,13 @@ def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates): ], ], ) - def test_to_datetime_parse_tzname_or_tzoffset_utc_false_deprecated( + def test_to_datetime_parse_tzname_or_tzoffset_utc_false_removed( self, fmt, dates, expected_dates ): - # GH 13486, 50887 - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime(dates, format=fmt) - expected = Index(expected_dates) - tm.assert_equal(result, expected) + # GH#13486, GH#50887, GH#57275 + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + to_datetime(dates, format=fmt) def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self): # GH 32792 @@ -638,27 +635,21 @@ def test_to_datetime_mixed_datetime_and_string_with_format( "constructor", [Timestamp, lambda x: Timestamp(x).to_pydatetime()], ) - def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_false( + def test_to_datetime_mixed_dt_and_str_with_format_mixed_offsets_utc_false_removed( self, fmt, constructor ): # https://github.com/pandas-dev/pandas/issues/49298 # https://github.com/pandas-dev/pandas/issues/50254 + # GH#57275 # note: ISO8601 formats go down a fastpath, so we need to check both # a ISO8601 format and a non-ISO8601 one args = ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"] ts1 = constructor(args[0]) ts2 = args[1] - msg = "parsing datetimes with mixed time zones will raise an error" + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" - expected = Index( - [ - Timestamp("2000-01-01 01:00:00"), - Timestamp("2000-01-01 02:00:00+0000", tz="UTC"), - ], - ) - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime([ts1, ts2], format=fmt, utc=False) - tm.assert_index_equal(result, expected) + with pytest.raises(ValueError, match=msg): + to_datetime([ts1, ts2], format=fmt, utc=False) @pytest.mark.parametrize( "fmt, expected", @@ -683,18 +674,19 @@ def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_fal ), ], ) - def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, expected): + def test_to_datetime_mixed_offsets_with_none_tz_utc_false_removed( + self, fmt, expected + ): # https://github.com/pandas-dev/pandas/issues/50071 - msg = "parsing datetimes with mixed time zones will raise an error" + # GH#57275 + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime( + with pytest.raises(ValueError, match=msg): + to_datetime( ["2000-01-01 09:00:00+01:00", "2000-01-02 02:00:00+02:00", None], format=fmt, utc=False, ) - expected = Index(expected) - tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "fmt, expected", @@ -1153,17 +1145,16 @@ def test_to_datetime_tz_mixed(self, cache): ) tm.assert_index_equal(result, expected) - def test_to_datetime_different_offsets(self, cache): + def test_to_datetime_different_offsets_removed(self, cache): # inspired by asv timeseries.ToDatetimeNONISO8601 benchmark # see GH-26097 for more + # GH#57275 ts_string_1 = "March 1, 2018 12:00:00+0400" ts_string_2 = "March 1, 2018 12:00:00+0500" arr = [ts_string_1] * 5 + [ts_string_2] * 5 - expected = Index([parse(x) for x in arr]) - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime(arr, cache=cache) - tm.assert_index_equal(result, expected) + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + to_datetime(arr, cache=cache) def test_to_datetime_tz_pytz(self, cache): # see gh-8260 @@ -1512,23 +1503,15 @@ def test_week_without_day_and_calendar_year(self, date, format): to_datetime(date, format=format) def test_to_datetime_coerce(self): - # GH 26122 + # GH#26122, GH#57275 ts_strings = [ "March 1, 2018 12:00:00+0400", "March 1, 2018 12:00:00+0500", "20100240", ] - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime(ts_strings, errors="coerce") - expected = Index( - [ - datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 14400)), - datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 18000)), - NaT, - ] - ) - tm.assert_index_equal(result, expected) + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + to_datetime(ts_strings, errors="coerce") @pytest.mark.parametrize( "string_arg, format", @@ -1595,23 +1578,12 @@ def test_iso_8601_strings_with_same_offset(self): result = DatetimeIndex([ts_str] * 2) tm.assert_index_equal(result, expected) - def test_iso_8601_strings_with_different_offsets(self): - # GH 17697, 11736, 50887 + def test_iso_8601_strings_with_different_offsets_removed(self): + # GH#17697, GH#11736, GH#50887, GH#57275 ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT] - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = to_datetime(ts_strings) - expected = np.array( - [ - datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), - datetime(2015, 11, 18, 16, 30, tzinfo=tzoffset(None, 23400)), - NaT, - ], - dtype=object, - ) - # GH 21864 - expected = Index(expected) - tm.assert_index_equal(result, expected) + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + to_datetime(ts_strings) def test_iso_8601_strings_with_different_offsets_utc(self): ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT] @@ -1621,8 +1593,8 @@ def test_iso_8601_strings_with_different_offsets_utc(self): ) tm.assert_index_equal(result, expected) - def test_mixed_offsets_with_native_datetime_raises(self): - # GH 25978 + def test_mixed_offsets_with_native_datetime_utc_false_raises(self): + # GH#25978, GH#57275 vals = [ "nan", @@ -1636,29 +1608,9 @@ def test_mixed_offsets_with_native_datetime_raises(self): ser = Series(vals) assert all(ser[i] is vals[i] for i in range(len(vals))) # GH#40111 - now = Timestamp("now") - today = Timestamp("today") - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): - mixed = to_datetime(ser) - expected = Series( - [ - "NaT", - Timestamp("1990-01-01"), - Timestamp("2015-03-14T16:15:14.123-08:00").to_pydatetime(), - Timestamp("2019-03-04T21:56:32.620-07:00").to_pydatetime(), - None, - ], - dtype=object, - ) - tm.assert_series_equal(mixed[:-2], expected) - # we'll check mixed[-1] and mixed[-2] match now and today to within - # call-timing tolerances - assert (now - mixed.iloc[-1]).total_seconds() <= 0.1 - assert (today - mixed.iloc[-2]).total_seconds() <= 0.1 - - with pytest.raises(ValueError, match="Tz-aware datetime.datetime"): - to_datetime(mixed) + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + to_datetime(ser) def test_non_iso_strings_with_tz_offset(self): result = to_datetime(["March 1, 2018 12:00:00+0400"] * 2) @@ -1719,10 +1671,10 @@ def test_to_datetime_fixed_offset(self): ], ], ) - def test_to_datetime_mixed_offsets_with_utc_false_deprecated(self, date): - # GH 50887 - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): + def test_to_datetime_mixed_offsets_with_utc_false_removed(self, date): + # GH#50887, GH#57275 + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): to_datetime(date, utc=False) @@ -3510,10 +3462,10 @@ def test_to_datetime_with_empty_str_utc_false_format_mixed(): def test_to_datetime_with_empty_str_utc_false_offsets_and_format_mixed(): - # GH 50887 - msg = "parsing datetimes with mixed time zones will raise an error" + # GH#50887, GH#57275 + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" - with tm.assert_produces_warning(FutureWarning, match=msg): + with pytest.raises(ValueError, match=msg): to_datetime( ["2020-01-01 00:00+00:00", "2020-01-01 00:00+02:00", ""], format="mixed" ) @@ -3572,7 +3524,7 @@ def test_to_datetime_mixed_types_matching_tzs(): ) @pytest.mark.parametrize("naive_first", [True, False]) def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_first): - # GH#55793, GH#55693 + # GH#55793, GH#55693, GH#57275 # Empty string parses to NaT vals = [aware_val, naive_val, ""] @@ -3586,8 +3538,6 @@ def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_fir both_strs = isinstance(aware_val, str) and isinstance(naive_val, str) has_numeric = isinstance(naive_val, (int, float)) - depr_msg = "In a future version of pandas, parsing datetimes with mixed time zones" - first_non_null = next(x for x in vec if x != "") # if first_non_null is a not a string, _guess_datetime_format_for_array # doesn't guess a format so we don't go through array_strptime @@ -3628,19 +3578,19 @@ def test_to_datetime_mixed_awareness_mixed_types(aware_val, naive_val, naive_fir to_datetime(vec, utc=True) else: - with tm.assert_produces_warning(FutureWarning, match=depr_msg): + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): to_datetime(vec) # No warning/error with utc=True to_datetime(vec, utc=True) if both_strs: - with tm.assert_produces_warning(FutureWarning, match=depr_msg): + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): to_datetime(vec, format="mixed") - with tm.assert_produces_warning(FutureWarning, match=depr_msg): - msg = "DatetimeIndex has mixed timezones" - with pytest.raises(TypeError, match=msg): - DatetimeIndex(vec) + with pytest.raises(ValueError, match=msg): + DatetimeIndex(vec) else: msg = "Cannot mix tz-aware with tz-naive values" if naive_first and isinstance(aware_val, Timestamp): diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index f8939d1d8ccd4..30ea3a70552aa 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -200,19 +200,9 @@ def test_parsing_different_timezone_offsets(): data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] data = np.array(data, dtype=object) - msg = "parsing datetimes with mixed time zones will raise an error" - with tm.assert_produces_warning(FutureWarning, match=msg): - result, result_tz = tslib.array_to_datetime(data) - expected = np.array( - [ - datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), - datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 23400)), - ], - dtype=object, - ) - - tm.assert_numpy_array_equal(result, expected) - assert result_tz is None + msg = "cannot parse datetimes with mixed time zones unless `utc=True`" + with pytest.raises(ValueError, match=msg): + tslib.array_to_datetime(data) @pytest.mark.parametrize(