diff --git a/doc/source/user_guide/io.rst b/doc/source/user_guide/io.rst index 3dba120c0c64b..bb51124f10e54 100644 --- a/doc/source/user_guide/io.rst +++ b/doc/source/user_guide/io.rst @@ -931,6 +931,8 @@ Parsing a CSV with mixed timezones pandas cannot natively represent a column or index with mixed timezones. If your CSV file contains columns with a mixture of timezones, the default result will be an object-dtype column with strings, even with ``parse_dates``. +To parse the mixed-timezone values as a datetime column, read in as ``object`` dtype and +then call :func:`to_datetime` with ``utc=True``. .. ipython:: python @@ -939,14 +941,6 @@ an object-dtype column with strings, even with ``parse_dates``. a 2000-01-01T00:00:00+05:00 2000-01-01T00:00:00+06:00""" - df = pd.read_csv(StringIO(content), parse_dates=["a"]) - df["a"] - -To parse the mixed-timezone values as a datetime column, read in as ``object`` dtype and -then call :func:`to_datetime` with ``utc=True``. - -.. ipython:: python - df = pd.read_csv(StringIO(content)) df["a"] = pd.to_datetime(df["a"], utc=True) df["a"] diff --git a/doc/source/whatsnew/v0.24.0.rst b/doc/source/whatsnew/v0.24.0.rst index 7f5025e6ce60b..73a523b14f9f7 100644 --- a/doc/source/whatsnew/v0.24.0.rst +++ b/doc/source/whatsnew/v0.24.0.rst @@ -632,13 +632,19 @@ Parsing datetime strings with the same UTC offset will preserve the UTC offset i Parsing datetime strings with different UTC offsets will now create an Index of ``datetime.datetime`` objects with different UTC offsets -.. ipython:: python +.. 
code-block:: ipython + + In [59]: idx = pd.to_datetime(["2015-11-18 15:30:00+05:30", + "2015-11-18 16:30:00+06:30"]) + + In [60]: idx + Out[60]: Index([2015-11-18 15:30:00+05:30, 2015-11-18 16:30:00+06:30], dtype='object') + + In [61]: idx[0] + Out[61]: Timestamp('2015-11-18 15:30:00+0530', tz='UTC+05:30') - idx = pd.to_datetime(["2015-11-18 15:30:00+05:30", - "2015-11-18 16:30:00+06:30"]) - idx - idx[0] - idx[1] + In [62]: idx[1] + Out[62]: Timestamp('2015-11-18 16:30:00+0630', tz='UTC+06:30') Passing ``utc=True`` will mimic the previous behavior but will correctly indicate that the dates have been converted to UTC @@ -673,15 +679,22 @@ Parsing mixed-timezones with :func:`read_csv` *New behavior* -.. ipython:: python +.. code-block:: ipython + + In [64]: import io + + In [65]: content = """\ + ...: a + ...: 2000-01-01T00:00:00+05:00 + ...: 2000-01-01T00:00:00+06:00""" + + In [66]: df = pd.read_csv(io.StringIO(content), parse_dates=['a']) - import io - content = """\ - a - 2000-01-01T00:00:00+05:00 - 2000-01-01T00:00:00+06:00""" - df = pd.read_csv(io.StringIO(content), parse_dates=['a']) - df.a + In [67]: df.a + Out[67]: + 0 2000-01-01 00:00:00+05:00 + 1 2000-01-01 00:00:00+06:00 + Name: a, Length: 2, dtype: object As can be seen, the ``dtype`` is object; each value in the column is a string. To convert the strings to an array of datetimes, the ``date_parser`` argument diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index f31ab02725394..dd566eaab1e75 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -208,7 +208,14 @@ For example: tz_strs = ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100", "2010-01-01 12:00:00 +0300", "2010-01-01 12:00:00 +0400"] pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z', utc=True) - pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z') + +.. 
code-block:: ipython + + In [37]: pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z') + Out[37]: + Index([2010-01-01 12:00:00+01:00, 2010-01-01 12:00:00-01:00, + 2010-01-01 12:00:00+03:00, 2010-01-01 12:00:00+04:00], + dtype='object') .. _whatsnew_110.grouper_resample_origin: diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index d0dae450735a3..91efcfd590c01 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -295,8 +295,53 @@ Other API changes .. --------------------------------------------------------------------------- .. _whatsnew_210.deprecations: -Deprecations -~~~~~~~~~~~~ +Deprecate parsing datetimes with mixed time zones +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Parsing datetimes with mixed time zones is deprecated and shows a warning unless the user passes ``utc=True`` to :func:`to_datetime` (:issue:`50887`). + +*Previous behavior*: + +.. code-block:: ipython + + In [7]: data = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] + + In [8]: pd.to_datetime(data, utc=False) + Out[8]: + Index([2020-01-01 00:00:00+06:00, 2020-01-01 00:00:00+01:00], dtype='object') + +*New behavior*: + +.. code-block:: ipython + + In [9]: pd.to_datetime(data, utc=False) + FutureWarning: + In a future version of pandas, parsing datetimes with mixed time zones will raise + a warning unless `utc=True`. Please specify `utc=True` to opt in to the new behaviour + and silence this warning. To create a `Series` with mixed offsets and `object` dtype, + please use `apply` and `datetime.datetime.strptime`. + Index([2020-01-01 00:00:00+06:00, 2020-01-01 00:00:00+01:00], dtype='object') + +In order to silence this warning and avoid an error in a future version of pandas, +please specify ``utc=True``: + +.. 
ipython:: python + + data = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] + pd.to_datetime(data, utc=True) + +To create a ``Series`` with mixed offsets and ``object`` dtype, please use ``apply`` +and ``datetime.datetime.strptime``: + +.. ipython:: python + + import datetime as dt + + data = ["2020-01-01 00:00:00+06:00", "2020-01-01 00:00:00+01:00"] + pd.Series(data).apply(lambda x: dt.datetime.strptime(x, '%Y-%m-%d %H:%M:%S%z')) + +Other Deprecations +~~~~~~~~~~~~~~~~~~ - Deprecated 'broadcast_axis' keyword in :meth:`Series.align` and :meth:`DataFrame.align`, upcast before calling ``align`` with ``left = DataFrame({col: left for col in right.columns}, index=right.index)`` (:issue:`51856`) - Deprecated 'downcast' keyword in :meth:`Index.fillna` (:issue:`53956`) - Deprecated 'fill_method' and 'limit' keywords in :meth:`DataFrame.pct_change`, :meth:`Series.pct_change`, :meth:`DataFrameGroupBy.pct_change`, and :meth:`SeriesGroupBy.pct_change`, explicitly call ``ffill`` or ``bfill`` before calling ``pct_change`` instead (:issue:`53491`) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 106f203a16855..20a18cf56779f 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -620,6 +620,7 @@ cdef _array_to_datetime_object( # 1) NaT or NaT-like values # 2) datetime strings, which we return as datetime.datetime # 3) special strings - "now" & "today" + unique_timezones = set() for i in range(n): # Analogous to: val = values[i] val = (cnp.PyArray_MultiIter_DATA(mi, 1))[0] @@ -649,6 +650,7 @@ cdef _array_to_datetime_object( tzinfo=tsobj.tzinfo, fold=tsobj.fold, ) + unique_timezones.add(tsobj.tzinfo) except (ValueError, OverflowError) as ex: ex.args = (f"{ex}, at position {i}", ) @@ -666,6 +668,16 @@ cdef _array_to_datetime_object( cnp.PyArray_MultiIter_NEXT(mi) + if len(unique_timezones) > 1: + warnings.warn( + "In a future version of pandas, parsing datetimes with mixed time " + "zones will raise a warning unless `utc=True`. 
" + "Please specify `utc=True` to opt in to the new behaviour " + "and silence this warning. To create a `Series` with mixed offsets and " + "`object` dtype, please use `apply` and `datetime.datetime.strptime`", + FutureWarning, + stacklevel=find_stack_level(), + ) return oresult_nd, None diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 801968bd59f4e..95faea468fb5d 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -340,6 +340,7 @@ def _return_parsed_timezone_results( tz_result : Index-like of parsed dates with timezone """ tz_results = np.empty(len(result), dtype=object) + non_na_timezones = set() for zone in unique(timezones): mask = timezones == zone dta = DatetimeArray(result[mask]).tz_localize(zone) @@ -348,8 +349,20 @@ def _return_parsed_timezone_results( dta = dta.tz_localize("utc") else: dta = dta.tz_convert("utc") + else: + if not dta.isna().all(): + non_na_timezones.add(zone) tz_results[mask] = dta - + if len(non_na_timezones) > 1: + warnings.warn( + "In a future version of pandas, parsing datetimes with mixed time " + "zones will raise a warning unless `utc=True`. Please specify `utc=True` " + "to opt in to the new behaviour and silence this warning. " + "To create a `Series` with mixed offsets and `object` dtype, " + "please use `apply` and `datetime.datetime.strptime`", + FutureWarning, + stacklevel=find_stack_level(), + ) return Index(tz_results, name=name) @@ -772,6 +785,14 @@ def to_datetime( offsets (typically, daylight savings), see :ref:`Examples ` section for details. + .. warning:: + + In a future version of pandas, parsing datetimes with mixed time + zones will raise an error unless `utc=True`. + Please specify `utc=True` to opt in to the new behaviour + and silence this warning. To create a `Series` with mixed offsets and + `object` dtype, please use `apply` and `datetime.datetime.strptime`. 
+ See also: pandas general documentation about `timezone conversion and localization >> pd.to_datetime(['2020-10-25 02:00 +0200', '2020-10-25 04:00 +0100']) + are **not successfully converted** to a :class:`DatetimeIndex`. + Parsing datetimes with mixed time zones will show a warning unless + `utc=True`. If you specify `utc=False` the warning below will be shown + and a simple :class:`Index` containing :class:`datetime.datetime` + objects will be returned: + + >>> pd.to_datetime(['2020-10-25 02:00 +0200', + ... '2020-10-25 04:00 +0100']) # doctest: +SKIP + FutureWarning: In a future version of pandas, parsing datetimes with mixed + time zones will raise a warning unless `utc=True`. Please specify `utc=True` + to opt in to the new behaviour and silence this warning. To create a `Series` + with mixed offsets and `object` dtype, please use `apply` and + `datetime.datetime.strptime`. Index([2020-10-25 02:00:00+02:00, 2020-10-25 04:00:00+01:00], dtype='object') @@ -1005,7 +1034,13 @@ def to_datetime( a simple :class:`Index` containing :class:`datetime.datetime` objects: >>> from datetime import datetime - >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", datetime(2020, 1, 1, 3, 0)]) + >>> pd.to_datetime(["2020-01-01 01:00:00-01:00", + ... datetime(2020, 1, 1, 3, 0)]) # doctest: +SKIP + FutureWarning: In a future version of pandas, parsing datetimes with mixed + time zones will raise a warning unless `utc=True`. Please specify `utc=True` + to opt in to the new behaviour and silence this warning. To create a `Series` + with mixed offsets and `object` dtype, please use `apply` and + `datetime.datetime.strptime`. 
Index([2020-01-01 01:00:00-01:00, 2020-01-01 03:00:00], dtype='object') | diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index fb45622dac3af..833f4986b6da6 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1312,7 +1312,14 @@ def _try_convert_to_date(self, data): date_units = (self.date_unit,) if self.date_unit else self._STAMP_UNITS for date_unit in date_units: try: - new_data = to_datetime(new_data, errors="raise", unit=date_unit) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + ".*parsing datetimes with mixed time " + "zones will raise a warning", + category=FutureWarning, + ) + new_data = to_datetime(new_data, errors="raise", unit=date_unit) except (ValueError, OverflowError, TypeError): continue return new_data, True diff --git a/pandas/io/parsers/base_parser.py b/pandas/io/parsers/base_parser.py index 0a90deedf7ad2..60996a7d42187 100644 --- a/pandas/io/parsers/base_parser.py +++ b/pandas/io/parsers/base_parser.py @@ -1144,14 +1144,20 @@ def converter(*date_cols, col: Hashable): date_format.get(col) if isinstance(date_format, dict) else date_format ) - result = tools.to_datetime( - ensure_object(strs), - format=date_fmt, - utc=False, - dayfirst=dayfirst, - errors="ignore", - cache=cache_dates, - ) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + ".*parsing datetimes with mixed time zones will raise a warning", + category=FutureWarning, + ) + result = tools.to_datetime( + ensure_object(strs), + format=date_fmt, + utc=False, + dayfirst=dayfirst, + errors="ignore", + cache=cache_dates, + ) if isinstance(result, DatetimeIndex): arr = result.to_numpy() arr.flags.writeable = True @@ -1159,22 +1165,38 @@ def converter(*date_cols, col: Hashable): return result._values else: try: - result = tools.to_datetime( - date_parser(*(unpack_if_single_element(arg) for arg in date_cols)), - errors="ignore", - cache=cache_dates, - ) + with warnings.catch_warnings(): + warnings.filterwarnings( + 
"ignore", + ".*parsing datetimes with mixed time zones " + "will raise a warning", + category=FutureWarning, + ) + result = tools.to_datetime( + date_parser( + *(unpack_if_single_element(arg) for arg in date_cols) + ), + errors="ignore", + cache=cache_dates, + ) if isinstance(result, datetime.datetime): raise Exception("scalar parser") return result except Exception: - return tools.to_datetime( - parsing.try_parse_dates( - parsing.concat_date_cols(date_cols), - parser=date_parser, - ), - errors="ignore", - ) + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + ".*parsing datetimes with mixed time zones " + "will raise a warning", + category=FutureWarning, + ) + return tools.to_datetime( + parsing.try_parse_dates( + parsing.concat_date_cols(date_cols), + parser=date_parser, + ), + errors="ignore", + ) return converter diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index 6d18a292061b9..733c14f33567a 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -300,8 +300,10 @@ def test_construction_index_with_mixed_timezones(self): assert not isinstance(result, DatetimeIndex) msg = "DatetimeIndex has mixed timezones" + msg_depr = "parsing datetimes with mixed time zones will raise a warning" with pytest.raises(TypeError, match=msg): - DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"]) + with tm.assert_produces_warning(FutureWarning, match=msg_depr): + DatetimeIndex(["2013-11-02 22:00-05:00", "2013-11-03 22:00-06:00"]) # length = 1 result = Index([Timestamp("2011-01-01")], name="idx") diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 5ea0ca1fddbd3..e5dfae169453f 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -446,19 +446,6 @@ def test_to_datetime_format_weeks(self, value, fmt, expected, cache): 
["2010-01-01 12:00:00 UTC"] * 2, [Timestamp("2010-01-01 12:00:00", tz="UTC")] * 2, ], - [ - "%Y-%m-%d %H:%M:%S %Z", - [ - "2010-01-01 12:00:00 UTC", - "2010-01-01 12:00:00 GMT", - "2010-01-01 12:00:00 US/Pacific", - ], - [ - Timestamp("2010-01-01 12:00:00", tz="UTC"), - Timestamp("2010-01-01 12:00:00", tz="GMT"), - Timestamp("2010-01-01 12:00:00", tz="US/Pacific"), - ], - ], [ "%Y-%m-%d %H:%M:%S%z", ["2010-01-01 12:00:00+0100"] * 2, @@ -479,18 +466,6 @@ def test_to_datetime_format_weeks(self, value, fmt, expected, cache): ] * 2, ], - [ - "%Y-%m-%d %H:%M:%S %z", - ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"], - [ - Timestamp( - "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60)) - ), - Timestamp( - "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=-60)) - ), - ], - ], [ "%Y-%m-%d %H:%M:%S %z", ["2010-01-01 12:00:00 Z", "2010-01-01 12:00:00 Z"], @@ -509,6 +484,46 @@ def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates): expected = Index(expected_dates) tm.assert_equal(result, expected) + @pytest.mark.parametrize( + "fmt,dates,expected_dates", + [ + [ + "%Y-%m-%d %H:%M:%S %Z", + [ + "2010-01-01 12:00:00 UTC", + "2010-01-01 12:00:00 GMT", + "2010-01-01 12:00:00 US/Pacific", + ], + [ + Timestamp("2010-01-01 12:00:00", tz="UTC"), + Timestamp("2010-01-01 12:00:00", tz="GMT"), + Timestamp("2010-01-01 12:00:00", tz="US/Pacific"), + ], + ], + [ + "%Y-%m-%d %H:%M:%S %z", + ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100"], + [ + Timestamp( + "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=60)) + ), + Timestamp( + "2010-01-01 12:00:00", tzinfo=timezone(timedelta(minutes=-60)) + ), + ], + ], + ], + ) + def test_to_datetime_parse_tzname_or_tzoffset_utc_false_deprecated( + self, fmt, dates, expected_dates + ): + # GH 13486, 50887 + msg = "parsing datetimes with mixed time zones will raise a warning" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_datetime(dates, format=fmt) + 
expected = Index(expected_dates) + tm.assert_equal(result, expected) + def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self): # GH 32792 dates = [ @@ -632,17 +647,6 @@ def test_to_datetime_mixed_date_and_string(self, format): ), id="all tz-aware, mixed offsets, with utc", ), - pytest.param( - False, - ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"], - Index( - [ - Timestamp("2000-01-01 01:00:00"), - Timestamp("2000-01-01 02:00:00+0000", tz="UTC"), - ], - ), - id="tz-aware string, naive pydatetime, without utc", - ), pytest.param( True, ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"], @@ -671,20 +675,41 @@ def test_to_datetime_mixed_datetime_and_string_with_format( tm.assert_index_equal(result, expected) @pytest.mark.parametrize( - "fmt, utc, expected", + "fmt", + ["%Y-%d-%m %H:%M:%S%z", "%Y-%m-%d %H:%M:%S%z"], + ids=["non-ISO8601 format", "ISO8601 format"], + ) + @pytest.mark.parametrize( + "constructor", + [Timestamp, lambda x: Timestamp(x).to_pydatetime()], + ) + def test_to_datetime_mixed_datetime_and_string_with_format_mixed_offsets_utc_false( + self, fmt, constructor + ): + # https://github.com/pandas-dev/pandas/issues/49298 + # https://github.com/pandas-dev/pandas/issues/50254 + # note: ISO8601 formats go down a fastpath, so we need to check both + # a ISO8601 format and a non-ISO8601 one + args = ["2000-01-01 01:00:00", "2000-01-01 02:00:00+00:00"] + ts1 = constructor(args[0]) + ts2 = args[1] + msg = "parsing datetimes with mixed time zones will raise a warning" + + expected = Index( + [ + Timestamp("2000-01-01 01:00:00"), + Timestamp("2000-01-01 02:00:00+0000", tz="UTC"), + ], + ) + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_datetime([ts1, ts2], format=fmt, utc=False) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "fmt, expected", [ pytest.param( "%Y-%m-%d %H:%M:%S%z", - True, - DatetimeIndex( - ["2000-01-01 08:00:00+00:00", "2000-01-02 00:00:00+00:00", "NaT"], - 
dtype="datetime64[ns, UTC]", - ), - id="ISO8601, UTC", - ), - pytest.param( - "%Y-%m-%d %H:%M:%S%z", - False, Index( [ Timestamp("2000-01-01 09:00:00+0100", tz="UTC+01:00"), @@ -696,16 +721,6 @@ def test_to_datetime_mixed_datetime_and_string_with_format( ), pytest.param( "%Y-%d-%m %H:%M:%S%z", - True, - DatetimeIndex( - ["2000-01-01 08:00:00+00:00", "2000-02-01 00:00:00+00:00", "NaT"], - dtype="datetime64[ns, UTC]", - ), - id="non-ISO8601, UTC", - ), - pytest.param( - "%Y-%d-%m %H:%M:%S%z", - False, Index( [ Timestamp("2000-01-01 09:00:00+0100", tz="UTC+01:00"), @@ -717,12 +732,45 @@ def test_to_datetime_mixed_datetime_and_string_with_format( ), ], ) - def test_to_datetime_mixed_offsets_with_none(self, fmt, utc, expected): + def test_to_datetime_mixed_offsets_with_none_tz(self, fmt, expected): + # https://github.com/pandas-dev/pandas/issues/50071 + msg = "parsing datetimes with mixed time zones will raise a warning" + + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_datetime( + ["2000-01-01 09:00:00+01:00", "2000-01-02 02:00:00+02:00", None], + format=fmt, + utc=False, + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "fmt, expected", + [ + pytest.param( + "%Y-%m-%d %H:%M:%S%z", + DatetimeIndex( + ["2000-01-01 08:00:00+00:00", "2000-01-02 00:00:00+00:00", "NaT"], + dtype="datetime64[ns, UTC]", + ), + id="ISO8601, UTC", + ), + pytest.param( + "%Y-%d-%m %H:%M:%S%z", + DatetimeIndex( + ["2000-01-01 08:00:00+00:00", "2000-02-01 00:00:00+00:00", "NaT"], + dtype="datetime64[ns, UTC]", + ), + id="non-ISO8601, UTC", + ), + ], + ) + def test_to_datetime_mixed_offsets_with_none(self, fmt, expected): # https://github.com/pandas-dev/pandas/issues/50071 result = to_datetime( ["2000-01-01 09:00:00+01:00", "2000-01-02 02:00:00+02:00", None], format=fmt, - utc=utc, + utc=True, ) tm.assert_index_equal(result, expected) @@ -1188,7 +1236,9 @@ def test_to_datetime_different_offsets(self, cache): ts_string_2 = "March 1, 2018 
12:00:00+0500" arr = [ts_string_1] * 5 + [ts_string_2] * 5 expected = Index([parse(x) for x in arr]) - result = to_datetime(arr, cache=cache) + msg = "parsing datetimes with mixed time zones will raise a warning" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_datetime(arr, cache=cache) tm.assert_index_equal(result, expected) def test_to_datetime_tz_pytz(self, cache): @@ -1554,7 +1604,9 @@ def test_to_datetime_coerce(self): "March 1, 2018 12:00:00+0500", "20100240", ] - result = to_datetime(ts_strings, errors="coerce") + msg = "parsing datetimes with mixed time zones will raise a warning" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_datetime(ts_strings, errors="coerce") expected = Index( [ datetime(2018, 3, 1, 12, 0, tzinfo=tzoffset(None, 14400)), @@ -1635,9 +1687,11 @@ def test_iso_8601_strings_with_same_offset(self): tm.assert_index_equal(result, expected) def test_iso_8601_strings_with_different_offsets(self): - # GH 17697, 11736 + # GH 17697, 11736, 50887 ts_strings = ["2015-11-18 15:30:00+05:30", "2015-11-18 16:30:00+06:30", NaT] - result = to_datetime(ts_strings) + msg = "parsing datetimes with mixed time zones will raise a warning" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = to_datetime(ts_strings) expected = np.array( [ datetime(2015, 11, 18, 15, 30, tzinfo=tzoffset(None, 19800)), @@ -1675,7 +1729,9 @@ def test_mixed_offsets_with_native_datetime_raises(self): now = Timestamp("now") today = Timestamp("today") - mixed = to_datetime(ser) + msg = "parsing datetimes with mixed time zones will raise a warning" + with tm.assert_produces_warning(FutureWarning, match=msg): + mixed = to_datetime(ser) expected = Series( [ "NaT", @@ -1741,6 +1797,23 @@ def test_to_datetime_fixed_offset(self): result = to_datetime(dates) assert result.tz == fixed_off + @pytest.mark.parametrize( + "date", + [ + ["2020-10-26 00:00:00+06:00", "2020-10-26 00:00:00+01:00"], + ["2020-10-26 00:00:00+06:00", 
Timestamp("2018-01-01", tz="US/Pacific")], + [ + "2020-10-26 00:00:00+06:00", + datetime(2020, 1, 1, 18, tzinfo=pytz.timezone("Australia/Melbourne")), + ], + ], + ) + def test_to_datetime_mixed_offsets_with_utc_false_deprecated(self, date): + # GH 50887 + msg = "parsing datetimes with mixed time zones will raise a warning" + with tm.assert_produces_warning(FutureWarning, match=msg): + to_datetime(date, utc=False) + class TestToDatetimeUnit: @pytest.mark.parametrize("unit", ["Y", "M"]) @@ -3613,3 +3686,20 @@ def test_from_numeric_arrow_dtype(any_numeric_ea_dtype): result = to_datetime(ser) expected = Series([1, 2], dtype="datetime64[ns]") tm.assert_series_equal(result, expected) + + +def test_to_datetime_with_empty_str_utc_false_format_mixed(): + # GH 50887 + result = to_datetime(["2020-01-01 00:00+00:00", ""], format="mixed") + expected = Index([Timestamp("2020-01-01 00:00+00:00"), "NaT"], dtype=object) + tm.assert_index_equal(result, expected) + + +def test_to_datetime_with_empty_str_utc_false_offsets_and_format_mixed(): + # GH 50887 + msg = "parsing datetimes with mixed time zones will raise a warning" + + with tm.assert_produces_warning(FutureWarning, match=msg): + to_datetime( + ["2020-01-01 00:00+00:00", "2020-01-01 00:00+02:00", ""], format="mixed" + ) diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index ba188c3182f57..435fe5f4b90d8 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -85,7 +85,9 @@ def test_parsing_different_timezone_offsets(): data = ["2015-11-18 15:30:00+05:30", "2015-11-18 15:30:00+06:30"] data = np.array(data, dtype=object) - result, result_tz = tslib.array_to_datetime(data) + msg = "parsing datetimes with mixed time zones will raise a warning" + with tm.assert_produces_warning(FutureWarning, match=msg): + result, result_tz = tslib.array_to_datetime(data) expected = np.array( [ datetime(2015, 11, 18, 15, 30, 
tzinfo=tzoffset(None, 19800)),