From 15a6a29e6334af06fd8f0e4c9c632154e15d1224 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 18 Feb 2023 09:09:16 -0800 Subject: [PATCH 1/2] DEPR: silently ignoring unrecognized timezones --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/_libs/tslibs/parsing.pyx | 12 ++++++++++++ pandas/tests/tools/test_to_datetime.py | 17 +++++++++++++++++ 3 files changed, 30 insertions(+) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 78422ec686da8..13d331bbd4d67 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -824,6 +824,7 @@ Other API changes Deprecations ~~~~~~~~~~~~ - Deprecated parsing datetime strings with system-local timezone to ``tzlocal``, pass a ``tz`` keyword or explicitly call ``tz_localize`` instead (:issue:`50791`) +- Deprecated silently dropping unrecognized timezones when parsing strings to datetimes (:issue:`18702`) - Deprecated argument ``infer_datetime_format`` in :func:`to_datetime` and :func:`read_csv`, as a strict version of it is now the default (:issue:`48621`) - Deprecated behavior of :func:`to_datetime` with ``unit`` when parsing strings, in a future version these will be parsed as datetimes (matching unit-less behavior) instead of cast to floats. To retain the old behavior, cast strings to numeric types before calling :func:`to_datetime` (:issue:`50735`) - Deprecated :func:`pandas.io.sql.execute` (:issue:`50185`) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 445683968c58f..11be2ae1aba46 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -722,6 +722,18 @@ cdef datetime dateutil_parse( f'Parsed string "{timestr}" gives an invalid tzoffset, ' "which must be between -timedelta(hours=24) and timedelta(hours=24)" ) + elif res.tzname is not None: + # e.g. "1994 Jan 15 05:16 FOO" where FOO is not recognized + # GH#18702 + warnings.warn( + f'Parsed string "{timestr}" included an un-recognized timezone ' + f'"{res.tzname}". Dropping unrecognized timezones is deprecated; ' + "in a future version this will raise. Instead pass the string " + "without the timezone, then use .tz_localize to convert to a " + "recognized timezone.", + FutureWarning, + stacklevel=find_stack_level() + ) out_bestunit[0] = attrname_to_npy_unit[reso] return ret diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index dc8b7ce0996a9..b693463cfdc34 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3595,3 +3595,20 @@ def test_to_datetime_mixed_not_necessarily_iso8601_coerce(errors, expected): # https://github.com/pandas-dev/pandas/issues/50411 result = to_datetime(["2020-01-01", "01-01-2000"], format="ISO8601", errors=errors) tm.assert_index_equal(result, expected) + + +def test_ignoring_unknown_tz_deprecated(): + # GH#18702 + dtstr = "2014 Jan 9 05:15 FAKE" + msg = 'un-recognized timezone "FAKE". Dropping unrecognized timezones is deprecated' + with tm.assert_produces_warning(FutureWarning, match=msg): + res = Timestamp(dtstr) + assert res == Timestamp(dtstr[:-5]) + # FIXME: GH#51476 needs to be addressed before this warning will show + # up for to_datetime + # with tm.assert_produces_warning(FutureWarning): + # res = to_datetime(dtstr) + # assert res == to_datetime(dtstr[:-5]) + # with tm.assert_produces_warning(FutureWarning): + # res = to_datetime([dtstr]) + # tm.assert_index_equal(res, to_datetime([dtstr[:-5]])) From 3c0f18b8d4fff906581bd518226780ad86e16fd3 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 27 Feb 2023 16:34:24 -0800 Subject: [PATCH 2/2] catch to_datetime cases --- pandas/_libs/tslibs/parsing.pyx | 13 ++++++++++++- pandas/tests/tools/test_to_datetime.py | 17 ++++++++--------- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 11be2ae1aba46..c314149e24a4c 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -877,6 +877,8 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: datetime format string (for `strftime` or `strptime`), or None if it can't be guessed. """ + cdef: + NPY_DATETIMEUNIT out_bestunit day_attribute_and_format = (("day",), "%d", 2) # attr name, format, padding (if any) @@ -907,8 +909,17 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: datetime_attrs_to_format.remove(day_attribute_and_format) datetime_attrs_to_format.insert(0, day_attribute_and_format) + # same default used by dateutil + default = datetime.now().replace(hour=0, minute=0, second=0, microsecond=0) try: - parsed_datetime = du_parse(dt_str, dayfirst=dayfirst) + parsed_datetime = dateutil_parse( + dt_str, + default=default, + dayfirst=dayfirst, + yearfirst=False, + ignoretz=False, + out_bestunit=&out_bestunit, + ) except (ValueError, OverflowError, InvalidOperation): # In case the datetime can't be parsed, its format cannot be guessed return None diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index b693463cfdc34..c2be6294c2349 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -3598,17 +3598,16 @@ def test_to_datetime_mixed_not_necessarily_iso8601_coerce(errors, expected): def test_ignoring_unknown_tz_deprecated(): - # GH#18702 + # GH#18702, GH#51476 dtstr = "2014 Jan 9 05:15 FAKE" msg = 'un-recognized timezone "FAKE". Dropping unrecognized timezones is deprecated' with tm.assert_produces_warning(FutureWarning, match=msg): res = Timestamp(dtstr) assert res == Timestamp(dtstr[:-5]) - # FIXME: GH#51476 needs to be addressed before this warning will show - # up for to_datetime - # with tm.assert_produces_warning(FutureWarning): - # res = to_datetime(dtstr) - # assert res == to_datetime(dtstr[:-5]) - # with tm.assert_produces_warning(FutureWarning): - # res = to_datetime([dtstr]) - # tm.assert_index_equal(res, to_datetime([dtstr[:-5]])) + + with tm.assert_produces_warning(FutureWarning): + res = to_datetime(dtstr) + assert res == to_datetime(dtstr[:-5]) + with tm.assert_produces_warning(FutureWarning): + res = to_datetime([dtstr]) + tm.assert_index_equal(res, to_datetime([dtstr[:-5]]))