diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 8db837a38170b..be48552fb04e9 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -313,7 +313,7 @@ Timedelta Timezones ^^^^^^^^^ -- +- Bug in :func:`to_datetime` with ``infer_datetime_format=True`` where timezone names (e.g. ``UTC``) would not be parsed correctly (:issue:`33133`) - diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 74b95a2f3076f..5272a0a042d0e 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -805,6 +805,7 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse, (('second',), '%S', 2), (('microsecond',), '%f', 6), (('second', 'microsecond'), '%S.%f', 0), + (('tzinfo',), '%Z', 0), ] if dayfirst: diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index a2042de7f337e..207c5cc98449a 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -606,9 +606,9 @@ def to_datetime( would calculate the number of milliseconds to the unix epoch start. infer_datetime_format : bool, default False If True and no `format` is given, attempt to infer the format of the - datetime strings, and if it can be inferred, switch to a faster - method of parsing them. In some cases this can increase the parsing - speed by ~5-10x. + datetime strings based on the first non-NaN element, + and if it can be inferred, switch to a faster method of parsing them. + In some cases this can increase the parsing speed by ~5-10x. origin : scalar, default 'unix' Define the reference date. The numeric values would be parsed as number of units (defined by `unit`) since this reference date. diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index a751182dbf7af..d2049892705ea 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1862,6 +1862,18 @@ def test_to_datetime_infer_datetime_format_series_start_with_nans(self, cache): pd.to_datetime(s, infer_datetime_format=True, cache=cache), ) + @pytest.mark.parametrize( + "tz_name, offset", [("UTC", 0), ("UTC-3", 180), ("UTC+3", -180)] + ) + def test_infer_datetime_format_tz_name(self, tz_name, offset): + # GH 33133 + s = pd.Series([f"2019-02-02 08:07:13 {tz_name}"]) + result = to_datetime(s, infer_datetime_format=True) + expected = pd.Series( + [pd.Timestamp("2019-02-02 08:07:13").tz_localize(pytz.FixedOffset(offset))] + ) + tm.assert_series_equal(result, expected) + @pytest.mark.parametrize("cache", [True, False]) def test_to_datetime_iso8601_noleading_0s(self, cache): # GH 11871