Skip to content

Commit 6a8aca9

Browse files
authored
BUG: to_datetime with infer_datetime_format dropped timezone names (#33133)
1 parent f10ec59 commit 6a8aca9

File tree

4 files changed

+17
-4
lines changed

4 files changed

+17
-4
lines changed

doc/source/whatsnew/v1.1.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ Timedelta
 313  313   Timezones
 314  314   ^^^^^^^^^
 315  315
 316       - -
      316 + - Bug in :func:`to_datetime` with ``infer_datetime_format=True`` where timezone names (e.g. ``UTC``) would not be parsed correctly (:issue:`33133`)
 317  317   -
 318  318
 319  319

pandas/_libs/tslibs/parsing.pyx

+1
Original file line numberDiff line numberDiff line change
@@ -805,6 +805,7 @@ def _guess_datetime_format(dt_str, dayfirst=False, dt_str_parse=du_parse,
 805  805           (('second',), '%S', 2),
 806  806           (('microsecond',), '%f', 6),
 807  807           (('second', 'microsecond'), '%S.%f', 0),
      808 +         (('tzinfo',), '%Z', 0),
 808  809       ]
 809  810
 810  811       if dayfirst:

pandas/core/tools/datetimes.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -606,9 +606,9 @@ def to_datetime(
 606  606           would calculate the number of milliseconds to the unix epoch start.
 607  607       infer_datetime_format : bool, default False
 608  608           If True and no `format` is given, attempt to infer the format of the
 609       -         datetime strings, and if it can be inferred, switch to a faster
 610       -         method of parsing them. In some cases this can increase the parsing
 611       -         speed by ~5-10x.
      609 +         datetime strings based on the first non-NaN element,
      610 +         and if it can be inferred, switch to a faster method of parsing them.
      611 +         In some cases this can increase the parsing speed by ~5-10x.
 612  612       origin : scalar, default 'unix'
 613  613           Define the reference date. The numeric values would be parsed as number
 614  614           of units (defined by `unit`) since this reference date.

pandas/tests/tools/test_to_datetime.py

+12
Original file line numberDiff line numberDiff line change
@@ -1862,6 +1862,18 @@ def test_to_datetime_infer_datetime_format_series_start_with_nans(self, cache):
 1862  1862               pd.to_datetime(s, infer_datetime_format=True, cache=cache),
 1863  1863           )
 1864  1864
       1865 +     @pytest.mark.parametrize(
       1866 +         "tz_name, offset", [("UTC", 0), ("UTC-3", 180), ("UTC+3", -180)]
       1867 +     )
       1868 +     def test_infer_datetime_format_tz_name(self, tz_name, offset):
       1869 +         # GH 33133
       1870 +         s = pd.Series([f"2019-02-02 08:07:13 {tz_name}"])
       1871 +         result = to_datetime(s, infer_datetime_format=True)
       1872 +         expected = pd.Series(
       1873 +             [pd.Timestamp("2019-02-02 08:07:13").tz_localize(pytz.FixedOffset(offset))]
       1874 +         )
       1875 +         tm.assert_series_equal(result, expected)
       1876 +
 1865  1877       @pytest.mark.parametrize("cache", [True, False])
 1866  1878       def test_to_datetime_iso8601_noleading_0s(self, cache):
 1867  1879           # GH 11871

0 commit comments

Comments
 (0)