Skip to content

Commit 5b39709

Browse files
authored
PERF: Enable %z in parsing datetime (#32984)
1 parent 99f2ccb commit 5b39709

File tree

4 files changed

+57
-7
lines changed

4 files changed

+57
-7
lines changed

asv_bench/benchmarks/timeseries.py

+19-1
Original file line numberDiff line numberDiff line change
@@ -336,15 +336,33 @@ def time_infer_quarter(self):
336336

337337
class ToDatetimeFormat:
338338
def setup(self):
339-
self.s = Series(["19MAY11", "19MAY11:00:00:00"] * 100000)
339+
N = 100000
340+
self.s = Series(["19MAY11", "19MAY11:00:00:00"] * N)
340341
self.s2 = self.s.str.replace(":\\S+$", "")
341342

343+
self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N
344+
self.diff_offset = [
345+
f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10)
346+
] * int(N / 10)
347+
342348
def time_exact(self):
343349
to_datetime(self.s2, format="%d%b%y")
344350

345351
def time_no_exact(self):
346352
to_datetime(self.s, format="%d%b%y", exact=False)
347353

354+
def time_same_offset(self):
355+
to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z")
356+
357+
def time_different_offset(self):
358+
to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z")
359+
360+
def time_same_offset_to_utc(self):
361+
to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True)
362+
363+
def time_different_offset_to_utc(self):
364+
to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True)
365+
348366

349367
class ToDatetimeCache:
350368

doc/source/whatsnew/v1.1.0.rst

+16
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,22 @@ For example:
5858
5959
For more on working with fold, see :ref:`Fold subsection <timeseries.fold>` in the user guide.
6060

61+
.. _whatsnew_110.to_datetime_multiple_tzname_tzoffset_support:
62+
63+
Parsing timezone-aware format with different timezones in to_datetime
64+
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
65+
66+
:func:`to_datetime` now supports parsing formats that contain timezone names (``%Z``) or UTC offsets (``%z``) drawn from different timezones, and converting the parsed values to UTC when ``utc=True`` is set. With ``utc=True`` the result is a :class:`DatetimeIndex` localized to UTC; without it, the result is an :class:`Index` with ``object`` dtype (:issue:`32792`).
67+
68+
For example:
69+
70+
.. ipython:: python
71+
72+
tz_strs = ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100",
73+
"2010-01-01 12:00:00 +0300", "2010-01-01 12:00:00 +0400"]
74+
pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z', utc=True)
75+
pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z')
76+
6177
.. _whatsnew_110.enhancements.other:
6278

6379
Other enhancements

pandas/core/tools/datetimes.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -229,13 +229,12 @@ def _return_parsed_timezone_results(result, timezones, tz, name):
229229
-------
230230
tz_result : Index-like of parsed dates with timezone
231231
"""
232-
if tz is not None:
233-
raise ValueError(
234-
"Cannot pass a tz argument when parsing strings with timezone information."
235-
)
236232
tz_results = np.array(
237233
[Timestamp(res).tz_localize(zone) for res, zone in zip(result, timezones)]
238234
)
235+
if tz is not None:
236+
# Convert to the same tz
237+
tz_results = np.array([tz_result.tz_convert(tz) for tz_result in tz_results])
239238
from pandas import Index
240239

241240
return Index(tz_results, name=name)

pandas/tests/tools/test_to_datetime.py

+19-2
Original file line numberDiff line numberDiff line change
@@ -323,8 +323,25 @@ def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates):
323323
expected = pd.Index(expected_dates)
324324
tm.assert_equal(result, expected)
325325

326-
with pytest.raises(ValueError):
327-
pd.to_datetime(dates, format=fmt, utc=True)
326+
def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self):
327+
# GH 32792
328+
dates = [
329+
"2010-01-01 12:00:00 +0100",
330+
"2010-01-01 12:00:00 -0100",
331+
"2010-01-01 12:00:00 +0300",
332+
"2010-01-01 12:00:00 +0400",
333+
]
334+
expected_dates = [
335+
"2010-01-01 11:00:00+00:00",
336+
"2010-01-01 13:00:00+00:00",
337+
"2010-01-01 09:00:00+00:00",
338+
"2010-01-01 08:00:00+00:00",
339+
]
340+
fmt = "%Y-%m-%d %H:%M:%S %z"
341+
342+
result = pd.to_datetime(dates, format=fmt, utc=True)
343+
expected = pd.DatetimeIndex(expected_dates)
344+
tm.assert_index_equal(result, expected)
328345

329346
@pytest.mark.parametrize(
330347
"offset", ["+0", "-1foo", "UTCbar", ":10", "+01:000:01", ""]

0 commit comments

Comments
 (0)