Skip to content

Commit 91d172f

Browse files
committed
ENH: Add support for more placeholders in guess_datetime_format (#43901)
Add support for the day-of-week (`%a`, `%A`) and meridiem (`%p`) placeholders, and for any combination of placeholders supported by `strftime` that does not correspond to a datetime attribute.
1 parent a7e9183 commit 91d172f

File tree

4 files changed

+27
-9
lines changed

4 files changed

+27
-9
lines changed

asv_bench/benchmarks/inference.py

+10
Original file line numberDiff line numberDiff line change
@@ -277,6 +277,16 @@ def time_dup_string_tzoffset_dates(self, cache):
277277
to_datetime(self.dup_string_with_tz, cache=cache)
278278

279279

280+
# GH 43901
281+
class ToDatetimeInferDatetimeFormat:
282+
def setup(self):
283+
rng = date_range(start="1/1/2000", periods=100000, freq="H")
284+
self.strings = rng.strftime("%Y-%m-%d %H:%M:%S").tolist()
285+
286+
def time_infer_datetime_format(self):
287+
to_datetime(self.strings, infer_datetime_format=True)
288+
289+
280290
class ToTimedelta:
281291
def setup(self):
282292
self.ints = np.random.randint(0, 60, size=10000)

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ Performance improvements
358358
- Performance improvement in some :meth:`GroupBy.apply` operations (:issue:`42992`)
359359
- Performance improvement in :func:`read_stata` (:issue:`43059`)
360360
- Performance improvement in :meth:`to_datetime` with ``uint`` dtypes (:issue:`42606`)
361+
- Performance improvement in :meth:`to_datetime` with ``infer_datetime_format`` set to ``True`` (:issue:`43901`)
361362
- Performance improvement in :meth:`Series.sparse.to_coo` (:issue:`42880`)
362363
- Performance improvement in indexing with a :class:`MultiIndex` indexer on another :class:`MultiIndex` (:issue:`43370`)
363364
- Performance improvement in :meth:`GroupBy.quantile` (:issue:`43469`)

pandas/_libs/tslibs/parsing.pyx

+14-9
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,9 @@ def guess_datetime_format(
911911
(('second', 'microsecond'), '%S.%f', 0),
912912
(('tzinfo',), '%z', 0),
913913
(('tzinfo',), '%Z', 0),
914+
(('day_of_week',), '%a', 0),
915+
(('day_of_week',), '%A', 0),
916+
(('meridiem',), '%p', 0),
914917
]
915918

916919
if dayfirst:
@@ -967,15 +970,17 @@ def guess_datetime_format(
967970
if set(attrs) & found_attrs:
968971
continue
969972

970-
if all(getattr(parsed_datetime, attr) is not None for attr in attrs):
971-
for i, token_format in enumerate(format_guess):
972-
token_filled = tokens[i].zfill(padding)
973-
if (token_format is None and
974-
token_filled == parsed_datetime.strftime(attr_format)):
975-
format_guess[i] = attr_format
976-
tokens[i] = token_filled
977-
found_attrs.update(attrs)
978-
break
973+
if parsed_datetime.tzinfo is None and attr_format in ("%Z", "%z"):
974+
continue
975+
976+
parsed_formatted = parsed_datetime.strftime(attr_format)
977+
for i, token_format in enumerate(format_guess):
978+
token_filled = tokens[i].zfill(padding)
979+
if token_format is None and token_filled == parsed_formatted:
980+
format_guess[i] = attr_format
981+
tokens[i] = token_filled
982+
found_attrs.update(attrs)
983+
break
979984

980985
# Only consider it a valid guess if we have a year, month and day
981986
if len({'year', 'month', 'day'} & found_attrs) != 3:

pandas/tests/tslibs/test_parsing.py

+2
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,8 @@ def test_parsers_month_freq(date_str, expected):
167167
("2011-12-30T00:00:00.000000+9:0", "%Y-%m-%dT%H:%M:%S.%f%z"),
168168
("2011-12-30T00:00:00.000000+09:", None),
169169
("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"),
170+
("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %H:%M:%S %p"),
171+
("Tuesday 24 Aug 2021 01:30:48 AM", "%A %d %b %Y %H:%M:%S %p"),
170172
],
171173
)
172174
def test_guess_datetime_format_with_parseable_formats(string, fmt):

0 commit comments

Comments
 (0)