From 3aa9d2378a8a7127d6c18d4592c9f4d2a48e5dde Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Sat, 17 Dec 2022 14:24:50 +0000 Subject: [PATCH 1/2] fix parsing --- pandas/_libs/tslibs/parsing.pyx | 4 +++- pandas/tests/tslibs/test_parsing.py | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 614db69425f4c..76eebb2e35d03 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -1016,9 +1016,11 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: cdef str _fill_token(token: str, padding: int): cdef str token_filled - if "." not in token: + if re.search(r"\d*\.\d+", token) is None: + # For example: 98 token_filled = token.zfill(padding) else: + # For example: 00.123 seconds, nanoseconds = token.split(".") seconds = f"{int(seconds):02d}" # right-pad so we get nanoseconds, then only take diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index a4c79e77d2eed..4eb2fe3850fe8 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -179,6 +179,7 @@ def test_parsers_month_freq(date_str, expected): ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"), ("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %H:%M:%S %p"), ("Tuesday 24 Aug 2021 01:30:48 AM", "%A %d %b %Y %H:%M:%S %p"), + ("27.03.2003 14:55:00.000", "%d.%m.%Y %H:%M:%S.%f"), # GH50317 ], ) def test_guess_datetime_format_with_parseable_formats(string, fmt): From b2f5bf3b327f7c931a54016ecfc1521898a4fe05 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Sat, 17 Dec 2022 16:30:47 +0000 Subject: [PATCH 2/2] stricter regex --- pandas/_libs/tslibs/parsing.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 76eebb2e35d03..992e1d90f4f3b 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -1016,7 +1016,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: cdef str _fill_token(token: str, padding: int): cdef str token_filled - if re.search(r"\d*\.\d+", token) is None: + if re.search(r"\d+\.\d+", token) is None: # For example: 98 token_filled = token.zfill(padding) else: