From b7d1476f9a0d0a2380895b2ce6ac6fdd1db105b0 Mon Sep 17 00:00:00 2001 From: Julian Badillo Date: Mon, 15 May 2023 20:05:40 +0000 Subject: [PATCH 1/5] BUG: Add am/pm parsing support on guess_format --- pandas/_libs/tslibs/parsing.pyx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 0cf03ecf34c41..dbaba8c8c9913 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -1019,6 +1019,11 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: output_format.append(tokens[i]) + # if am/pm token present, replace 24-hour %H, with 12-hour %I + if "%p" in format_guess and "%H" in format_guess: + i = format_guess.index("%H") + format_guess[i] = "%I" + guessed_format = "".join(output_format) try: From a9c9734ce1f3a811e511d352020bd948dae07124 Mon Sep 17 00:00:00 2001 From: Julian Badillo Date: Mon, 15 May 2023 20:14:30 +0000 Subject: [PATCH 2/5] :pick: add unit tests --- pandas/tests/tools/test_to_datetime.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 0b5696116e610..8eee9e9725506 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2942,6 +2942,11 @@ class TestDatetimeParsingWrappers: ("2005-11-09 10:15", datetime(2005, 11, 9, 10, 15)), ("2005-11-09 08H", datetime(2005, 11, 9, 8, 0)), ("2005/11/09 10:15", datetime(2005, 11, 9, 10, 15)), + ("2005/11/09 10:15 AM", datetime(2005, 11, 9, 10, 15)), + ("2005/11/09 10:15 PM", datetime(2005, 11, 9, 22, 15)), + ("2005/11/09 10:15:32", datetime(2005, 11, 9, 10, 15, 32)), + ("2005/11/09 10:15:32 AM", datetime(2005, 11, 9, 10, 15, 32)), + ("2005/11/09 10:15:32 PM", datetime(2005, 11, 9, 22, 15, 32)), ("2005/11/09 08H", datetime(2005, 11, 9, 8, 0)), ("Thu Sep 25 10:36:28 2003", datetime(2003, 9, 25, 10, 36, 28)), ("Thu Sep 25 2003", datetime(2003, 9, 25)), From 
9fae954a44897124d331ee862df775e40f566f1d Mon Sep 17 00:00:00 2001 From: Julian Badillo Date: Mon, 15 May 2023 20:30:58 +0000 Subject: [PATCH 3/5] more unit tests --- pandas/tests/tslibs/test_parsing.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 587527c2058d7..c73d03a633f12 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -208,6 +208,8 @@ def test_parsers_month_freq(date_str, expected): ("2011-12-30T00:00:00.000000+9:0", "%Y-%m-%dT%H:%M:%S.%f%z"), ("2011-12-30T00:00:00.000000+09:", None), ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"), + ("2005/11/09 10:15 AM", "%Y/%m/%d %I:%M %p"), + ("2005/11/09 10:15:32 AM", "%Y/%m/%d %I:%M:%S %p"), ("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %H:%M:%S %p"), ("Tuesday 24 Aug 2021 01:30:48 AM", "%A %d %b %Y %H:%M:%S %p"), ("27.03.2003 14:55:00.000", "%d.%m.%Y %H:%M:%S.%f"), # GH50317 From 36f7d860dc414e344cd5da6895626040f24d2fb4 Mon Sep 17 00:00:00 2001 From: Julian Badillo Date: Tue, 16 May 2023 13:42:51 +0000 Subject: [PATCH 4/5] :pick: not guess am/pm mark --- pandas/_libs/tslibs/parsing.pyx | 6 ------ pandas/tests/tools/test_to_datetime.py | 4 ---- pandas/tests/tslibs/test_parsing.py | 6 ++---- 3 files changed, 2 insertions(+), 14 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index dbaba8c8c9913..c7a3da3d378b7 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -908,7 +908,6 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: (("tzinfo",), "%Z", 0), (("day_of_week",), "%a", 0), (("day_of_week",), "%A", 0), - (("meridiem",), "%p", 0), ] if dayfirst: @@ -1019,11 +1018,6 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: output_format.append(tokens[i]) - # if am/pm token present, replace 24-hour %H, with 12-hour %I - if "%p" in format_guess and "%H" in 
format_guess: - i = format_guess.index("%H") - format_guess[i] = "%I" - guessed_format = "".join(output_format) try: diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 8eee9e9725506..b8079375b2aaf 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2942,11 +2942,7 @@ class TestDatetimeParsingWrappers: ("2005-11-09 10:15", datetime(2005, 11, 9, 10, 15)), ("2005-11-09 08H", datetime(2005, 11, 9, 8, 0)), ("2005/11/09 10:15", datetime(2005, 11, 9, 10, 15)), - ("2005/11/09 10:15 AM", datetime(2005, 11, 9, 10, 15)), - ("2005/11/09 10:15 PM", datetime(2005, 11, 9, 22, 15)), ("2005/11/09 10:15:32", datetime(2005, 11, 9, 10, 15, 32)), - ("2005/11/09 10:15:32 AM", datetime(2005, 11, 9, 10, 15, 32)), - ("2005/11/09 10:15:32 PM", datetime(2005, 11, 9, 22, 15, 32)), ("2005/11/09 08H", datetime(2005, 11, 9, 8, 0)), ("Thu Sep 25 10:36:28 2003", datetime(2003, 9, 25, 10, 36, 28)), ("Thu Sep 25 2003", datetime(2003, 9, 25)), diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index c73d03a633f12..a885040128b65 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -208,10 +208,8 @@ def test_parsers_month_freq(date_str, expected): ("2011-12-30T00:00:00.000000+9:0", "%Y-%m-%dT%H:%M:%S.%f%z"), ("2011-12-30T00:00:00.000000+09:", None), ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"), - ("2005/11/09 10:15 AM", "%Y/%m/%d %I:%M %p"), - ("2005/11/09 10:15:32 AM", "%Y/%m/%d %I:%M:%S %p"), - ("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %H:%M:%S %p"), - ("Tuesday 24 Aug 2021 01:30:48 AM", "%A %d %b %Y %H:%M:%S %p"), + ("Tue 24 Aug 2021 01:30:48", "%a %d %b %Y %H:%M:%S"), + ("Tuesday 24 Aug 2021 01:30:48", "%A %d %b %Y %H:%M:%S"), ("27.03.2003 14:55:00.000", "%d.%m.%Y %H:%M:%S.%f"), # GH50317 ], ) From 554a136bf0938d449ce04144f1425526bc937c54 Mon Sep 17 00:00:00 2001 From: Julian Badillo Date: Tue, 16 May 2023 14:19:06 
+0000 Subject: [PATCH 5/5] Unit test and comments --- doc/source/whatsnew/v2.1.0.rst | 2 +- pandas/tests/tslibs/test_parsing.py | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst index 201bf23ea2339..b4b606e349f51 100644 --- a/doc/source/whatsnew/v2.1.0.rst +++ b/doc/source/whatsnew/v2.1.0.rst @@ -352,7 +352,7 @@ Conversion - Bug in :meth:`DataFrame.__repr__` incorrectly raising a ``TypeError`` when the dtype of a column is ``np.record`` (:issue:`48526`) - Bug in :meth:`DataFrame.info` raising ``ValueError`` when ``use_numba`` is set (:issue:`51922`) - Bug in :meth:`DataFrame.insert` raising ``TypeError`` if ``loc`` is ``np.int64`` (:issue:`53193`) -- +- Bug in :meth:`datetimes._guess_datetime_format` incorrectly guessing a 24-hour format for strings containing "AM" / "PM" tokens (:issue:`53147`) Strings ^^^^^^^ diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index a885040128b65..c6bf667ddd70c 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -210,6 +210,8 @@ def test_parsers_month_freq(date_str, expected): ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"), ("Tue 24 Aug 2021 01:30:48", "%a %d %b %Y %H:%M:%S"), ("Tuesday 24 Aug 2021 01:30:48", "%A %d %b %Y %H:%M:%S"), + ("Tue 24 Aug 2021 01:30:48 AM", None), + ("Tuesday 24 Aug 2021 01:30:48 AM", None), ("27.03.2003 14:55:00.000", "%d.%m.%Y %H:%M:%S.%f"), # GH50317 ], )