diff --git a/doc/source/whatsnew/v2.0.2.rst b/doc/source/whatsnew/v2.0.2.rst index f3432564233a1..1f1786151ffb9 100644 --- a/doc/source/whatsnew/v2.0.2.rst +++ b/doc/source/whatsnew/v2.0.2.rst @@ -29,6 +29,7 @@ Bug fixes - Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame`'s of incorrect sizes when called on slices (:issue:`52824`) - Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`) - Bug in :func:`merge` when merging on datetime columns on different resolutions (:issue:`53200`) +- Bug in :func:`to_datetime` was inferring format to contain ``"%H"`` instead of ``"%I"`` if date contained "AM" / "PM" tokens (:issue:`53147`) - Bug in :meth:`DataFrame.convert_dtypes` ignores ``convert_*`` keywords when set to False ``dtype_backend="pyarrow"`` (:issue:`52872`) - Bug in :meth:`DataFrame.sort_values` raising for PyArrow ``dictionary`` dtype (:issue:`53232`) - Bug in :meth:`Series.describe` treating pyarrow-backed timestamps and timedeltas as categorical data (:issue:`53001`) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 445683968c58f..146e14f622ccd 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -990,6 +990,11 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: output_format.append(tokens[i]) + # if am/pm token present, replace 24-hour %H, with 12-hour %I + if "%p" in output_format and "%H" in output_format: + i = output_format.index("%H") + output_format[i] = "%I" + guessed_format = "".join(output_format) try: diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 82c2375ffd628..e741fd310eb41 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2953,6 +2953,9 @@ class TestDatetimeParsingWrappers: ("2005-11-09 10:15", datetime(2005, 11, 9, 10, 15)), ("2005-11-09 08H", datetime(2005, 11, 9, 8, 0)), ("2005/11/09 10:15", datetime(2005, 11, 9, 10, 15)), + ("2005/11/09 10:15:32", datetime(2005, 11, 9, 10, 15, 32)), + ("2005/11/09 10:15:32 AM", datetime(2005, 11, 9, 10, 15, 32)), + ("2005/11/09 10:15:32 PM", datetime(2005, 11, 9, 22, 15, 32)), ("2005/11/09 08H", datetime(2005, 11, 9, 8, 0)), ("Thu Sep 25 10:36:28 2003", datetime(2003, 9, 25, 10, 36, 28)), ("Thu Sep 25 2003", datetime(2003, 9, 25)), diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 6500afdf87beb..e33c6e37ac0e7 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -201,8 +201,10 @@ def test_parsers_month_freq(date_str, expected): ("2011-12-30T00:00:00.000000+9:0", "%Y-%m-%dT%H:%M:%S.%f%z"), ("2011-12-30T00:00:00.000000+09:", None), ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"), - ("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %H:%M:%S %p"), - ("Tuesday 24 Aug 2021 01:30:48 AM", "%A %d %b %Y %H:%M:%S %p"), + ("Tue 24 Aug 2021 01:30:48", "%a %d %b %Y %H:%M:%S"), + ("Tuesday 24 Aug 2021 01:30:48", "%A %d %b %Y %H:%M:%S"), + ("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %I:%M:%S %p"), + ("Tuesday 24 Aug 2021 01:30:48 AM", "%A %d %b %Y %I:%M:%S %p"), ("27.03.2003 14:55:00.000", "%d.%m.%Y %H:%M:%S.%f"), # GH50317 ], )