diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index a181133c14f2b..763530520cd29 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -62,6 +62,7 @@ from pandas._libs.tslibs.np_datetime cimport ( string_to_dts, ) from pandas._libs.tslibs.offsets cimport is_offset_object +from pandas._libs.tslibs.strptime import array_strptime from pandas._libs.tslibs.util cimport ( get_c_string_buf_and_size, is_array, @@ -958,7 +959,9 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: Returns ------- - ret : datetime format string (for `strftime` or `strptime`) + ret : str or None + datetime format string (for `strftime` or `strptime`), + or None if it can't be guessed. """ if not isinstance(dt_str, str): @@ -1079,6 +1082,11 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: guessed_format = ''.join(output_format) + try: + array_strptime(np.asarray([dt_str], dtype=object), guessed_format) + except ValueError: + # Doesn't parse, so this can't be the correct format. 
+ return None # rebuild string, capturing any inferred padding dt_str = ''.join(tokens) if parsed_datetime.strftime(guessed_format) == dt_str: diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 03084fcbdcb11..e0166c876cdf5 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -152,25 +152,27 @@ def test_parsers_month_freq(date_str, expected): ("2011-12-30T00:00:00", "%Y-%m-%dT%H:%M:%S"), ("2011-12-30T00:00:00UTC", "%Y-%m-%dT%H:%M:%S%Z"), ("2011-12-30T00:00:00Z", "%Y-%m-%dT%H:%M:%S%z"), - ("2011-12-30T00:00:00+9", "%Y-%m-%dT%H:%M:%S%z"), - ("2011-12-30T00:00:00+09", "%Y-%m-%dT%H:%M:%S%z"), + # The +9 format for offsets is supported by dateutil, + # but doesn't round-trip, see https://github.com/pandas-dev/pandas/issues/48921 + ("2011-12-30T00:00:00+9", None), + ("2011-12-30T00:00:00+09", None), ("2011-12-30T00:00:00+090", None), ("2011-12-30T00:00:00+0900", "%Y-%m-%dT%H:%M:%S%z"), ("2011-12-30T00:00:00-0900", "%Y-%m-%dT%H:%M:%S%z"), ("2011-12-30T00:00:00+09:00", "%Y-%m-%dT%H:%M:%S%z"), - ("2011-12-30T00:00:00+09:000", "%Y-%m-%dT%H:%M:%S%z"), - ("2011-12-30T00:00:00+9:0", "%Y-%m-%dT%H:%M:%S%z"), + ("2011-12-30T00:00:00+09:000", None), + ("2011-12-30T00:00:00+9:0", None), ("2011-12-30T00:00:00+09:", None), ("2011-12-30T00:00:00.000000UTC", "%Y-%m-%dT%H:%M:%S.%f%Z"), ("2011-12-30T00:00:00.000000Z", "%Y-%m-%dT%H:%M:%S.%f%z"), - ("2011-12-30T00:00:00.000000+9", "%Y-%m-%dT%H:%M:%S.%f%z"), - ("2011-12-30T00:00:00.000000+09", "%Y-%m-%dT%H:%M:%S.%f%z"), + ("2011-12-30T00:00:00.000000+9", None), + ("2011-12-30T00:00:00.000000+09", None), ("2011-12-30T00:00:00.000000+090", None), ("2011-12-30T00:00:00.000000+0900", "%Y-%m-%dT%H:%M:%S.%f%z"), ("2011-12-30T00:00:00.000000-0900", "%Y-%m-%dT%H:%M:%S.%f%z"), ("2011-12-30T00:00:00.000000+09:00", "%Y-%m-%dT%H:%M:%S.%f%z"), - ("2011-12-30T00:00:00.000000+09:000", "%Y-%m-%dT%H:%M:%S.%f%z"), - ("2011-12-30T00:00:00.000000+9:0", "%Y-%m-%dT%H:%M:%S.%f%z"), + 
("2011-12-30T00:00:00.000000+09:000", None), + ("2011-12-30T00:00:00.000000+9:0", None), ("2011-12-30T00:00:00.000000+09:", None), ("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"), ("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %H:%M:%S %p"),