Skip to content

Commit b7e4c11

Browse files
MarcoGorelli authored and noatamir committed
BUG: guess_datetime_format doesn't guess format correctly for UTC+1 (pandas-dev#48954)
* BUG: guess_datetime_format doesn't guess format correctly for UTC+1
* :doc: add comment on +9 offset

Co-authored-by: MarcoGorelli <>
1 parent cbe5f86 commit b7e4c11

File tree

2 files changed

+19
-9
lines changed

2 files changed

+19
-9
lines changed

pandas/_libs/tslibs/parsing.pyx

+9-1
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,7 @@ from pandas._libs.tslibs.np_datetime cimport (
6262
string_to_dts,
6363
)
6464
from pandas._libs.tslibs.offsets cimport is_offset_object
65+
from pandas._libs.tslibs.strptime import array_strptime
6566
from pandas._libs.tslibs.util cimport (
6667
get_c_string_buf_and_size,
6768
is_array,
@@ -958,7 +959,9 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
958959

959960
Returns
960961
-------
961-
ret : datetime format string (for `strftime` or `strptime`)
962+
str or None : ret
963+
datetime format string (for `strftime` or `strptime`),
964+
or None if it can't be guessed.
962965
"""
963966

964967
if not isinstance(dt_str, str):
@@ -1079,6 +1082,11 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
10791082

10801083
guessed_format = ''.join(output_format)
10811084

1085+
try:
1086+
array_strptime(np.asarray([dt_str], dtype=object), guessed_format)
1087+
except ValueError:
1088+
# Doesn't parse, so this can't be the correct format.
1089+
return None
10821090
# rebuild string, capturing any inferred padding
10831091
dt_str = ''.join(tokens)
10841092
if parsed_datetime.strftime(guessed_format) == dt_str:

pandas/tests/tslibs/test_parsing.py

+10-8
Original file line number | Diff line number | Diff line change
@@ -152,25 +152,27 @@ def test_parsers_month_freq(date_str, expected):
152152
("2011-12-30T00:00:00", "%Y-%m-%dT%H:%M:%S"),
153153
("2011-12-30T00:00:00UTC", "%Y-%m-%dT%H:%M:%S%Z"),
154154
("2011-12-30T00:00:00Z", "%Y-%m-%dT%H:%M:%S%z"),
155-
("2011-12-30T00:00:00+9", "%Y-%m-%dT%H:%M:%S%z"),
156-
("2011-12-30T00:00:00+09", "%Y-%m-%dT%H:%M:%S%z"),
155+
# The +9 format for offsets is supported by dateutil,
156+
# but doesn't round-trip, see https://github.com/pandas-dev/pandas/issues/48921
157+
("2011-12-30T00:00:00+9", None),
158+
("2011-12-30T00:00:00+09", None),
157159
("2011-12-30T00:00:00+090", None),
158160
("2011-12-30T00:00:00+0900", "%Y-%m-%dT%H:%M:%S%z"),
159161
("2011-12-30T00:00:00-0900", "%Y-%m-%dT%H:%M:%S%z"),
160162
("2011-12-30T00:00:00+09:00", "%Y-%m-%dT%H:%M:%S%z"),
161-
("2011-12-30T00:00:00+09:000", "%Y-%m-%dT%H:%M:%S%z"),
162-
("2011-12-30T00:00:00+9:0", "%Y-%m-%dT%H:%M:%S%z"),
163+
("2011-12-30T00:00:00+09:000", None),
164+
("2011-12-30T00:00:00+9:0", None),
163165
("2011-12-30T00:00:00+09:", None),
164166
("2011-12-30T00:00:00.000000UTC", "%Y-%m-%dT%H:%M:%S.%f%Z"),
165167
("2011-12-30T00:00:00.000000Z", "%Y-%m-%dT%H:%M:%S.%f%z"),
166-
("2011-12-30T00:00:00.000000+9", "%Y-%m-%dT%H:%M:%S.%f%z"),
167-
("2011-12-30T00:00:00.000000+09", "%Y-%m-%dT%H:%M:%S.%f%z"),
168+
("2011-12-30T00:00:00.000000+9", None),
169+
("2011-12-30T00:00:00.000000+09", None),
168170
("2011-12-30T00:00:00.000000+090", None),
169171
("2011-12-30T00:00:00.000000+0900", "%Y-%m-%dT%H:%M:%S.%f%z"),
170172
("2011-12-30T00:00:00.000000-0900", "%Y-%m-%dT%H:%M:%S.%f%z"),
171173
("2011-12-30T00:00:00.000000+09:00", "%Y-%m-%dT%H:%M:%S.%f%z"),
172-
("2011-12-30T00:00:00.000000+09:000", "%Y-%m-%dT%H:%M:%S.%f%z"),
173-
("2011-12-30T00:00:00.000000+9:0", "%Y-%m-%dT%H:%M:%S.%f%z"),
174+
("2011-12-30T00:00:00.000000+09:000", None),
175+
("2011-12-30T00:00:00.000000+9:0", None),
174176
("2011-12-30T00:00:00.000000+09:", None),
175177
("2011-12-30 00:00:00.000000", "%Y-%m-%d %H:%M:%S.%f"),
176178
("Tue 24 Aug 2021 01:30:48 AM", "%a %d %b %Y %H:%M:%S %p"),

0 commit comments

Comments
 (0)