Skip to content

Commit d8e7651

Browse files
authored
BUG: to_datetime raising on invalid offsets with errors=coerce and infer_datetime_format (#48676)
* BUG: to_datetime raising on invalid offsets with errors=coerce and infer_datetime_format * fix invalid type test * test errors=ignore as well Co-authored-by: MarcoGorelli <>
1 parent c855be8 commit d8e7651

File tree

4 files changed

+47
-7
lines changed

4 files changed

+47
-7
lines changed

doc/source/whatsnew/v1.6.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -166,8 +166,10 @@ Categorical
166166
Datetimelike
167167
^^^^^^^^^^^^
168168
- Bug in :func:`pandas.infer_freq`, raising ``TypeError`` when inferred on :class:`RangeIndex` (:issue:`47084`)
169+
- Bug in :func:`to_datetime` raising on invalid offsets with ``errors='coerce'`` and ``infer_datetime_format=True`` (:issue:`48633`)
169170
- Bug in :class:`DatetimeIndex` constructor failing to raise when ``tz=None`` is explicitly specified in conjunction with timezone-aware ``dtype`` or data (:issue:`48659`)
170171
- Bug in subtracting a ``datetime`` scalar from :class:`DatetimeIndex` failing to retain the original ``freq`` attribute (:issue:`48818`)
172+
-
171173

172174
Timedelta
173175
^^^^^^^^^

pandas/_libs/tslibs/parsing.pyx

+7-2
Original file line numberDiff line numberDiff line change
@@ -943,7 +943,7 @@ def format_is_iso(f: str) -> bint:
943943
return False
944944

945945

946-
def guess_datetime_format(dt_str, bint dayfirst=False):
946+
def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None:
947947
"""
948948
Guess the datetime format of a given datetime string.
949949

@@ -1026,7 +1026,12 @@ def guess_datetime_format(dt_str, bint dayfirst=False):
10261026
# This separation will prevent subsequent processing
10271027
# from correctly parsing the time zone format.
10281028
# So in addition to the format normalization, we rejoin them here.
1029-
tokens[offset_index] = parsed_datetime.strftime("%z")
1029+
try:
1030+
tokens[offset_index] = parsed_datetime.strftime("%z")
1031+
except ValueError:
1032+
# Invalid offset might not have raised in du_parse
1033+
# https://github.com/dateutil/dateutil/issues/188
1034+
return None
10301035
tokens = tokens[:offset_index + 1 or None]
10311036

10321037
format_guess = [None] * len(tokens)

pandas/tests/tools/test_to_datetime.py

+27-3
Original file line numberDiff line numberDiff line change
@@ -1142,13 +1142,37 @@ def test_to_datetime_coerce(self):
11421142
)
11431143
tm.assert_index_equal(result, expected)
11441144

1145-
def test_to_datetime_coerce_malformed(self):
1145+
@pytest.mark.parametrize("infer_datetime_format", [True, False])
1146+
@pytest.mark.parametrize(
1147+
"errors, expected",
1148+
[
1149+
("coerce", Index([NaT, NaT])),
1150+
("ignore", Index(["200622-12-31", "111111-24-11"])),
1151+
],
1152+
)
1153+
def test_to_datetime_malformed_no_raise(
1154+
self, errors, expected, infer_datetime_format
1155+
):
11461156
# GH 28299
1157+
# GH 48633
11471158
ts_strings = ["200622-12-31", "111111-24-11"]
1148-
result = to_datetime(ts_strings, errors="coerce")
1149-
expected = Index([NaT, NaT])
1159+
result = to_datetime(
1160+
ts_strings, errors=errors, infer_datetime_format=infer_datetime_format
1161+
)
11501162
tm.assert_index_equal(result, expected)
11511163

1164+
@pytest.mark.parametrize("infer_datetime_format", [True, False])
1165+
def test_to_datetime_malformed_raise(self, infer_datetime_format):
1166+
# GH 48633
1167+
ts_strings = ["200622-12-31", "111111-24-11"]
1168+
with pytest.raises(
1169+
ValueError,
1170+
match=r"^hour must be in 0\.\.23: 111111-24-11 present at position 1$",
1171+
):
1172+
to_datetime(
1173+
ts_strings, errors="raise", infer_datetime_format=infer_datetime_format
1174+
)
1175+
11521176
def test_iso_8601_strings_with_same_offset(self):
11531177
# GH 17697, 11736
11541178
ts_str = "2015-11-18 15:30:00+05:30"

pandas/tests/tslibs/test_parsing.py

+11-2
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,6 @@ def test_guess_datetime_format_with_locale_specific_formats(string, fmt):
212212
"1/1/1/1",
213213
"this_is_not_a_datetime",
214214
"51a",
215-
9,
216-
datetime(2011, 1, 1),
217215
],
218216
)
219217
def test_guess_datetime_format_invalid_inputs(invalid_dt):
@@ -222,6 +220,17 @@ def test_guess_datetime_format_invalid_inputs(invalid_dt):
222220
assert parsing.guess_datetime_format(invalid_dt) is None
223221

224222

223+
@pytest.mark.parametrize("invalid_type_dt", [9, datetime(2011, 1, 1)])
224+
def test_guess_datetime_format_wrong_type_inputs(invalid_type_dt):
225+
# Non-string inputs are rejected outright: guess_datetime_format declares
226+
# ``dt_str: str``, so passing another type raises TypeError instead of returning None.
227+
with pytest.raises(
228+
TypeError,
229+
match=r"^Argument 'dt_str' has incorrect type \(expected str, got .*\)$",
230+
):
231+
parsing.guess_datetime_format(invalid_type_dt)
232+
233+
225234
@pytest.mark.parametrize(
226235
"string,fmt",
227236
[

0 commit comments

Comments
 (0)