Skip to content

Commit 4e88c3f

Browse files
authored
ERR non-ISO formats don't show position of error (#50366)
* wip
* unify messages
* fixup regexes
* simplify
* fix test
* remove (search) and (match) from messages
* simplify

Co-authored-by: MarcoGorelli <>
1 parent b6eecb3 commit 4e88c3f

File tree

6 files changed: +68 -34 lines changed

doc/source/whatsnew/v2.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,7 @@ Other enhancements
101101
- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`)
102102
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
103103
- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`)
104+
- Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`)
104105
- Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`)
105106
-
106107

pandas/_libs/tslibs/parsing.pyx

+3-3
Original file line number | Diff line number | Diff line change
@@ -263,7 +263,7 @@ def parse_datetime_string(
263263
datetime dt
264264

265265
if not _does_string_look_like_datetime(date_string):
266-
raise ValueError(f"Given date string {date_string} not likely a datetime")
266+
raise ValueError(f'Given date string "{date_string}" not likely a datetime')
267267

268268
if does_string_look_like_time(date_string):
269269
# use current datetime as default, not pass _DEFAULT_DATETIME
@@ -297,7 +297,7 @@ def parse_datetime_string(
297297
except TypeError:
298298
# following may be raised from dateutil
299299
# TypeError: 'NoneType' object is not iterable
300-
raise ValueError(f"Given date string {date_string} not likely a datetime")
300+
raise ValueError(f'Given date string "{date_string}" not likely a datetime')
301301

302302
return dt
303303

@@ -373,7 +373,7 @@ cdef parse_datetime_string_with_reso(
373373
int out_tzoffset
374374

375375
if not _does_string_look_like_datetime(date_string):
376-
raise ValueError(f"Given date string {date_string} not likely a datetime")
376+
raise ValueError(f'Given date string "{date_string}" not likely a datetime')
377377

378378
parsed, reso = _parse_delimited_date(date_string, dayfirst)
379379
if parsed is not None:

pandas/_libs/tslibs/strptime.pyx

+13-6
Original file line number | Diff line number | Diff line change
@@ -236,17 +236,22 @@ def array_strptime(
236236
if exact:
237237
found = format_regex.match(val)
238238
if not found:
239-
raise ValueError(f"time data '{val}' does not match "
240-
f"format '{fmt}' (match)")
239+
raise ValueError(f"time data \"{val}\" at position {i} doesn't "
240+
f"match format \"{fmt}\"")
241241
if len(val) != found.end():
242-
raise ValueError(f"unconverted data remains: {val[found.end():]}")
242+
raise ValueError(
243+
f"unconverted data remains at position {i}: "
244+
f'"{val[found.end():]}"'
245+
)
243246

244247
# search
245248
else:
246249
found = format_regex.search(val)
247250
if not found:
248-
raise ValueError(f"time data {repr(val)} does not match format "
249-
f"{repr(fmt)} (search)")
251+
raise ValueError(
252+
f"time data \"{val}\" at position {i} doesn't match "
253+
f"format \"{fmt}\""
254+
)
250255

251256
iso_year = -1
252257
year = 1900
@@ -396,7 +401,9 @@ def array_strptime(
396401

397402
result_timezone[i] = tz
398403

399-
except (ValueError, OutOfBoundsDatetime):
404+
except (ValueError, OutOfBoundsDatetime) as ex:
405+
if isinstance(ex, OutOfBoundsDatetime):
406+
ex.args = (f"{str(ex)} present at position {i}",)
400407
if is_coerce:
401408
iresult[i] = NPY_NAT
402409
continue

pandas/tests/io/parser/test_parse_dates.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -1720,7 +1720,9 @@ def test_parse_multiple_delimited_dates_with_swap_warnings():
17201720
# GH46210
17211721
with pytest.raises(
17221722
ValueError,
1723-
match=r"^time data '31/05/2000' does not match format '%m/%d/%Y' \(match\)$",
1723+
match=(
1724+
r'^time data "31/05/2000" at position 1 doesn\'t match format "%m/%d/%Y"$'
1725+
),
17241726
):
17251727
pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])
17261728

pandas/tests/scalar/period/test_period.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -303,7 +303,7 @@ def test_invalid_arguments(self):
303303
with pytest.raises(ValueError, match=msg):
304304
Period(month=1)
305305

306-
msg = "Given date string -2000 not likely a datetime"
306+
msg = '^Given date string "-2000" not likely a datetime$'
307307
with pytest.raises(ValueError, match=msg):
308308
Period("-2000", "A")
309309
msg = "day is out of range for month"

pandas/tests/tools/test_to_datetime.py

+47-23
Original file line number | Diff line number | Diff line change
@@ -479,7 +479,12 @@ def test_to_datetime_parse_timezone_malformed(self, offset):
479479
fmt = "%Y-%m-%d %H:%M:%S %z"
480480
date = "2010-01-01 12:00:00 " + offset
481481

482-
msg = "does not match format|unconverted data remains"
482+
msg = "|".join(
483+
[
484+
r'^time data ".*" at position 0 doesn\'t match format ".*"$',
485+
r'^unconverted data remains at position 0: ".*"$',
486+
]
487+
)
483488
with pytest.raises(ValueError, match=msg):
484489
to_datetime([date], format=fmt)
485490

@@ -1093,7 +1098,7 @@ def test_datetime_bool_arrays_mixed(self, cache):
10931098
to_datetime([False, datetime.today()], cache=cache)
10941099
with pytest.raises(
10951100
ValueError,
1096-
match=r"^time data 'True' does not match format '%Y%m%d' \(match\)$",
1101+
match=r'^time data "True" at position 1 doesn\'t match format "%Y%m%d"$',
10971102
):
10981103
to_datetime(["20130101", True], cache=cache)
10991104
tm.assert_index_equal(
@@ -1132,11 +1137,13 @@ def test_datetime_invalid_scalar(self, value, format, warning):
11321137
res = to_datetime(value, errors="coerce", format=format)
11331138
assert res is NaT
11341139

1135-
msg = (
1136-
"does not match format|"
1137-
"unconverted data remains:|"
1138-
"second must be in 0..59|"
1139-
f"Given date string {value} not likely a datetime"
1140+
msg = "|".join(
1141+
[
1142+
r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S"$',
1143+
r'^Given date string "a" not likely a datetime present at position 0$',
1144+
r'^unconverted data remains at position 0: "9"$',
1145+
r"^second must be in 0..59: 00:01:99 present at position 0$",
1146+
]
11401147
)
11411148
with pytest.raises(ValueError, match=msg):
11421149
with tm.assert_produces_warning(warning, match="Could not infer format"):
@@ -1157,7 +1164,7 @@ def test_datetime_outofbounds_scalar(self, value, format, warning):
11571164
assert res is NaT
11581165

11591166
if format is not None:
1160-
msg = "does not match format|Out of bounds .* present at position 0"
1167+
msg = r'^time data ".*" at position 0 doesn\'t match format ".*"$'
11611168
with pytest.raises(ValueError, match=msg):
11621169
to_datetime(value, errors="raise", format=format)
11631170
else:
@@ -1181,11 +1188,13 @@ def test_datetime_invalid_index(self, values, format, warning):
11811188
res = to_datetime(values, errors="coerce", format=format)
11821189
tm.assert_index_equal(res, DatetimeIndex([NaT] * len(values)))
11831190

1184-
msg = (
1185-
"does not match format|"
1186-
"unconverted data remains:|"
1187-
f"Given date string {values[0]} not likely a datetime|"
1188-
"second must be in 0..59"
1191+
msg = "|".join(
1192+
[
1193+
r'^Given date string "a" not likely a datetime present at position 0$',
1194+
r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S"$',
1195+
r'^unconverted data remains at position 0: "9"$',
1196+
r"^second must be in 0..59: 00:01:99 present at position 0$",
1197+
]
11891198
)
11901199
with pytest.raises(ValueError, match=msg):
11911200
with tm.assert_produces_warning(warning, match="Could not infer format"):
@@ -1805,8 +1814,8 @@ def test_dataframe_coerce(self, cache):
18051814
df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]})
18061815

18071816
msg = (
1808-
"cannot assemble the datetimes: time data .+ does not "
1809-
r"match format '%Y%m%d' \(match\)"
1817+
r'^cannot assemble the datetimes: time data ".+" at position 1 doesn\'t '
1818+
r'match format "%Y%m%d"$'
18101819
)
18111820
with pytest.raises(ValueError, match=msg):
18121821
to_datetime(df2, cache=cache)
@@ -1882,7 +1891,10 @@ def test_dataframe_mixed(self, cache):
18821891
def test_dataframe_float(self, cache):
18831892
# float
18841893
df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]})
1885-
msg = "cannot assemble the datetimes: unconverted data remains: 1"
1894+
msg = (
1895+
r"^cannot assemble the datetimes: unconverted data remains at position "
1896+
r'0: "1"$'
1897+
)
18861898
with pytest.raises(ValueError, match=msg):
18871899
to_datetime(df, cache=cache)
18881900

@@ -2072,7 +2084,7 @@ def test_to_datetime_on_datetime64_series(self, cache):
20722084
def test_to_datetime_with_space_in_series(self, cache):
20732085
# GH 6428
20742086
ser = Series(["10/18/2006", "10/18/2008", " "])
2075-
msg = r"^time data ' ' does not match format '%m/%d/%Y' \(match\)$"
2087+
msg = r'^time data " " at position 2 doesn\'t match format "%m/%d/%Y"$'
20762088
with pytest.raises(ValueError, match=msg):
20772089
to_datetime(ser, errors="raise", cache=cache)
20782090
result_coerce = to_datetime(ser, errors="coerce", cache=cache)
@@ -2342,7 +2354,10 @@ def test_dayfirst_warnings_invalid_input(self):
23422354

23432355
with pytest.raises(
23442356
ValueError,
2345-
match=r"time data '03/30/2011' does not match format '%d/%m/%Y' \(match\)$",
2357+
match=(
2358+
r'^time data "03/30/2011" at position 1 doesn\'t match format '
2359+
r'"%d/%m/%Y"$'
2360+
),
23462361
):
23472362
to_datetime(arr, dayfirst=True)
23482363

@@ -2410,7 +2425,11 @@ def test_to_datetime_infer_datetime_format_consistent_format(
24102425
def test_to_datetime_inconsistent_format(self, cache):
24112426
data = ["01/01/2011 00:00:00", "01-02-2011 00:00:00", "2011-01-03T00:00:00"]
24122427
ser = Series(np.array(data))
2413-
with pytest.raises(ValueError, match="does not match format"):
2428+
msg = (
2429+
r'^time data "01-02-2011 00:00:00" at position 1 doesn\'t match format '
2430+
r'"%m/%d/%Y %H:%M:%S"$'
2431+
)
2432+
with pytest.raises(ValueError, match=msg):
24142433
to_datetime(ser, cache=cache)
24152434

24162435
def test_to_datetime_consistent_format(self, cache):
@@ -2923,17 +2942,22 @@ def test_incorrect_value_exception(self):
29232942
to_datetime(["today", "yesterday"])
29242943

29252944
@pytest.mark.parametrize(
2926-
"format, warning", [(None, UserWarning), ("%Y-%m-%d %H:%M:%S", None)]
2945+
"format, warning",
2946+
[
2947+
(None, UserWarning),
2948+
("%Y-%m-%d %H:%M:%S", None),
2949+
("%Y-%d-%m %H:%M:%S", None),
2950+
],
29272951
)
29282952
def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning):
29292953
# see gh-23830
29302954
msg = (
2931-
"Out of bounds nanosecond timestamp: 2417-10-27 00:00:00 "
2932-
"present at position 0"
2955+
r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00 "
2956+
r"present at position 0$"
29332957
)
29342958
with pytest.raises(OutOfBoundsDatetime, match=msg):
29352959
with tm.assert_produces_warning(warning, match="Could not infer format"):
2936-
to_datetime("2417-10-27 00:00:00", format=format)
2960+
to_datetime("2417-10-10 00:00:00", format=format)
29372961

29382962
@pytest.mark.parametrize(
29392963
"arg, origin, expected_str",

0 commit comments

Comments
 (0)