Skip to content

Commit 445bed9

Browse files
authored
ERR: "day out of range" doesn't show position of error (#50464)
* show position of error if out-of-bounds non-ISO
* fixup some more tests

Co-authored-by: MarcoGorelli <>
1 parent e423eee commit 445bed9

File tree

9 files changed

+94
-60
lines changed

9 files changed

+94
-60
lines changed

pandas/_libs/tslib.pyx

+8-8
Original file line numberDiff line numberDiff line change
@@ -550,8 +550,8 @@ cpdef array_to_datetime(
550550
continue
551551
elif is_raise:
552552
raise ValueError(
553-
f"time data \"{val}\" at position {i} doesn't "
554-
f"match format \"{format}\""
553+
f"time data \"{val}\" doesn't "
554+
f"match format \"{format}\", at position {i}"
555555
)
556556
return values, tz_out
557557
# these must be ns unit by-definition
@@ -599,8 +599,8 @@ cpdef array_to_datetime(
599599
continue
600600
elif is_raise:
601601
raise ValueError(
602-
f"time data \"{val}\" at position {i} doesn't "
603-
f"match format \"{format}\""
602+
f"time data \"{val}\" doesn't "
603+
f"match format \"{format}\", at position {i}"
604604
)
605605
return values, tz_out
606606

@@ -617,8 +617,8 @@ cpdef array_to_datetime(
617617
iresult[i] = NPY_NAT
618618
continue
619619
raise TypeError(
620-
f"invalid string coercion to datetime for \"{val}\" "
621-
f"at position {i}"
620+
f"invalid string coercion to datetime "
621+
f"for \"{val}\", at position {i}"
622622
)
623623

624624
if tz is not None:
@@ -661,7 +661,7 @@ cpdef array_to_datetime(
661661
raise TypeError(f"{type(val)} is not convertible to datetime")
662662

663663
except OutOfBoundsDatetime as ex:
664-
ex.args = (str(ex) + f" present at position {i}", )
664+
ex.args = (f"{ex}, at position {i}",)
665665
if is_coerce:
666666
iresult[i] = NPY_NAT
667667
continue
@@ -821,7 +821,7 @@ cdef _array_to_datetime_object(
821821
pydatetime_to_dt64(oresult[i], &dts)
822822
check_dts_bounds(&dts)
823823
except (ValueError, OverflowError) as ex:
824-
ex.args = (f"{ex} present at position {i}", )
824+
ex.args = (f"{ex}, at position {i}", )
825825
if is_coerce:
826826
oresult[i] = <object>NaT
827827
continue

pandas/_libs/tslibs/strptime.pyx

+4-5
Original file line numberDiff line numberDiff line change
@@ -236,11 +236,11 @@ def array_strptime(
236236
if exact:
237237
found = format_regex.match(val)
238238
if not found:
239-
raise ValueError(f"time data \"{val}\" at position {i} doesn't "
239+
raise ValueError(f"time data \"{val}\" doesn't "
240240
f"match format \"{fmt}\"")
241241
if len(val) != found.end():
242242
raise ValueError(
243-
f"unconverted data remains at position {i}: "
243+
f"unconverted data remains: "
244244
f'"{val[found.end():]}"'
245245
)
246246

@@ -249,7 +249,7 @@ def array_strptime(
249249
found = format_regex.search(val)
250250
if not found:
251251
raise ValueError(
252-
f"time data \"{val}\" at position {i} doesn't match "
252+
f"time data \"{val}\" doesn't match "
253253
f"format \"{fmt}\""
254254
)
255255

@@ -402,8 +402,7 @@ def array_strptime(
402402
result_timezone[i] = tz
403403

404404
except (ValueError, OutOfBoundsDatetime) as ex:
405-
if isinstance(ex, OutOfBoundsDatetime):
406-
ex.args = (f"{str(ex)} present at position {i}",)
405+
ex.args = (f"{str(ex)}, at position {i}",)
407406
if is_coerce:
408407
iresult[i] = NPY_NAT
409408
continue

pandas/tests/frame/test_block_internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ def f(dtype):
259259
f("float64")
260260

261261
# 10822
262-
msg = "Unknown string format: aa present at position 0"
262+
msg = "^Unknown string format: aa, at position 0$"
263263
with pytest.raises(ValueError, match=msg):
264264
f("M8[ns]")
265265

pandas/tests/indexes/datetimes/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -547,7 +547,7 @@ def test_construction_outofbounds(self):
547547
# coerces to object
548548
tm.assert_index_equal(Index(dates), exp)
549549

550-
msg = "Out of bounds .* present at position 0"
550+
msg = "^Out of bounds nanosecond timestamp: 3000-01-01 00:00:00, at position 0$"
551551
with pytest.raises(OutOfBoundsDatetime, match=msg):
552552
# can't create DatetimeIndex
553553
DatetimeIndex(dates)

pandas/tests/indexes/datetimes/test_scalar_compat.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ def test_dti_date(self):
3838
@pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]])
3939
def test_dti_date_out_of_range(self, data):
4040
# GH#1475
41-
msg = "Out of bounds .* present at position 0"
41+
msg = "^Out of bounds nanosecond timestamp: 1400-01-01 00:00:00, at position 0$"
4242
with pytest.raises(OutOfBoundsDatetime, match=msg):
4343
DatetimeIndex(data)
4444

pandas/tests/io/parser/test_parse_dates.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -1721,7 +1721,7 @@ def test_parse_multiple_delimited_dates_with_swap_warnings():
17211721
with pytest.raises(
17221722
ValueError,
17231723
match=(
1724-
r'^time data "31/05/2000" at position 1 doesn\'t match format "%m/%d/%Y"$'
1724+
r'^time data "31/05/2000" doesn\'t match format "%m/%d/%Y", at position 1$'
17251725
),
17261726
):
17271727
pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])

pandas/tests/series/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ def test_infer_with_date_and_datetime(self):
7676
def test_unparseable_strings_with_dt64_dtype(self):
7777
# pre-2.0 these would be silently ignored and come back with object dtype
7878
vals = ["aa"]
79-
msg = "Unknown string format: aa present at position 0"
79+
msg = "^Unknown string format: aa, at position 0$"
8080
with pytest.raises(ValueError, match=msg):
8181
Series(vals, dtype="datetime64[ns]")
8282

pandas/tests/tools/test_to_datetime.py

+75-38
Original file line numberDiff line numberDiff line change
@@ -480,8 +480,8 @@ def test_to_datetime_parse_timezone_malformed(self, offset):
480480

481481
msg = "|".join(
482482
[
483-
r'^time data ".*" at position 0 doesn\'t match format ".*"$',
484-
r'^unconverted data remains at position 0: ".*"$',
483+
r'^time data ".*" doesn\'t match format ".*", at position 0$',
484+
r'^unconverted data remains: ".*", at position 0$',
485485
]
486486
)
487487
with pytest.raises(ValueError, match=msg):
@@ -858,7 +858,7 @@ def test_to_datetime_dt64s_and_str(self, arg, format):
858858
"dt", [np.datetime64("1000-01-01"), np.datetime64("5000-01-02")]
859859
)
860860
def test_to_datetime_dt64s_out_of_bounds(self, cache, dt):
861-
msg = "Out of bounds .* present at position 0"
861+
msg = "^Out of bounds nanosecond timestamp: .*, at position 0$"
862862
with pytest.raises(OutOfBoundsDatetime, match=msg):
863863
to_datetime(dt, errors="raise")
864864

@@ -1097,7 +1097,7 @@ def test_datetime_bool_arrays_mixed(self, cache):
10971097
to_datetime([False, datetime.today()], cache=cache)
10981098
with pytest.raises(
10991099
ValueError,
1100-
match=r'^time data "True" at position 1 doesn\'t match format "%Y%m%d"$',
1100+
match=r'^time data "True" doesn\'t match format "%Y%m%d", at position 1$',
11011101
):
11021102
to_datetime(["20130101", True], cache=cache)
11031103
tm.assert_index_equal(
@@ -1138,10 +1138,10 @@ def test_datetime_invalid_scalar(self, value, format, warning):
11381138

11391139
msg = "|".join(
11401140
[
1141-
r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S"$',
1142-
r'^Given date string "a" not likely a datetime present at position 0$',
1143-
r'^unconverted data remains at position 0: "9"$',
1144-
r"^second must be in 0..59: 00:01:99 present at position 0$",
1141+
r'^time data "a" doesn\'t match format "%H:%M:%S", at position 0$',
1142+
r'^Given date string "a" not likely a datetime, at position 0$',
1143+
r'^unconverted data remains: "9", at position 0$',
1144+
r"^second must be in 0..59: 00:01:99, at position 0$",
11451145
]
11461146
)
11471147
with pytest.raises(ValueError, match=msg):
@@ -1163,11 +1163,11 @@ def test_datetime_outofbounds_scalar(self, value, format, warning):
11631163
assert res is NaT
11641164

11651165
if format is not None:
1166-
msg = r'^time data ".*" at position 0 doesn\'t match format ".*"$'
1166+
msg = r'^time data ".*" doesn\'t match format ".*", at position 0$'
11671167
with pytest.raises(ValueError, match=msg):
11681168
to_datetime(value, errors="raise", format=format)
11691169
else:
1170-
msg = "Out of bounds .* present at position 0"
1170+
msg = "^Out of bounds .*, at position 0$"
11711171
with pytest.raises(
11721172
OutOfBoundsDatetime, match=msg
11731173
), tm.assert_produces_warning(warning, match="Could not infer format"):
@@ -1189,10 +1189,10 @@ def test_datetime_invalid_index(self, values, format, warning):
11891189

11901190
msg = "|".join(
11911191
[
1192-
r'^Given date string "a" not likely a datetime present at position 0$',
1193-
r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S"$',
1194-
r'^unconverted data remains at position 0: "9"$',
1195-
r"^second must be in 0..59: 00:01:99 present at position 0$",
1192+
r'^Given date string "a" not likely a datetime, at position 0$',
1193+
r'^time data "a" doesn\'t match format "%H:%M:%S", at position 0$',
1194+
r'^unconverted data remains: "9", at position 0$',
1195+
r"^second must be in 0..59: 00:01:99, at position 0$",
11961196
]
11971197
)
11981198
with pytest.raises(ValueError, match=msg):
@@ -1372,7 +1372,7 @@ def test_to_datetime_malformed_raise(self):
13721372
ts_strings = ["200622-12-31", "111111-24-11"]
13731373
with pytest.raises(
13741374
ValueError,
1375-
match=r"^hour must be in 0\.\.23: 111111-24-11 present at position 1$",
1375+
match=r"^hour must be in 0\.\.23: 111111-24-11, at position 1$",
13761376
):
13771377
with tm.assert_produces_warning(
13781378
UserWarning, match="Could not infer format"
@@ -1845,8 +1845,8 @@ def test_dataframe_coerce(self, cache):
18451845
df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]})
18461846

18471847
msg = (
1848-
r'^cannot assemble the datetimes: time data ".+" at position 1 doesn\'t '
1849-
r'match format "%Y%m%d"$'
1848+
r'^cannot assemble the datetimes: time data ".+" doesn\'t '
1849+
r'match format "%Y%m%d", at position 1$'
18501850
)
18511851
with pytest.raises(ValueError, match=msg):
18521852
to_datetime(df2, cache=cache)
@@ -1923,8 +1923,8 @@ def test_dataframe_float(self, cache):
19231923
# float
19241924
df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]})
19251925
msg = (
1926-
r"^cannot assemble the datetimes: unconverted data remains at position "
1927-
r'0: "1"$'
1926+
r"^cannot assemble the datetimes: unconverted data remains: "
1927+
r'"1", at position 0$'
19281928
)
19291929
with pytest.raises(ValueError, match=msg):
19301930
to_datetime(df, cache=cache)
@@ -1946,7 +1946,7 @@ def test_to_datetime_barely_out_of_bounds(self):
19461946
# in an in-bounds datetime
19471947
arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object)
19481948

1949-
msg = "Out of bounds .* present at position 0"
1949+
msg = "^Out of bounds nanosecond timestamp: .*, at position 0"
19501950
with pytest.raises(OutOfBoundsDatetime, match=msg):
19511951
with tm.assert_produces_warning(
19521952
UserWarning, match="Could not infer format"
@@ -1985,8 +1985,8 @@ def test_to_datetime_iso8601_fails(self, input, format, exact):
19851985
with pytest.raises(
19861986
ValueError,
19871987
match=(
1988-
rf"time data \"{input}\" at position 0 doesn't match format "
1989-
rf"\"{format}\""
1988+
rf"time data \"{input}\" doesn't match format "
1989+
rf"\"{format}\", at position 0"
19901990
),
19911991
):
19921992
to_datetime(input, format=format, exact=exact)
@@ -2007,8 +2007,8 @@ def test_to_datetime_iso8601_exact_fails(self, input, format):
20072007
with pytest.raises(
20082008
ValueError,
20092009
match=(
2010-
rf"time data \"{input}\" at position 0 doesn't match format "
2011-
rf"\"{format}\""
2010+
rf"time data \"{input}\" doesn't match format "
2011+
rf"\"{format}\", at position 0"
20122012
),
20132013
):
20142014
to_datetime(input, format=format)
@@ -2046,8 +2046,8 @@ def test_to_datetime_iso8601_separator(self, input, format):
20462046
with pytest.raises(
20472047
ValueError,
20482048
match=(
2049-
rf"time data \"{input}\" at position 0 doesn\'t match format "
2050-
rf"\"{format}\""
2049+
rf"time data \"{input}\" doesn\'t match format "
2050+
rf"\"{format}\", at position 0"
20512051
),
20522052
):
20532053
to_datetime(input, format=format)
@@ -2115,7 +2115,7 @@ def test_to_datetime_on_datetime64_series(self, cache):
21152115
def test_to_datetime_with_space_in_series(self, cache):
21162116
# GH 6428
21172117
ser = Series(["10/18/2006", "10/18/2008", " "])
2118-
msg = r'^time data " " at position 2 doesn\'t match format "%m/%d/%Y"$'
2118+
msg = r'^time data " " doesn\'t match format "%m/%d/%Y", at position 2$'
21192119
with pytest.raises(ValueError, match=msg):
21202120
to_datetime(ser, errors="raise", cache=cache)
21212121
result_coerce = to_datetime(ser, errors="coerce", cache=cache)
@@ -2386,8 +2386,8 @@ def test_dayfirst_warnings_invalid_input(self):
23862386
with pytest.raises(
23872387
ValueError,
23882388
match=(
2389-
r'^time data "03/30/2011" at position 1 doesn\'t match format '
2390-
r'"%d/%m/%Y"$'
2389+
r'^time data "03/30/2011" doesn\'t match format '
2390+
r'"%d/%m/%Y", at position 1$'
23912391
),
23922392
):
23932393
to_datetime(arr, dayfirst=True)
@@ -2457,8 +2457,8 @@ def test_to_datetime_inconsistent_format(self, cache):
24572457
data = ["01/01/2011 00:00:00", "01-02-2011 00:00:00", "2011-01-03T00:00:00"]
24582458
ser = Series(np.array(data))
24592459
msg = (
2460-
r'^time data "01-02-2011 00:00:00" at position 1 doesn\'t match format '
2461-
r'"%m/%d/%Y %H:%M:%S"$'
2460+
r'^time data "01-02-2011 00:00:00" doesn\'t match format '
2461+
r'"%m/%d/%Y %H:%M:%S", at position 1$'
24622462
)
24632463
with pytest.raises(ValueError, match=msg):
24642464
to_datetime(ser, cache=cache)
@@ -2581,11 +2581,49 @@ def test_day_not_in_month_raise(self, cache):
25812581
):
25822582
to_datetime("2015-02-29", errors="raise", cache=cache)
25832583

2584-
@pytest.mark.parametrize("arg", ["2015-02-29", "2015-02-32", "2015-04-31"])
2585-
def test_day_not_in_month_raise_value(self, cache, arg):
2586-
msg = f'time data "{arg}" at position 0 doesn\'t match format "%Y-%m-%d"'
2584+
@pytest.mark.parametrize(
2585+
"arg, format, msg",
2586+
[
2587+
(
2588+
"2015-02-29",
2589+
"%Y-%m-%d",
2590+
'^time data "2015-02-29" doesn\'t match format "%Y-%m-%d", '
2591+
"at position 0$",
2592+
),
2593+
(
2594+
"2015-29-02",
2595+
"%Y-%d-%m",
2596+
"^day is out of range for month, at position 0$",
2597+
),
2598+
(
2599+
"2015-02-32",
2600+
"%Y-%m-%d",
2601+
'^time data "2015-02-32" doesn\'t match format "%Y-%m-%d", '
2602+
"at position 0$",
2603+
),
2604+
(
2605+
"2015-32-02",
2606+
"%Y-%d-%m",
2607+
'^time data "2015-32-02" doesn\'t match format "%Y-%d-%m", '
2608+
"at position 0$",
2609+
),
2610+
(
2611+
"2015-04-31",
2612+
"%Y-%m-%d",
2613+
'^time data "2015-04-31" doesn\'t match format "%Y-%m-%d", '
2614+
"at position 0$",
2615+
),
2616+
(
2617+
"2015-31-04",
2618+
"%Y-%d-%m",
2619+
"^day is out of range for month, at position 0$",
2620+
),
2621+
],
2622+
)
2623+
def test_day_not_in_month_raise_value(self, cache, arg, format, msg):
2624+
# https://github.com/pandas-dev/pandas/issues/50462
25872625
with pytest.raises(ValueError, match=msg):
2588-
to_datetime(arg, errors="raise", format="%Y-%m-%d", cache=cache)
2626+
to_datetime(arg, errors="raise", format=format, cache=cache)
25892627

25902628
@pytest.mark.parametrize(
25912629
"expected, format, warning",
@@ -2965,7 +3003,7 @@ def test_invalid_origins_tzinfo(self):
29653003
def test_incorrect_value_exception(self):
29663004
# GH47495
29673005
with pytest.raises(
2968-
ValueError, match="Unknown string format: yesterday present at position 1"
3006+
ValueError, match="Unknown string format: yesterday, at position 1"
29693007
):
29703008
with tm.assert_produces_warning(
29713009
UserWarning, match="Could not infer format"
@@ -2983,8 +3021,7 @@ def test_incorrect_value_exception(self):
29833021
def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning):
29843022
# see gh-23830
29853023
msg = (
2986-
r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00 "
2987-
r"present at position 0$"
3024+
r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00, at position 0$"
29883025
)
29893026
with pytest.raises(OutOfBoundsDatetime, match=msg):
29903027
with tm.assert_produces_warning(warning, match="Could not infer format"):

pandas/tests/tslibs/test_array_to_datetime.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def test_coerce_outside_ns_bounds(invalid_date, errors):
126126
kwargs = {"values": arr, "errors": errors}
127127

128128
if errors == "raise":
129-
msg = "Out of bounds .* present at position 0"
129+
msg = "^Out of bounds nanosecond timestamp: .*, at position 0$"
130130

131131
with pytest.raises(ValueError, match=msg):
132132
tslib.array_to_datetime(**kwargs)
@@ -171,9 +171,7 @@ def test_to_datetime_barely_out_of_bounds():
171171
# Close enough to bounds that dropping nanos
172172
# would result in an in-bounds datetime.
173173
arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object)
174-
msg = (
175-
"Out of bounds nanosecond timestamp: 2262-04-11 23:47:16 present at position 0"
176-
)
174+
msg = "^Out of bounds nanosecond timestamp: 2262-04-11 23:47:16, at position 0$"
177175

178176
with pytest.raises(tslib.OutOfBoundsDatetime, match=msg):
179177
tslib.array_to_datetime(arr)

0 commit comments

Comments
 (0)