Skip to content

Commit af31616

Browse files
author
MarcoGorelli
committed
show position of error if out-of-bounds non-ISO
1 parent dd8b718 commit af31616

File tree

4 files changed

+39
-41
lines changed

4 files changed

+39
-41
lines changed

pandas/_libs/tslib.pyx

+6 -6
Original file line number | Diff line number | Diff line change
@@ -508,7 +508,7 @@ cpdef array_to_datetime(
508508
continue
509509
elif is_raise:
510510
raise ValueError(
511-
f"time data \"{val}\" at position {i} doesn't "
511+
f"at position {i}, time data \"{val}\" doesn't "
512512
f"match format \"{format}\""
513513
)
514514
return values, tz_out
@@ -557,7 +557,7 @@ cpdef array_to_datetime(
557557
continue
558558
elif is_raise:
559559
raise ValueError(
560-
f"time data \"{val}\" at position {i} doesn't "
560+
f"at position {i}, time data \"{val}\" doesn't "
561561
f"match format \"{format}\""
562562
)
563563
return values, tz_out
@@ -575,8 +575,8 @@ cpdef array_to_datetime(
575575
iresult[i] = NPY_NAT
576576
continue
577577
raise TypeError(
578-
f"invalid string coercion to datetime for \"{val}\" "
579-
f"at position {i}"
578+
f"at position {i}, invalid string coercion to datetime "
579+
f"for \"{val}\""
580580
)
581581

582582
if tz is not None:
@@ -619,7 +619,7 @@ cpdef array_to_datetime(
619619
raise TypeError(f"{type(val)} is not convertible to datetime")
620620

621621
except OutOfBoundsDatetime as ex:
622-
ex.args = (str(ex) + f" present at position {i}", )
622+
ex.args = (f"at position {i}, {str(ex)}",)
623623
if is_coerce:
624624
iresult[i] = NPY_NAT
625625
continue
@@ -779,7 +779,7 @@ cdef _array_to_datetime_object(
779779
pydatetime_to_dt64(oresult[i], &dts)
780780
check_dts_bounds(&dts)
781781
except (ValueError, OverflowError) as ex:
782-
ex.args = (f"{ex} present at position {i}", )
782+
ex.args = (f"at position {i}, {ex}", )
783783
if is_coerce:
784784
oresult[i] = <object>NaT
785785
continue

pandas/_libs/tslibs/strptime.pyx

+4 -5
Original file line number | Diff line number | Diff line change
@@ -236,11 +236,11 @@ def array_strptime(
236236
if exact:
237237
found = format_regex.match(val)
238238
if not found:
239-
raise ValueError(f"time data \"{val}\" at position {i} doesn't "
239+
raise ValueError(f"time data \"{val}\" doesn't "
240240
f"match format \"{fmt}\"")
241241
if len(val) != found.end():
242242
raise ValueError(
243-
f"unconverted data remains at position {i}: "
243+
f"unconverted data remains: "
244244
f'"{val[found.end():]}"'
245245
)
246246

@@ -249,7 +249,7 @@ def array_strptime(
249249
found = format_regex.search(val)
250250
if not found:
251251
raise ValueError(
252-
f"time data \"{val}\" at position {i} doesn't match "
252+
f"time data \"{val}\" doesn't match "
253253
f"format \"{fmt}\""
254254
)
255255

@@ -402,8 +402,7 @@ def array_strptime(
402402
result_timezone[i] = tz
403403

404404
except (ValueError, OutOfBoundsDatetime) as ex:
405-
if isinstance(ex, OutOfBoundsDatetime):
406-
ex.args = (f"{str(ex)} present at position {i}",)
405+
ex.args = (f"at position {i}, {str(ex)}",)
407406
if is_coerce:
408407
iresult[i] = NPY_NAT
409408
continue

pandas/tests/io/parser/test_parse_dates.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -1721,7 +1721,7 @@ def test_parse_multiple_delimited_dates_with_swap_warnings():
17211721
with pytest.raises(
17221722
ValueError,
17231723
match=(
1724-
r'^time data "31/05/2000" at position 1 doesn\'t match format "%m/%d/%Y"$'
1724+
r'^at position 1, time data "31/05/2000" doesn\'t match format "%m/%d/%Y"$'
17251725
),
17261726
):
17271727
pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])

pandas/tests/tools/test_to_datetime.py

+28 -29
Original file line number | Diff line number | Diff line change
@@ -481,8 +481,8 @@ def test_to_datetime_parse_timezone_malformed(self, offset):
481481

482482
msg = "|".join(
483483
[
484-
r'^time data ".*" at position 0 doesn\'t match format ".*"$',
485-
r'^unconverted data remains at position 0: ".*"$',
484+
r'^at position 0, time data ".*" doesn\'t match format ".*"$',
485+
r'^at position 0, unconverted data remains: ".*"$',
486486
]
487487
)
488488
with pytest.raises(ValueError, match=msg):
@@ -859,7 +859,7 @@ def test_to_datetime_dt64s_and_str(self, arg, format):
859859
"dt", [np.datetime64("1000-01-01"), np.datetime64("5000-01-02")]
860860
)
861861
def test_to_datetime_dt64s_out_of_bounds(self, cache, dt):
862-
msg = "Out of bounds .* present at position 0"
862+
msg = "^at position 0, Out of bounds nanosecond timestamp: .*$"
863863
with pytest.raises(OutOfBoundsDatetime, match=msg):
864864
to_datetime(dt, errors="raise")
865865

@@ -1098,7 +1098,7 @@ def test_datetime_bool_arrays_mixed(self, cache):
10981098
to_datetime([False, datetime.today()], cache=cache)
10991099
with pytest.raises(
11001100
ValueError,
1101-
match=r'^time data "True" at position 1 doesn\'t match format "%Y%m%d"$',
1101+
match=r'^at position 1, time data "True" doesn\'t match format "%Y%m%d"$',
11021102
):
11031103
to_datetime(["20130101", True], cache=cache)
11041104
tm.assert_index_equal(
@@ -1139,10 +1139,10 @@ def test_datetime_invalid_scalar(self, value, format, warning):
11391139

11401140
msg = "|".join(
11411141
[
1142-
r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S"$',
1143-
r'^Given date string "a" not likely a datetime present at position 0$',
1144-
r'^unconverted data remains at position 0: "9"$',
1145-
r"^second must be in 0..59: 00:01:99 present at position 0$",
1142+
r'^at position 0, time data "a" doesn\'t match format "%H:%M:%S"$',
1143+
r'^at position 0, Given date string "a" not likely a datetime$',
1144+
r'^at position 0, unconverted data remains: "9"$',
1145+
r"^at position 0, second must be in 0..59: 00:01:99$",
11461146
]
11471147
)
11481148
with pytest.raises(ValueError, match=msg):
@@ -1164,11 +1164,11 @@ def test_datetime_outofbounds_scalar(self, value, format, warning):
11641164
assert res is NaT
11651165

11661166
if format is not None:
1167-
msg = r'^time data ".*" at position 0 doesn\'t match format ".*"$'
1167+
msg = r'^at position 0, time data ".*" doesn\'t match format ".*"$'
11681168
with pytest.raises(ValueError, match=msg):
11691169
to_datetime(value, errors="raise", format=format)
11701170
else:
1171-
msg = "Out of bounds .* present at position 0"
1171+
msg = "^at position 0, Out of bounds .*$"
11721172
with pytest.raises(
11731173
OutOfBoundsDatetime, match=msg
11741174
), tm.assert_produces_warning(warning, match="Could not infer format"):
@@ -1190,10 +1190,10 @@ def test_datetime_invalid_index(self, values, format, warning):
11901190

11911191
msg = "|".join(
11921192
[
1193-
r'^Given date string "a" not likely a datetime present at position 0$',
1194-
r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S"$',
1195-
r'^unconverted data remains at position 0: "9"$',
1196-
r"^second must be in 0..59: 00:01:99 present at position 0$",
1193+
r'^at position 0, Given date string "a" not likely a datetime$',
1194+
r'^at position 0, time data "a" doesn\'t match format "%H:%M:%S"$',
1195+
r'^at position 0, unconverted data remains: "9"$',
1196+
r"^at position 0, second must be in 0..59: 00:01:99$",
11971197
]
11981198
)
11991199
with pytest.raises(ValueError, match=msg):
@@ -1373,7 +1373,7 @@ def test_to_datetime_malformed_raise(self):
13731373
ts_strings = ["200622-12-31", "111111-24-11"]
13741374
with pytest.raises(
13751375
ValueError,
1376-
match=r"^hour must be in 0\.\.23: 111111-24-11 present at position 1$",
1376+
match=r"^at position 1, hour must be in 0\.\.23: 111111-24-11$",
13771377
):
13781378
with tm.assert_produces_warning(
13791379
UserWarning, match="Could not infer format"
@@ -1814,7 +1814,7 @@ def test_dataframe_coerce(self, cache):
18141814
df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]})
18151815

18161816
msg = (
1817-
r'^cannot assemble the datetimes: time data ".+" at position 1 doesn\'t '
1817+
r'^cannot assemble the datetimes: at position 1, time data ".+" doesn\'t '
18181818
r'match format "%Y%m%d"$'
18191819
)
18201820
with pytest.raises(ValueError, match=msg):
@@ -1892,8 +1892,8 @@ def test_dataframe_float(self, cache):
18921892
# float
18931893
df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]})
18941894
msg = (
1895-
r"^cannot assemble the datetimes: unconverted data remains at position "
1896-
r'0: "1"$'
1895+
r"^cannot assemble the datetimes: at position 0, unconverted data remains: "
1896+
r'"1"$'
18971897
)
18981898
with pytest.raises(ValueError, match=msg):
18991899
to_datetime(df, cache=cache)
@@ -1915,7 +1915,7 @@ def test_to_datetime_barely_out_of_bounds(self):
19151915
# in an in-bounds datetime
19161916
arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object)
19171917

1918-
msg = "Out of bounds .* present at position 0"
1918+
msg = "^at position 0, Out of bounds .*$"
19191919
with pytest.raises(OutOfBoundsDatetime, match=msg):
19201920
with tm.assert_produces_warning(
19211921
UserWarning, match="Could not infer format"
@@ -1954,7 +1954,7 @@ def test_to_datetime_iso8601_fails(self, input, format, exact):
19541954
with pytest.raises(
19551955
ValueError,
19561956
match=(
1957-
rf"time data \"{input}\" at position 0 doesn't match format "
1957+
rf"at position 0, time data \"{input}\" doesn't match format "
19581958
rf"\"{format}\""
19591959
),
19601960
):
@@ -1976,7 +1976,7 @@ def test_to_datetime_iso8601_exact_fails(self, input, format):
19761976
with pytest.raises(
19771977
ValueError,
19781978
match=(
1979-
rf"time data \"{input}\" at position 0 doesn't match format "
1979+
rf"at position 0, time data \"{input}\" doesn't match format "
19801980
rf"\"{format}\""
19811981
),
19821982
):
@@ -2015,7 +2015,7 @@ def test_to_datetime_iso8601_separator(self, input, format):
20152015
with pytest.raises(
20162016
ValueError,
20172017
match=(
2018-
rf"time data \"{input}\" at position 0 doesn\'t match format "
2018+
rf"at position 0, time data \"{input}\" doesn\'t match format "
20192019
rf"\"{format}\""
20202020
),
20212021
):
@@ -2084,7 +2084,7 @@ def test_to_datetime_on_datetime64_series(self, cache):
20842084
def test_to_datetime_with_space_in_series(self, cache):
20852085
# GH 6428
20862086
ser = Series(["10/18/2006", "10/18/2008", " "])
2087-
msg = r'^time data " " at position 2 doesn\'t match format "%m/%d/%Y"$'
2087+
msg = r'^at position 2, time data " " doesn\'t match format "%m/%d/%Y"$'
20882088
with pytest.raises(ValueError, match=msg):
20892089
to_datetime(ser, errors="raise", cache=cache)
20902090
result_coerce = to_datetime(ser, errors="coerce", cache=cache)
@@ -2355,7 +2355,7 @@ def test_dayfirst_warnings_invalid_input(self):
23552355
with pytest.raises(
23562356
ValueError,
23572357
match=(
2358-
r'^time data "03/30/2011" at position 1 doesn\'t match format '
2358+
r'^at position 1, time data "03/30/2011" doesn\'t match format '
23592359
r'"%d/%m/%Y"$'
23602360
),
23612361
):
@@ -2426,7 +2426,7 @@ def test_to_datetime_inconsistent_format(self, cache):
24262426
data = ["01/01/2011 00:00:00", "01-02-2011 00:00:00", "2011-01-03T00:00:00"]
24272427
ser = Series(np.array(data))
24282428
msg = (
2429-
r'^time data "01-02-2011 00:00:00" at position 1 doesn\'t match format '
2429+
r'^at position 1, time data "01-02-2011 00:00:00" doesn\'t match format '
24302430
r'"%m/%d/%Y %H:%M:%S"$'
24312431
)
24322432
with pytest.raises(ValueError, match=msg):
@@ -2552,7 +2552,7 @@ def test_day_not_in_month_raise(self, cache):
25522552

25532553
@pytest.mark.parametrize("arg", ["2015-02-29", "2015-02-32", "2015-04-31"])
25542554
def test_day_not_in_month_raise_value(self, cache, arg):
2555-
msg = f'time data "{arg}" at position 0 doesn\'t match format "%Y-%m-%d"'
2555+
msg = f'at position 0, time data "{arg}" doesn\'t match format "%Y-%m-%d"'
25562556
with pytest.raises(ValueError, match=msg):
25572557
to_datetime(arg, errors="raise", format="%Y-%m-%d", cache=cache)
25582558

@@ -2934,7 +2934,7 @@ def test_invalid_origins_tzinfo(self):
29342934
def test_incorrect_value_exception(self):
29352935
# GH47495
29362936
with pytest.raises(
2937-
ValueError, match="Unknown string format: yesterday present at position 1"
2937+
ValueError, match="at position 1, Unknown string format: yesterday"
29382938
):
29392939
with tm.assert_produces_warning(
29402940
UserWarning, match="Could not infer format"
@@ -2952,8 +2952,7 @@ def test_incorrect_value_exception(self):
29522952
def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning):
29532953
# see gh-23830
29542954
msg = (
2955-
r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00 "
2956-
r"present at position 0$"
2955+
r"^at position 0, Out of bounds nanosecond timestamp: 2417-10-10 00:00:00$"
29572956
)
29582957
with pytest.raises(OutOfBoundsDatetime, match=msg):
29592958
with tm.assert_produces_warning(warning, match="Could not infer format"):

0 commit comments

Comments
 (0)