Skip to content

Commit 1b7bfed

Browse files
maushumee and mroeschke authored
BUG: Fix for pandas.to_datetime reports incorrect index when failing (#59594)
* Remove index from error
* Fix test failures
* Add entry to docs/source/whatsnew
* Update pandas/tests/tools/test_to_datetime.py
  Co-authored-by: Matthew Roeschke <[email protected]>
* Update pandas/tests/tools/test_to_datetime.py
  Co-authored-by: Matthew Roeschke <[email protected]>
---------
Co-authored-by: Matthew Roeschke <[email protected]>
1 parent 7c36579 commit 1b7bfed

File tree

8 files changed

+62
-70
lines changed

8 files changed

+62
-70
lines changed

doc/source/whatsnew/v3.0.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -555,6 +555,7 @@ Datetimelike
555555
- Bug in :meth:`DatetimeIndex.union` and :meth:`DatetimeIndex.intersection` when ``unit`` was non-nanosecond (:issue:`59036`)
556556
- Bug in :meth:`Series.dt.microsecond` producing incorrect results for pyarrow backed :class:`Series`. (:issue:`59154`)
557557
- Bug in :meth:`to_datetime` not respecting dayfirst if an uncommon date string was passed. (:issue:`58859`)
558+
- Bug in :meth:`to_datetime` reporting an incorrect index in the error message when parsing fails. (:issue:`58298`)
558559
- Bug in setting scalar values with mismatched resolution into arrays with non-nanosecond ``datetime64``, ``timedelta64`` or :class:`DatetimeTZDtype` incorrectly truncating those scalars (:issue:`56410`)
559560

560561
Timedelta

pandas/_libs/tslib.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ cpdef array_to_datetime(
439439
raise TypeError(f"{type(val)} is not convertible to datetime")
440440

441441
except (TypeError, OverflowError, ValueError) as ex:
442-
ex.args = (f"{ex}, at position {i}",)
442+
ex.args = (f"{ex}",)
443443
if is_coerce:
444444
iresult[i] = NPY_NAT
445445
continue

pandas/_libs/tslibs/strptime.pyx

+1-1
Original file line numberDiff line numberDiff line change
@@ -536,7 +536,7 @@ def array_strptime(
536536

537537
except ValueError as ex:
538538
ex.args = (
539-
f"{str(ex)}, at position {i}. You might want to try:\n"
539+
f"{str(ex)}. You might want to try:\n"
540540
" - passing `format` if your strings have a consistent format;\n"
541541
" - passing `format='ISO8601'` if your strings are "
542542
"all ISO8601 but not necessarily in exactly the same format;\n"

pandas/tests/frame/test_block_internals.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ def f(dtype):
251251
f("float64")
252252

253253
# 10822
254-
msg = "^Unknown datetime string format, unable to parse: aa, at position 0$"
254+
msg = "^Unknown datetime string format, unable to parse: aa$"
255255
with pytest.raises(ValueError, match=msg):
256256
f("M8[ns]")
257257

pandas/tests/io/parser/test_parse_dates.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -507,8 +507,8 @@ def test_parse_multiple_delimited_dates_with_swap_warnings():
507507
with pytest.raises(
508508
ValueError,
509509
match=(
510-
r'^time data "31/05/2000" doesn\'t match format "%m/%d/%Y", '
511-
r"at position 1. You might want to try:"
510+
r'^time data "31/05/2000" doesn\'t match format "%m/%d/%Y". '
511+
r"You might want to try:"
512512
),
513513
):
514514
pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])

pandas/tests/series/test_constructors.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ def test_infer_with_date_and_datetime(self):
8383
def test_unparsable_strings_with_dt64_dtype(self):
8484
# pre-2.0 these would be silently ignored and come back with object dtype
8585
vals = ["aa"]
86-
msg = "^Unknown datetime string format, unable to parse: aa, at position 0$"
86+
msg = "^Unknown datetime string format, unable to parse: aa$"
8787
with pytest.raises(ValueError, match=msg):
8888
Series(vals, dtype="datetime64[ns]")
8989

pandas/tests/tools/test_to_datetime.py

+54-63
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,7 @@ def test_to_datetime_format_YYYYMMDD_with_nat(self, cache):
133133
with pytest.raises(
134134
ValueError,
135135
match=(
136-
'unconverted data remains when parsing with format "%Y%m%d": ".0", '
137-
"at position 0"
136+
'unconverted data remains when parsing with format "%Y%m%d": ".0". '
138137
),
139138
):
140139
# https://github.com/pandas-dev/pandas/issues/50051
@@ -514,10 +513,9 @@ def test_to_datetime_parse_timezone_malformed(self, offset):
514513

515514
msg = "|".join(
516515
[
517-
r'^time data ".*" doesn\'t match format ".*", at position 0. '
516+
r'^time data ".*" doesn\'t match format ".*". ' f"{PARSING_ERR_MSG}$",
517+
r'^unconverted data remains when parsing with format ".*": ".*". '
518518
f"{PARSING_ERR_MSG}$",
519-
r'^unconverted data remains when parsing with format ".*": ".*", '
520-
f"at position 0. {PARSING_ERR_MSG}$",
521519
]
522520
)
523521
with pytest.raises(ValueError, match=msg):
@@ -539,7 +537,7 @@ def test_to_datetime_overflow(self):
539537
# TODO: Timestamp raises ValueError("could not convert string to Timestamp")
540538
# can we make these more consistent?
541539
arg = "08335394550"
542-
msg = 'Parsing "08335394550" to datetime overflows, at position 0'
540+
msg = 'Parsing "08335394550" to datetime overflows'
543541
with pytest.raises(OutOfBoundsDatetime, match=msg):
544542
to_datetime(arg)
545543

@@ -1309,8 +1307,8 @@ def test_datetime_bool_arrays_mixed(self, cache):
13091307
with pytest.raises(
13101308
ValueError,
13111309
match=(
1312-
r'^time data "True" doesn\'t match format "%Y%m%d", '
1313-
f"at position 1. {PARSING_ERR_MSG}$"
1310+
r'^time data "True" doesn\'t match format "%Y%m%d". '
1311+
f"{PARSING_ERR_MSG}$"
13141312
),
13151313
):
13161314
to_datetime(["20130101", True], cache=cache)
@@ -1345,12 +1343,12 @@ def test_datetime_invalid_scalar(self, value, format):
13451343

13461344
msg = "|".join(
13471345
[
1348-
r'^time data "a" doesn\'t match format "%H:%M:%S", at position 0. '
1346+
r'^time data "a" doesn\'t match format "%H:%M:%S". '
13491347
f"{PARSING_ERR_MSG}$",
1350-
r'^Given date string "a" not likely a datetime, at position 0$',
1351-
r'^unconverted data remains when parsing with format "%H:%M:%S": "9", '
1352-
f"at position 0. {PARSING_ERR_MSG}$",
1353-
r"^second must be in 0..59: 00:01:99, at position 0$",
1348+
r'^Given date string "a" not likely a datetime$',
1349+
r'^unconverted data remains when parsing with format "%H:%M:%S": "9". '
1350+
f"{PARSING_ERR_MSG}$",
1351+
r"^second must be in 0..59: 00:01:99$",
13541352
]
13551353
)
13561354
with pytest.raises(ValueError, match=msg):
@@ -1368,7 +1366,7 @@ def test_datetime_outofbounds_scalar(self, value, format):
13681366
assert res is NaT
13691367

13701368
if format is not None:
1371-
msg = r'^time data ".*" doesn\'t match format ".*", at position 0.'
1369+
msg = r'^time data ".*" doesn\'t match format ".*"'
13721370
with pytest.raises(ValueError, match=msg):
13731371
to_datetime(value, errors="raise", format=format)
13741372
else:
@@ -1397,12 +1395,12 @@ def test_datetime_invalid_index(self, values, format):
13971395

13981396
msg = "|".join(
13991397
[
1400-
r'^Given date string "a" not likely a datetime, at position 0$',
1401-
r'^time data "a" doesn\'t match format "%H:%M:%S", at position 0. '
1398+
r'^Given date string "a" not likely a datetime$',
1399+
r'^time data "a" doesn\'t match format "%H:%M:%S". '
1400+
f"{PARSING_ERR_MSG}$",
1401+
r'^unconverted data remains when parsing with format "%H:%M:%S": "9". '
14021402
f"{PARSING_ERR_MSG}$",
1403-
r'^unconverted data remains when parsing with format "%H:%M:%S": "9", '
1404-
f"at position 0. {PARSING_ERR_MSG}$",
1405-
r"^second must be in 0..59: 00:01:99, at position 0$",
1403+
r"^second must be in 0..59: 00:01:99$",
14061404
]
14071405
)
14081406
with pytest.raises(ValueError, match=msg):
@@ -1582,8 +1580,7 @@ def test_to_datetime_malformed_raise(self):
15821580
ts_strings = ["200622-12-31", "111111-24-11"]
15831581
msg = (
15841582
'Parsed string "200622-12-31" gives an invalid tzoffset, which must '
1585-
r"be between -timedelta\(hours=24\) and timedelta\(hours=24\), "
1586-
"at position 0"
1583+
r"be between -timedelta\(hours=24\) and timedelta\(hours=24\)"
15871584
)
15881585
with pytest.raises(
15891586
ValueError,
@@ -1748,7 +1745,7 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit):
17481745
with pytest.raises(ValueError, match=msg):
17491746
to_datetime(np.array([1.5]), unit=unit, errors="raise")
17501747

1751-
msg = r"Given date string \"1.5\" not likely a datetime, at position 0"
1748+
msg = r"Given date string \"1.5\" not likely a datetime"
17521749
with pytest.raises(ValueError, match=msg):
17531750
to_datetime(["1.5"], unit=unit, errors="raise")
17541751

@@ -1802,7 +1799,7 @@ def test_unit_array_mixed_nans_large_int(self, cache):
18021799
def test_to_datetime_invalid_str_not_out_of_bounds_valuerror(self, cache):
18031800
# if we have a string, then we raise a ValueError
18041801
# and NOT an OutOfBoundsDatetime
1805-
msg = "Unknown datetime string format, unable to parse: foo, at position 0"
1802+
msg = "Unknown datetime string format, unable to parse: foo"
18061803
with pytest.raises(ValueError, match=msg):
18071804
to_datetime("foo", errors="raise", unit="s", cache=cache)
18081805

@@ -1938,12 +1935,9 @@ def test_to_datetime_unit_na_values(self):
19381935
@pytest.mark.parametrize("bad_val", ["foo", 111111111])
19391936
def test_to_datetime_unit_invalid(self, bad_val):
19401937
if bad_val == "foo":
1941-
msg = (
1942-
"Unknown datetime string format, unable to parse: "
1943-
f"{bad_val}, at position 2"
1944-
)
1938+
msg = "Unknown datetime string format, unable to parse: " f"{bad_val}"
19451939
else:
1946-
msg = "cannot convert input 111111111 with the unit 'D', at position 2"
1940+
msg = "cannot convert input 111111111 with the unit 'D'"
19471941
with pytest.raises(ValueError, match=msg):
19481942
to_datetime([1, 2, bad_val], unit="D")
19491943

@@ -2096,7 +2090,7 @@ def test_dataframe_coerce(self, cache):
20962090

20972091
msg = (
20982092
r'^cannot assemble the datetimes: time data ".+" doesn\'t '
2099-
r'match format "%Y%m%d", at position 1\.'
2093+
r'match format "%Y%m%d"\.'
21002094
)
21012095
with pytest.raises(ValueError, match=msg):
21022096
to_datetime(df2, cache=cache)
@@ -2174,7 +2168,7 @@ def test_dataframe_float(self, cache):
21742168
df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]})
21752169
msg = (
21762170
r"^cannot assemble the datetimes: unconverted data remains when parsing "
2177-
r'with format ".*": "1", at position 0.'
2171+
r'with format ".*": "1".'
21782172
)
21792173
with pytest.raises(ValueError, match=msg):
21802174
to_datetime(df, cache=cache)
@@ -2196,7 +2190,7 @@ def test_to_datetime_barely_out_of_bounds(self):
21962190
# in an in-bounds datetime
21972191
arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object)
21982192

2199-
msg = "^Out of bounds nanosecond timestamp: .*, at position 0"
2193+
msg = "^Out of bounds nanosecond timestamp: .*"
22002194
with pytest.raises(OutOfBoundsDatetime, match=msg):
22012195
to_datetime(arr)
22022196

@@ -2231,10 +2225,7 @@ def test_to_datetime_iso8601_fails(self, input, format, exact):
22312225
# `format` is longer than the string, so this fails regardless of `exact`
22322226
with pytest.raises(
22332227
ValueError,
2234-
match=(
2235-
rf"time data \"{input}\" doesn't match format "
2236-
rf"\"{format}\", at position 0"
2237-
),
2228+
match=(rf"time data \"{input}\" doesn't match format " rf"\"{format}\""),
22382229
):
22392230
to_datetime(input, format=format, exact=exact)
22402231

@@ -2253,10 +2244,9 @@ def test_to_datetime_iso8601_exact_fails(self, input, format):
22532244
# `format` is shorter than the date string, so only fails with `exact=True`
22542245
msg = "|".join(
22552246
[
2256-
'^unconverted data remains when parsing with format ".*": ".*"'
2257-
f", at position 0. {PARSING_ERR_MSG}$",
2258-
f'^time data ".*" doesn\'t match format ".*", at position 0. '
2247+
'^unconverted data remains when parsing with format ".*": ".*". '
22592248
f"{PARSING_ERR_MSG}$",
2249+
f'^time data ".*" doesn\'t match format ".*". ' f"{PARSING_ERR_MSG}$",
22602250
]
22612251
)
22622252
with pytest.raises(
@@ -2297,10 +2287,7 @@ def test_to_datetime_iso8601_separator(self, input, format):
22972287
# https://github.com/pandas-dev/pandas/issues/12649
22982288
with pytest.raises(
22992289
ValueError,
2300-
match=(
2301-
rf"time data \"{input}\" doesn\'t match format "
2302-
rf"\"{format}\", at position 0"
2303-
),
2290+
match=(rf"time data \"{input}\" doesn\'t match format " rf"\"{format}\""),
23042291
):
23052292
to_datetime(input, format=format)
23062293

@@ -2390,8 +2377,7 @@ def test_to_datetime_with_space_in_series(self, cache):
23902377
# GH 6428
23912378
ser = Series(["10/18/2006", "10/18/2008", " "])
23922379
msg = (
2393-
r'^time data " " doesn\'t match format "%m/%d/%Y", '
2394-
rf"at position 2. {PARSING_ERR_MSG}$"
2380+
r'^time data " " doesn\'t match format "%m/%d/%Y". ' rf"{PARSING_ERR_MSG}$"
23952381
)
23962382
with pytest.raises(ValueError, match=msg):
23972383
to_datetime(ser, errors="raise", cache=cache)
@@ -2466,7 +2452,7 @@ def test_to_datetime_strings_vs_constructor(self, result):
24662452
def test_to_datetime_unprocessable_input(self, cache):
24672453
# GH 4928
24682454
# GH 21864
2469-
msg = '^Given date string "1" not likely a datetime, at position 1$'
2455+
msg = '^Given date string "1" not likely a datetime$'
24702456
with pytest.raises(ValueError, match=msg):
24712457
to_datetime([1, "1"], errors="raise", cache=cache)
24722458

@@ -2643,7 +2629,7 @@ def test_dayfirst_warnings_invalid_input(self):
26432629
ValueError,
26442630
match=(
26452631
r'^time data "03/30/2011" doesn\'t match format '
2646-
rf'"%d/%m/%Y", at position 1. {PARSING_ERR_MSG}$'
2632+
rf'"%d/%m/%Y". {PARSING_ERR_MSG}$'
26472633
),
26482634
):
26492635
to_datetime(arr, dayfirst=True)
@@ -2714,7 +2700,7 @@ def test_to_datetime_inconsistent_format(self, cache):
27142700
ser = Series(np.array(data))
27152701
msg = (
27162702
r'^time data "01-02-2011 00:00:00" doesn\'t match format '
2717-
rf'"%m/%d/%Y %H:%M:%S", at position 1. {PARSING_ERR_MSG}$'
2703+
rf'"%m/%d/%Y %H:%M:%S". {PARSING_ERR_MSG}$'
27182704
)
27192705
with pytest.raises(ValueError, match=msg):
27202706
to_datetime(ser, cache=cache)
@@ -2820,7 +2806,7 @@ def test_day_not_in_month_coerce(self, cache, arg, format):
28202806
assert isna(to_datetime(arg, errors="coerce", format=format, cache=cache))
28212807

28222808
def test_day_not_in_month_raise(self, cache):
2823-
msg = "day is out of range for month: 2015-02-29, at position 0"
2809+
msg = "day is out of range for month: 2015-02-29"
28242810
with pytest.raises(ValueError, match=msg):
28252811
to_datetime("2015-02-29", errors="raise", cache=cache)
28262812

@@ -2830,34 +2816,34 @@ def test_day_not_in_month_raise(self, cache):
28302816
(
28312817
"2015-02-29",
28322818
"%Y-%m-%d",
2833-
f"^day is out of range for month, at position 0. {PARSING_ERR_MSG}$",
2819+
f"^day is out of range for month. {PARSING_ERR_MSG}$",
28342820
),
28352821
(
28362822
"2015-29-02",
28372823
"%Y-%d-%m",
2838-
f"^day is out of range for month, at position 0. {PARSING_ERR_MSG}$",
2824+
f"^day is out of range for month. {PARSING_ERR_MSG}$",
28392825
),
28402826
(
28412827
"2015-02-32",
28422828
"%Y-%m-%d",
2843-
'^unconverted data remains when parsing with format "%Y-%m-%d": "2", '
2844-
f"at position 0. {PARSING_ERR_MSG}$",
2829+
'^unconverted data remains when parsing with format "%Y-%m-%d": "2". '
2830+
f"{PARSING_ERR_MSG}$",
28452831
),
28462832
(
28472833
"2015-32-02",
28482834
"%Y-%d-%m",
2849-
'^time data "2015-32-02" doesn\'t match format "%Y-%d-%m", '
2850-
f"at position 0. {PARSING_ERR_MSG}$",
2835+
'^time data "2015-32-02" doesn\'t match format "%Y-%d-%m". '
2836+
f"{PARSING_ERR_MSG}$",
28512837
),
28522838
(
28532839
"2015-04-31",
28542840
"%Y-%m-%d",
2855-
f"^day is out of range for month, at position 0. {PARSING_ERR_MSG}$",
2841+
f"^day is out of range for month. {PARSING_ERR_MSG}$",
28562842
),
28572843
(
28582844
"2015-31-04",
28592845
"%Y-%d-%m",
2860-
f"^day is out of range for month, at position 0. {PARSING_ERR_MSG}$",
2846+
f"^day is out of range for month. {PARSING_ERR_MSG}$",
28612847
),
28622848
],
28632849
)
@@ -3226,9 +3212,7 @@ def test_invalid_origins_tzinfo(self):
32263212

32273213
def test_incorrect_value_exception(self):
32283214
# GH47495
3229-
msg = (
3230-
"Unknown datetime string format, unable to parse: yesterday, at position 1"
3231-
)
3215+
msg = "Unknown datetime string format, unable to parse: yesterday"
32323216
with pytest.raises(ValueError, match=msg):
32333217
to_datetime(["today", "yesterday"])
32343218

@@ -3249,7 +3233,7 @@ def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning):
32493233
assert res.month == 10
32503234
assert res.day == 10
32513235
else:
3252-
msg = "unconverted data remains when parsing with format.*, at position 0"
3236+
msg = "unconverted data remains when parsing with format.*"
32533237
with pytest.raises(ValueError, match=msg):
32543238
to_datetime("2417-10-10 00:00:00.00", format=format)
32553239

@@ -3473,9 +3457,7 @@ def test_to_datetime_mixed_or_iso_exact(exact, format):
34733457

34743458
def test_to_datetime_mixed_not_necessarily_iso8601_raise():
34753459
# https://github.com/pandas-dev/pandas/issues/50411
3476-
with pytest.raises(
3477-
ValueError, match="Time data 01-01-2000 is not ISO8601 format, at position 1"
3478-
):
3460+
with pytest.raises(ValueError, match="Time data 01-01-2000 is not ISO8601 format"):
34793461
to_datetime(["2020-01-01", "01-01-2000"], format="ISO8601")
34803462

34813463

@@ -3500,6 +3482,15 @@ def test_unknown_tz_raises():
35003482
to_datetime([dtstr])
35013483

35023484

3485+
def test_unformatted_input_raises():
3486+
valid, invalid = "2024-01-01", "N"
3487+
ser = Series([valid] * start_caching_at + [invalid])
3488+
msg = 'time data "N" doesn\'t match format "%Y-%m-%d"'
3489+
3490+
with pytest.raises(ValueError, match=msg):
3491+
to_datetime(ser, format="%Y-%m-%d", exact=True, cache=True)
3492+
3493+
35033494
def test_from_numeric_arrow_dtype(any_numeric_ea_dtype):
35043495
# GH 52425
35053496
pytest.importorskip("pyarrow")

pandas/tests/tslibs/test_array_to_datetime.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ def test_to_datetime_barely_out_of_bounds():
260260
# Close enough to bounds that dropping nanos
261261
# would result in an in-bounds datetime.
262262
arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object)
263-
msg = "^Out of bounds nanosecond timestamp: 2262-04-11 23:47:16, at position 0$"
263+
msg = "^Out of bounds nanosecond timestamp: 2262-04-11 23:47:16$"
264264

265265
with pytest.raises(tslib.OutOfBoundsDatetime, match=msg):
266266
tslib.array_to_datetime(arr)

0 commit comments

Comments
 (0)