Skip to content

ERR non-ISO formats don't show position of error #50366

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ Other enhancements
- Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`)
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`)
- Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`)
-

.. ---------------------------------------------------------------------------
Expand Down
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/strptime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,8 @@ def array_strptime(

result_timezone[i] = tz

except (ValueError, OutOfBoundsDatetime):
except (ValueError, OutOfBoundsDatetime) as ex:
ex.args = (str(ex) + f" at position {i}", )
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

similar to

except (ValueError, OverflowError) as ex:
ex.args = (f"{ex} present at position {i}", )

I just removed the word 'present', as I don't think it sounds good with the errors from this file

i.e.

time data ' ' does not match format '%m/%d/%Y' \(match\) at position 2

sounds better than

time data ' ' does not match format '%m/%d/%Y' \(match\) present at position 2

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit but why not use a full f-string instead of addition between 2 strings here?

if is_coerce:
iresult[i] = NPY_NAT
continue
Expand Down
5 changes: 4 additions & 1 deletion pandas/tests/io/parser/test_parse_dates.py
Original file line number Diff line number Diff line change
Expand Up @@ -1717,7 +1717,10 @@ def test_parse_multiple_delimited_dates_with_swap_warnings():
# GH46210
with pytest.raises(
ValueError,
match=r"^time data '31/05/2000' does not match format '%m/%d/%Y' \(match\)$",
match=(
r"^time data '31/05/2000' does not match format '%m/%d/%Y' \(match\) "
"at position 1$"
),
):
pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"])

Expand Down
27 changes: 20 additions & 7 deletions pandas/tests/tools/test_to_datetime.py
Original file line number Diff line number Diff line change
Expand Up @@ -1093,7 +1093,10 @@ def test_datetime_bool_arrays_mixed(self, cache):
to_datetime([False, datetime.today()], cache=cache)
with pytest.raises(
ValueError,
match=r"^time data 'True' does not match format '%Y%m%d' \(match\)$",
match=(
r"^time data 'True' does not match format '%Y%m%d' "
r"\(match\) at position 1$"
),
):
to_datetime(["20130101", True], cache=cache)
tm.assert_index_equal(
Expand Down Expand Up @@ -2072,7 +2075,9 @@ def test_to_datetime_on_datetime64_series(self, cache):
def test_to_datetime_with_space_in_series(self, cache):
# GH 6428
ser = Series(["10/18/2006", "10/18/2008", " "])
msg = r"^time data ' ' does not match format '%m/%d/%Y' \(match\)$"
msg = (
r"^time data ' ' does not match format '%m/%d/%Y' \(match\) at position 2$"
)
with pytest.raises(ValueError, match=msg):
to_datetime(ser, errors="raise", cache=cache)
result_coerce = to_datetime(ser, errors="coerce", cache=cache)
Expand Down Expand Up @@ -2342,7 +2347,10 @@ def test_dayfirst_warnings_invalid_input(self):

with pytest.raises(
ValueError,
match=r"time data '03/30/2011' does not match format '%d/%m/%Y' \(match\)$",
match=(
r"time data '03/30/2011' does not match format '%d/%m/%Y' "
r"\(match\) at position 1$"
),
):
to_datetime(arr, dayfirst=True)

Expand Down Expand Up @@ -2923,17 +2931,22 @@ def test_incorrect_value_exception(self):
to_datetime(["today", "yesterday"])

@pytest.mark.parametrize(
"format, warning", [(None, UserWarning), ("%Y-%m-%d %H:%M:%S", None)]
"format, warning",
[
(None, UserWarning),
("%Y-%m-%d %H:%M:%S", None),
("%Y-%d-%m %H:%M:%S", None),
],
)
def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning):
# see gh-23830
msg = (
"Out of bounds nanosecond timestamp: 2417-10-27 00:00:00 "
"present at position 0"
r"Out of bounds nanosecond timestamp: 2417-10-10 00:00:00"
r".* at position 0"
)
with pytest.raises(OutOfBoundsDatetime, match=msg):
with tm.assert_produces_warning(warning, match="Could not infer format"):
to_datetime("2417-10-27 00:00:00", format=format)
to_datetime("2417-10-10 00:00:00", format=format)
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

slightly changing the input here so we can parametrise over it with both "%Y-%m-%d %H:%M:%S" and "%Y-%d-%m %H:%M:%S" (to check ISO vs non-ISO)


@pytest.mark.parametrize(
"arg, origin, expected_str",
Expand Down