From 303a648d522a8301a7b57336100451ec490d3328 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Tue, 20 Dec 2022 18:51:33 +0000 Subject: [PATCH 1/7] wip --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/_libs/tslibs/strptime.pyx | 3 ++- pandas/tests/io/parser/test_parse_dates.py | 5 +++- pandas/tests/tools/test_to_datetime.py | 27 ++++++++++++++++------ 4 files changed, 27 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 208bbfa10b9b2..68476f7fa144b 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -98,6 +98,7 @@ Other enhancements - Added :meth:`Index.infer_objects` analogous to :meth:`Series.infer_objects` (:issue:`50034`) - Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`) - :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`) +- Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`) - .. --------------------------------------------------------------------------- diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 73e9176d3a6d2..06df08d7ddb1d 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -396,7 +396,8 @@ def array_strptime( result_timezone[i] = tz - except (ValueError, OutOfBoundsDatetime): + except (ValueError, OutOfBoundsDatetime) as ex: + ex.args = (str(ex) + f" at position {i}", ) if is_coerce: iresult[i] = NPY_NAT continue diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index c366613c2815f..dd1d4e58c8244 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1717,7 +1717,10 @@ def test_parse_multiple_delimited_dates_with_swap_warnings(): # GH46210 with pytest.raises( ValueError, - match=r"^time data '31/05/2000' does not match format '%m/%d/%Y' \(match\)$", + match=( + r"^time data '31/05/2000' does not match format '%m/%d/%Y' \(match\) " + "at position 1$" + ), ): pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"]) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 83e40f5f1d98b..b0277f04e360d 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -1093,7 +1093,10 @@ def test_datetime_bool_arrays_mixed(self, cache): to_datetime([False, datetime.today()], cache=cache) with pytest.raises( ValueError, - match=r"^time data 'True' does not match format '%Y%m%d' \(match\)$", + match=( + r"^time data 'True' does not match format '%Y%m%d' " + r"\(match\) at position 1$" + ), ): to_datetime(["20130101", True], cache=cache) tm.assert_index_equal( @@ -2072,7 +2075,9 @@ def test_to_datetime_on_datetime64_series(self, cache): def test_to_datetime_with_space_in_series(self, cache): # GH 6428 ser = Series(["10/18/2006", "10/18/2008", " "]) - msg = r"^time data ' ' does not match format '%m/%d/%Y' \(match\)$" + msg = ( + r"^time data ' ' does not match format '%m/%d/%Y' \(match\) at position 2$" + ) with pytest.raises(ValueError, match=msg): to_datetime(ser, errors="raise", cache=cache) result_coerce = to_datetime(ser, errors="coerce", cache=cache) @@ -2342,7 +2347,10 @@ def test_dayfirst_warnings_invalid_input(self): with pytest.raises( ValueError, - match=r"time data '03/30/2011' does not match format '%d/%m/%Y' \(match\)$", + match=( + r"time data '03/30/2011' does not match format '%d/%m/%Y' " + r"\(match\) at position 1$" + ), ): to_datetime(arr, dayfirst=True) @@ -2923,17 +2931,22 @@ def test_incorrect_value_exception(self): to_datetime(["today", "yesterday"]) @pytest.mark.parametrize( - "format, warning", [(None, UserWarning), ("%Y-%m-%d %H:%M:%S", None)] + "format, warning", + [ + (None, UserWarning), + ("%Y-%m-%d %H:%M:%S", None), + ("%Y-%d-%m %H:%M:%S", None), + ], ) def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning): # see gh-23830 msg = ( - "Out of bounds nanosecond timestamp: 2417-10-27 00:00:00 " - "present at position 0" + r"Out of bounds nanosecond timestamp: 2417-10-10 00:00:00" + r".* at position 0" ) with pytest.raises(OutOfBoundsDatetime, match=msg): with tm.assert_produces_warning(warning, match="Could not infer format"): - to_datetime("2417-10-27 00:00:00", format=format) + to_datetime("2417-10-10 00:00:00", format=format) @pytest.mark.parametrize( "arg, origin, expected_str", From 8e40df407f87b1ccfcc90838a8a0fccc8cd3ae6f Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 21 Dec 2022 10:16:10 +0000 Subject: [PATCH 2/7] unify messages --- pandas/_libs/tslib.pyx | 6 ++- pandas/_libs/tslibs/parsing.pyx | 6 +-- pandas/_libs/tslibs/strptime.pyx | 26 ++++++++---- pandas/tests/io/parser/test_parse_dates.py | 4 +- pandas/tests/tools/test_to_datetime.py | 46 +++++++++++++--------- 5 files changed, 55 insertions(+), 33 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index c1a30e03235b5..ac1679ef66bc7 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -515,9 +515,10 @@ cpdef array_to_datetime( iresult[i] = NPY_NAT continue elif is_raise: + match_msg = "match" if exact else "search" raise ValueError( f"time data \"{val}\" at position {i} doesn't " - f"match format \"{format}\"" + f"match format \"{format}\" ({match_msg})" ) return values, tz_out # these must be ns unit by-definition @@ -564,9 +565,10 @@ cpdef array_to_datetime( iresult[i] = NPY_NAT continue elif is_raise: + match_msg = "match" if exact else "search" raise ValueError( f"time data \"{val}\" at position {i} doesn't " - f"match format \"{format}\"" + f"match format \"{format}\" ({match_msg})" ) return values, tz_out diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 614db69425f4c..f384e85fb5eb5 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -263,7 +263,7 @@ def parse_datetime_string( datetime dt if not _does_string_look_like_datetime(date_string): - raise ValueError(f"Given date string {date_string} not likely a datetime") + raise ValueError(f'Given date string "{date_string}" not likely a datetime') if does_string_look_like_time(date_string): # use current datetime as default, not pass _DEFAULT_DATETIME @@ -297,7 +297,7 @@ def parse_datetime_string( except TypeError: # following may be raised from dateutil # TypeError: 'NoneType' object is not iterable - raise ValueError(f"Given date string {date_string} not likely a datetime") + raise ValueError(f'Given date string "{date_string}" not likely a datetime') return dt @@ -373,7 +373,7 @@ cdef parse_datetime_string_with_reso( int out_tzoffset if not _does_string_look_like_datetime(date_string): - raise ValueError(f"Given date string {date_string} not likely a datetime") + raise ValueError(f'Given date string "{date_string}" not likely a datetime') parsed, reso = _parse_delimited_date(date_string, dayfirst) if parsed is not None: diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 06df08d7ddb1d..196fbd40f0a02 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -236,17 +236,22 @@ def array_strptime( if exact: found = format_regex.match(val) if not found: - raise ValueError(f"time data '{val}' does not match " - f"format '{fmt}' (match)") + raise ValueError(f"time data \"{val}\" at position {i} doesn't " + f"match format \"{fmt}\" (match)") if len(val) != found.end(): - raise ValueError(f"unconverted data remains: {val[found.end():]}") + raise ValueError( + f"unconverted data remains at position {i}: " + f'"{val[found.end():]}"' + ) # search else: found = format_regex.search(val) if not found: - raise ValueError(f"time data {repr(val)} does not match format " - f"{repr(fmt)} (search)") + raise ValueError( + f"time data \"{val}\" at position {i} doesn't match " + f"format \"{fmt}\" (search)" + ) iso_year = -1 year = 1900 @@ -396,8 +401,15 @@ def array_strptime( result_timezone[i] = tz - except (ValueError, OutOfBoundsDatetime) as ex: - ex.args = (str(ex) + f" at position {i}", ) + except OutOfBoundsDatetime as ex: + ex.args = (f"{str(ex)} present at position {i}",) + if is_coerce: + iresult[i] = NPY_NAT + continue + elif is_raise: + raise + return values, [] + except ValueError: if is_coerce: iresult[i] = NPY_NAT continue diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index dd1d4e58c8244..c62e321707bfc 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1718,8 +1718,8 @@ def test_parse_multiple_delimited_dates_with_swap_warnings(): with pytest.raises( ValueError, match=( - r"^time data '31/05/2000' does not match format '%m/%d/%Y' \(match\) " - "at position 1$" + r'^time data "31/05/2000" at position 1 doesn\'t match format "%m/%d/%Y" ' + r"\(match\)$" ), ): pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"]) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index b0277f04e360d..e9a318b908767 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -479,7 +479,10 @@ def test_to_datetime_parse_timezone_malformed(self, offset): fmt = "%Y-%m-%d %H:%M:%S %z" date = "2010-01-01 12:00:00 " + offset - msg = "does not match format|unconverted data remains" + msg = ( + r'time data ".*" at position 0 doesn\'t match format ".*" \(match\)|' + r'unconverted data remains at position 0: ".*"' + ) with pytest.raises(ValueError, match=msg): to_datetime([date], format=fmt) @@ -1094,8 +1097,8 @@ def test_datetime_bool_arrays_mixed(self, cache): with pytest.raises( ValueError, match=( - r"^time data 'True' does not match format '%Y%m%d' " - r"\(match\) at position 1$" + r'^time data "True" at position 1 doesn\'t match format "%Y%m%d" ' + r"\(match\)$" ), ): to_datetime(["20130101", True], cache=cache) @@ -1136,10 +1139,8 @@ def test_datetime_invalid_scalar(self, value, format, warning): assert res is NaT msg = ( - "does not match format|" - "unconverted data remains:|" - "second must be in 0..59|" - f"Given date string {value} not likely a datetime" + r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S" \(match\)$|' + r'^Given date string "a" not likely a datetime$|' ) with pytest.raises(ValueError, match=msg): with tm.assert_produces_warning(warning, match="Could not infer format"): @@ -1160,7 +1161,7 @@ def test_datetime_outofbounds_scalar(self, value, format, warning): assert res is NaT if format is not None: - msg = "does not match format|Out of bounds .* present at position 0" + msg = r'^time data ".*" at position 0 doesn\'t match format ".*" \(match\)$' with pytest.raises(ValueError, match=msg): to_datetime(value, errors="raise", format=format) else: @@ -1185,10 +1186,10 @@ def test_datetime_invalid_index(self, values, format, warning): tm.assert_index_equal(res, DatetimeIndex([NaT] * len(values))) msg = ( - "does not match format|" - "unconverted data remains:|" - f"Given date string {values[0]} not likely a datetime|" - "second must be in 0..59" + r'^(Given date string "a" not likely a datetime present at position 0|' + r'time data "a" at position 0 doesn\'t match format "%H:%M:%S" \(match\)|' + r'unconverted data remains at position 0: "9"|' + r"second must be in 0..59: 00:01:99 present at position 0)$" ) with pytest.raises(ValueError, match=msg): with tm.assert_produces_warning(warning, match="Could not infer format"): @@ -1808,8 +1809,8 @@ def test_dataframe_coerce(self, cache): df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]}) msg = ( - "cannot assemble the datetimes: time data .+ does not " - r"match format '%Y%m%d' \(match\)" + r'^cannot assemble the datetimes: time data ".+" at position 1 doesn\'t ' + r'match format "%Y%m%d" \(match\)$' ) with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) @@ -1885,7 +1886,10 @@ def test_dataframe_mixed(self, cache): def test_dataframe_float(self, cache): # float df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]}) - msg = "cannot assemble the datetimes: unconverted data remains: 1" + msg = ( + r"^cannot assemble the datetimes: unconverted data remains at position " + r'0: "1"$' + ) with pytest.raises(ValueError, match=msg): to_datetime(df, cache=cache) @@ -2076,7 +2080,7 @@ def test_to_datetime_with_space_in_series(self, cache): # GH 6428 ser = Series(["10/18/2006", "10/18/2008", " "]) msg = ( - r"^time data ' ' does not match format '%m/%d/%Y' \(match\) at position 2$" + r'^time data " " at position 2 doesn\'t match format "%m/%d/%Y" \(match\)$' ) with pytest.raises(ValueError, match=msg): to_datetime(ser, errors="raise", cache=cache) @@ -2348,8 +2352,8 @@ def test_dayfirst_warnings_invalid_input(self): with pytest.raises( ValueError, match=( - r"time data '03/30/2011' does not match format '%d/%m/%Y' " - r"\(match\) at position 1$" + r'^time data "03/30/2011" at position 1 doesn\'t match format ' + r'"%d/%m/%Y" \(match\)$' ), ): to_datetime(arr, dayfirst=True) @@ -2418,7 +2422,11 @@ def test_to_datetime_infer_datetime_format_consistent_format( def test_to_datetime_inconsistent_format(self, cache): data = ["01/01/2011 00:00:00", "01-02-2011 00:00:00", "2011-01-03T00:00:00"] ser = Series(np.array(data)) - with pytest.raises(ValueError, match="does not match format"): + msg = ( + r'^time data "01-02-2011 00:00:00" at position 1 doesn\'t match format ' + r'"%m/%d/%Y %H:%M:%S" \(match\)$' + ) + with pytest.raises(ValueError, match=msg): to_datetime(ser, cache=cache) def test_to_datetime_consistent_format(self, cache): From 9040dcaf41300eebba9bb8c184dea759f61a8db9 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 21 Dec 2022 10:33:36 +0000 Subject: [PATCH 3/7] fixup regexes --- pandas/tests/tools/test_to_datetime.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index e9a318b908767..7e0b7a22ed978 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -480,8 +480,8 @@ def test_to_datetime_parse_timezone_malformed(self, offset): date = "2010-01-01 12:00:00 " + offset msg = ( - r'time data ".*" at position 0 doesn\'t match format ".*" \(match\)|' - r'unconverted data remains at position 0: ".*"' + r'^(time data ".*" at position 0 doesn\'t match format ".*" \(match\)|' + r'unconverted data remains at position 0: ".*")$' ) with pytest.raises(ValueError, match=msg): to_datetime([date], format=fmt) @@ -1139,8 +1139,10 @@ def test_datetime_invalid_scalar(self, value, format, warning): assert res is NaT msg = ( - r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S" \(match\)$|' - r'^Given date string "a" not likely a datetime$|' + r'^(time data "a" at position 0 doesn\'t match format "%H:%M:%S" \(match\)|' + r'Given date string "a" not likely a datetime present at position 0|' + r'unconverted data remains at position 0: "9"|' + r"second must be in 0..59: 00:01:99 present at position 0)$" ) with pytest.raises(ValueError, match=msg): with tm.assert_produces_warning(warning, match="Could not infer format"): @@ -2949,8 +2951,8 @@ def test_incorrect_value_exception(self): def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning): # see gh-23830 msg = ( - r"Out of bounds nanosecond timestamp: 2417-10-10 00:00:00" - r".* at position 0" + r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00" + r".* at position 0$" ) with pytest.raises(OutOfBoundsDatetime, match=msg): with tm.assert_produces_warning(warning, match="Could not infer format"): From c49640d51da23769faff471ba833ec5e6ef10779 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 21 Dec 2022 10:36:41 +0000 Subject: [PATCH 4/7] simplify --- pandas/tests/tools/test_to_datetime.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 7e0b7a22ed978..febb35aa97d79 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2951,8 +2951,8 @@ def test_incorrect_value_exception(self): def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning): # see gh-23830 msg = ( - r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00" - r".* at position 0$" + r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00 " + r"present at position 0$" ) with pytest.raises(OutOfBoundsDatetime, match=msg): with tm.assert_produces_warning(warning, match="Could not infer format"): From 1e7613f1e8db27d93a26840ee3fa908e05e0b310 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Wed, 21 Dec 2022 11:23:39 +0000 Subject: [PATCH 5/7] fix test --- pandas/tests/scalar/period/test_period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 112f23b3b0f16..0636ecb023530 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -303,7 +303,7 @@ def test_invalid_arguments(self): with pytest.raises(ValueError, match=msg): Period(month=1) - msg = "Given date string -2000 not likely a datetime" + msg = '^Given date string "-2000" not likely a datetime$' with pytest.raises(ValueError, match=msg): Period("-2000", "A") msg = "day is out of range for month" From 3338b431253e6b1c3aae9eb3adea2370574bb487 Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Thu, 22 Dec 2022 09:27:57 +0000 Subject: [PATCH 6/7] remove (search) and (match) from messages --- pandas/_libs/tslib.pyx | 6 +-- pandas/_libs/tslibs/strptime.pyx | 4 +- pandas/tests/io/parser/test_parse_dates.py | 3 +- pandas/tests/tools/test_to_datetime.py | 49 +++++++++++----------- 4 files changed, 30 insertions(+), 32 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index ac1679ef66bc7..c1a30e03235b5 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -515,10 +515,9 @@ cpdef array_to_datetime( iresult[i] = NPY_NAT continue elif is_raise: - match_msg = "match" if exact else "search" raise ValueError( f"time data \"{val}\" at position {i} doesn't " - f"match format \"{format}\" ({match_msg})" + f"match format \"{format}\"" ) return values, tz_out # these must be ns unit by-definition @@ -565,10 +564,9 @@ cpdef array_to_datetime( iresult[i] = NPY_NAT continue elif is_raise: - match_msg = "match" if exact else "search" raise ValueError( f"time data \"{val}\" at position {i} doesn't " - f"match format \"{format}\" ({match_msg})" + f"match format \"{format}\"" ) return values, tz_out diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 196fbd40f0a02..72205599e8108 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -237,7 +237,7 @@ def array_strptime( found = format_regex.match(val) if not found: raise ValueError(f"time data \"{val}\" at position {i} doesn't " - f"match format \"{fmt}\" (match)") + f"match format \"{fmt}\"") if len(val) != found.end(): raise ValueError( f"unconverted data remains at position {i}: " @@ -250,7 +250,7 @@ def array_strptime( if not found: raise ValueError( f"time data \"{val}\" at position {i} doesn't match " - f"format \"{fmt}\" (search)" + f"format \"{fmt}\"" ) iso_year = -1 diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index bf881eb928c2d..597ecc877e30c 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1719,8 +1719,7 @@ def test_parse_multiple_delimited_dates_with_swap_warnings(): with pytest.raises( ValueError, match=( - r'^time data "31/05/2000" at position 1 doesn\'t match format "%m/%d/%Y" ' - r"\(match\)$" + r'^time data "31/05/2000" at position 1 doesn\'t match format "%m/%d/%Y"$' ), ): pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"]) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index febb35aa97d79..927388408cf27 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -479,9 +479,11 @@ def test_to_datetime_parse_timezone_malformed(self, offset): fmt = "%Y-%m-%d %H:%M:%S %z" date = "2010-01-01 12:00:00 " + offset - msg = ( - r'^(time data ".*" at position 0 doesn\'t match format ".*" \(match\)|' - r'unconverted data remains at position 0: ".*")$' + msg = "|".join( + [ + r'^time data ".*" at position 0 doesn\'t match format ".*"$', + r'^unconverted data remains at position 0: ".*"$', + ] ) with pytest.raises(ValueError, match=msg): to_datetime([date], format=fmt) @@ -1096,10 +1098,7 @@ def test_datetime_bool_arrays_mixed(self, cache): to_datetime([False, datetime.today()], cache=cache) with pytest.raises( ValueError, - match=( - r'^time data "True" at position 1 doesn\'t match format "%Y%m%d" ' - r"\(match\)$" - ), + match=r'^time data "True" at position 1 doesn\'t match format "%Y%m%d"$', ): to_datetime(["20130101", True], cache=cache) tm.assert_index_equal( @@ -1138,11 +1137,13 @@ def test_datetime_invalid_scalar(self, value, format, warning): res = to_datetime(value, errors="coerce", format=format) assert res is NaT - msg = ( - r'^(time data "a" at position 0 doesn\'t match format "%H:%M:%S" \(match\)|' - r'Given date string "a" not likely a datetime present at position 0|' - r'unconverted data remains at position 0: "9"|' - r"second must be in 0..59: 00:01:99 present at position 0)$" + msg = "|".join( + [ + r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S"$', + r'^Given date string "a" not likely a datetime present at position 0$', + r'^unconverted data remains at position 0: "9"$', + r"^second must be in 0..59: 00:01:99 present at position 0$", + ] ) with pytest.raises(ValueError, match=msg): with tm.assert_produces_warning(warning, match="Could not infer format"): @@ -1163,7 +1164,7 @@ def test_datetime_outofbounds_scalar(self, value, format, warning): assert res is NaT if format is not None: - msg = r'^time data ".*" at position 0 doesn\'t match format ".*" \(match\)$' + msg = r'^time data ".*" at position 0 doesn\'t match format ".*"$' with pytest.raises(ValueError, match=msg): to_datetime(value, errors="raise", format=format) else: @@ -1187,11 +1188,13 @@ def test_datetime_invalid_index(self, values, format, warning): res = to_datetime(values, errors="coerce", format=format) tm.assert_index_equal(res, DatetimeIndex([NaT] * len(values))) - msg = ( - r'^(Given date string "a" not likely a datetime present at position 0|' - r'time data "a" at position 0 doesn\'t match format "%H:%M:%S" \(match\)|' - r'unconverted data remains at position 0: "9"|' - r"second must be in 0..59: 00:01:99 present at position 0)$" + msg = "|".join( + [ + r'^Given date string "a" not likely a datetime present at position 0$', + r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S"$', + r'^unconverted data remains at position 0: "9"$', + r"^second must be in 0..59: 00:01:99 present at position 0$", + ] ) with pytest.raises(ValueError, match=msg): with tm.assert_produces_warning(warning, match="Could not infer format"): @@ -1812,7 +1815,7 @@ def test_dataframe_coerce(self, cache): msg = ( r'^cannot assemble the datetimes: time data ".+" at position 1 doesn\'t ' - r'match format "%Y%m%d" \(match\)$' + r'match format "%Y%m%d"$' ) with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) @@ -2081,9 +2084,7 @@ def test_to_datetime_on_datetime64_series(self, cache): def test_to_datetime_with_space_in_series(self, cache): # GH 6428 ser = Series(["10/18/2006", "10/18/2008", " "]) - msg = ( - r'^time data " " at position 2 doesn\'t match format "%m/%d/%Y" \(match\)$' - ) + msg = r'^time data " " at position 2 doesn\'t match format "%m/%d/%Y"$' with pytest.raises(ValueError, match=msg): to_datetime(ser, errors="raise", cache=cache) result_coerce = to_datetime(ser, errors="coerce", cache=cache) @@ -2355,7 +2356,7 @@ def test_dayfirst_warnings_invalid_input(self): ValueError, match=( r'^time data "03/30/2011" at position 1 doesn\'t match format ' - r'"%d/%m/%Y" \(match\)$' + r'"%d/%m/%Y"$' ), ): to_datetime(arr, dayfirst=True) @@ -2426,7 +2427,7 @@ def test_to_datetime_inconsistent_format(self, cache): ser = Series(np.array(data)) msg = ( r'^time data "01-02-2011 00:00:00" at position 1 doesn\'t match format ' - r'"%m/%d/%Y %H:%M:%S" \(match\)$' + r'"%m/%d/%Y %H:%M:%S"$' ) with pytest.raises(ValueError, match=msg): to_datetime(ser, cache=cache) From 34a7138f2de2e0f8b589e5fce47bcb7f9385428c Mon Sep 17 00:00:00 2001 From: MarcoGorelli <> Date: Sat, 24 Dec 2022 17:06:14 +0000 Subject: [PATCH 7/7] simplify --- pandas/_libs/tslibs/strptime.pyx | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index 72205599e8108..c1bc5fd0910f8 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -401,15 +401,9 @@ def array_strptime( result_timezone[i] = tz - except OutOfBoundsDatetime as ex: - ex.args = (f"{str(ex)} present at position {i}",) - if is_coerce: - iresult[i] = NPY_NAT - continue - elif is_raise: - raise - return values, [] - except ValueError: + except (ValueError, OutOfBoundsDatetime) as ex: + if isinstance(ex, OutOfBoundsDatetime): + ex.args = (f"{str(ex)} present at position {i}",) if is_coerce: iresult[i] = NPY_NAT continue