diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 976a53e9117de..48e855f7e9905 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -508,8 +508,8 @@ cpdef array_to_datetime( continue elif is_raise: raise ValueError( - f"time data \"{val}\" at position {i} doesn't " - f"match format \"{format}\"" + f"time data \"{val}\" doesn't " + f"match format \"{format}\", at position {i}" ) return values, tz_out # these must be ns unit by-definition @@ -557,8 +557,8 @@ cpdef array_to_datetime( continue elif is_raise: raise ValueError( - f"time data \"{val}\" at position {i} doesn't " - f"match format \"{format}\"" + f"time data \"{val}\" doesn't " + f"match format \"{format}\", at position {i}" ) return values, tz_out @@ -575,8 +575,8 @@ cpdef array_to_datetime( iresult[i] = NPY_NAT continue raise TypeError( - f"invalid string coercion to datetime for \"{val}\" " - f"at position {i}" + f"invalid string coercion to datetime " + f"for \"{val}\", at position {i}" ) if tz is not None: @@ -619,7 +619,7 @@ cpdef array_to_datetime( raise TypeError(f"{type(val)} is not convertible to datetime") except OutOfBoundsDatetime as ex: - ex.args = (str(ex) + f" present at position {i}", ) + ex.args = (f"{ex}, at position {i}",) if is_coerce: iresult[i] = NPY_NAT continue @@ -779,7 +779,7 @@ cdef _array_to_datetime_object( pydatetime_to_dt64(oresult[i], &dts) check_dts_bounds(&dts) except (ValueError, OverflowError) as ex: - ex.args = (f"{ex} present at position {i}", ) + ex.args = (f"{ex}, at position {i}", ) if is_coerce: oresult[i] = NaT continue diff --git a/pandas/_libs/tslibs/strptime.pyx b/pandas/_libs/tslibs/strptime.pyx index c1bc5fd0910f8..a863824d92cc7 100644 --- a/pandas/_libs/tslibs/strptime.pyx +++ b/pandas/_libs/tslibs/strptime.pyx @@ -236,11 +236,11 @@ def array_strptime( if exact: found = format_regex.match(val) if not found: - raise ValueError(f"time data \"{val}\" at position {i} doesn't " + raise ValueError(f"time data \"{val}\" doesn't " f"match format \"{fmt}\"") if len(val) != found.end(): raise ValueError( - f"unconverted data remains at position {i}: " + f"unconverted data remains: " f'"{val[found.end():]}"' ) @@ -249,7 +249,7 @@ def array_strptime( found = format_regex.search(val) if not found: raise ValueError( - f"time data \"{val}\" at position {i} doesn't match " + f"time data \"{val}\" doesn't match " f"format \"{fmt}\"" ) @@ -402,8 +402,7 @@ def array_strptime( result_timezone[i] = tz except (ValueError, OutOfBoundsDatetime) as ex: - if isinstance(ex, OutOfBoundsDatetime): - ex.args = (f"{str(ex)} present at position {i}",) + ex.args = (f"{str(ex)}, at position {i}",) if is_coerce: iresult[i] = NPY_NAT continue diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index a7f9b14f44674..f2de6b607d737 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -259,7 +259,7 @@ def f(dtype): f("float64") # 10822 - msg = "Unknown string format: aa present at position 0" + msg = "^Unknown string format: aa, at position 0$" with pytest.raises(ValueError, match=msg): f("M8[ns]") diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index e29c10ef6e58a..f962a552d9009 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -547,7 +547,7 @@ def test_construction_outofbounds(self): # coerces to object tm.assert_index_equal(Index(dates), exp) - msg = "Out of bounds .* present at position 0" + msg = "^Out of bounds nanosecond timestamp: 3000-01-01 00:00:00, at position 0$" with pytest.raises(OutOfBoundsDatetime, match=msg): # can't create DatetimeIndex DatetimeIndex(dates) diff --git a/pandas/tests/indexes/datetimes/test_scalar_compat.py b/pandas/tests/indexes/datetimes/test_scalar_compat.py index 42aba136f378d..be05a649ec0b6 100644 --- a/pandas/tests/indexes/datetimes/test_scalar_compat.py +++ b/pandas/tests/indexes/datetimes/test_scalar_compat.py @@ -38,7 +38,7 @@ def test_dti_date(self): @pytest.mark.parametrize("data", [["1400-01-01"], [datetime(1400, 1, 1)]]) def test_dti_date_out_of_range(self, data): # GH#1475 - msg = "Out of bounds .* present at position 0" + msg = "^Out of bounds nanosecond timestamp: 1400-01-01 00:00:00, at position 0$" with pytest.raises(OutOfBoundsDatetime, match=msg): DatetimeIndex(data) diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 766b8fe805419..ee9314c8779dd 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -1721,7 +1721,7 @@ def test_parse_multiple_delimited_dates_with_swap_warnings(): with pytest.raises( ValueError, match=( - r'^time data "31/05/2000" at position 1 doesn\'t match format "%m/%d/%Y"$' + r'^time data "31/05/2000" doesn\'t match format "%m/%d/%Y", at position 1$' ), ): pd.to_datetime(["01/01/2000", "31/05/2000", "31/05/2001", "01/02/2000"]) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index a4e82838b61d3..f856f18552594 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -76,7 +76,7 @@ def test_infer_with_date_and_datetime(self): def test_unparseable_strings_with_dt64_dtype(self): # pre-2.0 these would be silently ignored and come back with object dtype vals = ["aa"] - msg = "Unknown string format: aa present at position 0" + msg = "^Unknown string format: aa, at position 0$" with pytest.raises(ValueError, match=msg): Series(vals, dtype="datetime64[ns]") diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 927388408cf27..640daa09f5eee 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -481,8 +481,8 @@ def test_to_datetime_parse_timezone_malformed(self, offset): msg = "|".join( [ - r'^time data ".*" at position 0 doesn\'t match format ".*"$', - r'^unconverted data remains at position 0: ".*"$', + r'^time data ".*" doesn\'t match format ".*", at position 0$', + r'^unconverted data remains: ".*", at position 0$', ] ) with pytest.raises(ValueError, match=msg): @@ -859,7 +859,7 @@ def test_to_datetime_dt64s_and_str(self, arg, format): "dt", [np.datetime64("1000-01-01"), np.datetime64("5000-01-02")] ) def test_to_datetime_dt64s_out_of_bounds(self, cache, dt): - msg = "Out of bounds .* present at position 0" + msg = "^Out of bounds nanosecond timestamp: .*, at position 0$" with pytest.raises(OutOfBoundsDatetime, match=msg): to_datetime(dt, errors="raise") @@ -1098,7 +1098,7 @@ def test_datetime_bool_arrays_mixed(self, cache): to_datetime([False, datetime.today()], cache=cache) with pytest.raises( ValueError, - match=r'^time data "True" at position 1 doesn\'t match format "%Y%m%d"$', + match=r'^time data "True" doesn\'t match format "%Y%m%d", at position 1$', ): to_datetime(["20130101", True], cache=cache) tm.assert_index_equal( @@ -1139,10 +1139,10 @@ def test_datetime_invalid_scalar(self, value, format, warning): msg = "|".join( [ - r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S"$', - r'^Given date string "a" not likely a datetime present at position 0$', - r'^unconverted data remains at position 0: "9"$', - r"^second must be in 0..59: 00:01:99 present at position 0$", + r'^time data "a" doesn\'t match format "%H:%M:%S", at position 0$', + r'^Given date string "a" not likely a datetime, at position 0$', + r'^unconverted data remains: "9", at position 0$', + r"^second must be in 0..59: 00:01:99, at position 0$", ] ) with pytest.raises(ValueError, match=msg): @@ -1164,11 +1164,11 @@ def test_datetime_outofbounds_scalar(self, value, format, warning): assert res is NaT if format is not None: - msg = r'^time data ".*" at position 0 doesn\'t match format ".*"$' + msg = r'^time data ".*" doesn\'t match format ".*", at position 0$' with pytest.raises(ValueError, match=msg): to_datetime(value, errors="raise", format=format) else: - msg = "Out of bounds .* present at position 0" + msg = "^Out of bounds .*, at position 0$" with pytest.raises( OutOfBoundsDatetime, match=msg ), tm.assert_produces_warning(warning, match="Could not infer format"): @@ -1190,10 +1190,10 @@ def test_datetime_invalid_index(self, values, format, warning): msg = "|".join( [ - r'^Given date string "a" not likely a datetime present at position 0$', - r'^time data "a" at position 0 doesn\'t match format "%H:%M:%S"$', - r'^unconverted data remains at position 0: "9"$', - r"^second must be in 0..59: 00:01:99 present at position 0$", + r'^Given date string "a" not likely a datetime, at position 0$', + r'^time data "a" doesn\'t match format "%H:%M:%S", at position 0$', + r'^unconverted data remains: "9", at position 0$', + r"^second must be in 0..59: 00:01:99, at position 0$", ] ) with pytest.raises(ValueError, match=msg): @@ -1373,7 +1373,7 @@ def test_to_datetime_malformed_raise(self): ts_strings = ["200622-12-31", "111111-24-11"] with pytest.raises( ValueError, - match=r"^hour must be in 0\.\.23: 111111-24-11 present at position 1$", + match=r"^hour must be in 0\.\.23: 111111-24-11, at position 1$", ): with tm.assert_produces_warning( UserWarning, match="Could not infer format" @@ -1814,8 +1814,8 @@ def test_dataframe_coerce(self, cache): df2 = DataFrame({"year": [2015, 2016], "month": [2, 20], "day": [4, 5]}) msg = ( - r'^cannot assemble the datetimes: time data ".+" at position 1 doesn\'t ' - r'match format "%Y%m%d"$' + r'^cannot assemble the datetimes: time data ".+" doesn\'t ' + r'match format "%Y%m%d", at position 1$' ) with pytest.raises(ValueError, match=msg): to_datetime(df2, cache=cache) @@ -1892,8 +1892,8 @@ def test_dataframe_float(self, cache): # float df = DataFrame({"year": [2000, 2001], "month": [1.5, 1], "day": [1, 1]}) msg = ( - r"^cannot assemble the datetimes: unconverted data remains at position " - r'0: "1"$' + r"^cannot assemble the datetimes: unconverted data remains: " + r'"1", at position 0$' ) with pytest.raises(ValueError, match=msg): to_datetime(df, cache=cache) @@ -1915,7 +1915,7 @@ def test_to_datetime_barely_out_of_bounds(self): # in an in-bounds datetime arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object) - msg = "Out of bounds .* present at position 0" + msg = "^Out of bounds nanosecond timestamp: .*, at position 0" with pytest.raises(OutOfBoundsDatetime, match=msg): with tm.assert_produces_warning( UserWarning, match="Could not infer format" @@ -1954,8 +1954,8 @@ def test_to_datetime_iso8601_fails(self, input, format, exact): with pytest.raises( ValueError, match=( - rf"time data \"{input}\" at position 0 doesn't match format " - rf"\"{format}\"" + rf"time data \"{input}\" doesn't match format " + rf"\"{format}\", at position 0" ), ): to_datetime(input, format=format, exact=exact) @@ -1976,8 +1976,8 @@ def test_to_datetime_iso8601_exact_fails(self, input, format): with pytest.raises( ValueError, match=( - rf"time data \"{input}\" at position 0 doesn't match format " - rf"\"{format}\"" + rf"time data \"{input}\" doesn't match format " + rf"\"{format}\", at position 0" ), ): to_datetime(input, format=format) @@ -2015,8 +2015,8 @@ def test_to_datetime_iso8601_separator(self, input, format): with pytest.raises( ValueError, match=( - rf"time data \"{input}\" at position 0 doesn\'t match format " - rf"\"{format}\"" + rf"time data \"{input}\" doesn\'t match format " + rf"\"{format}\", at position 0" ), ): to_datetime(input, format=format) @@ -2084,7 +2084,7 @@ def test_to_datetime_on_datetime64_series(self, cache): def test_to_datetime_with_space_in_series(self, cache): # GH 6428 ser = Series(["10/18/2006", "10/18/2008", " "]) - msg = r'^time data " " at position 2 doesn\'t match format "%m/%d/%Y"$' + msg = r'^time data " " doesn\'t match format "%m/%d/%Y", at position 2$' with pytest.raises(ValueError, match=msg): to_datetime(ser, errors="raise", cache=cache) result_coerce = to_datetime(ser, errors="coerce", cache=cache) @@ -2355,8 +2355,8 @@ def test_dayfirst_warnings_invalid_input(self): with pytest.raises( ValueError, match=( - r'^time data "03/30/2011" at position 1 doesn\'t match format ' - r'"%d/%m/%Y"$' + r'^time data "03/30/2011" doesn\'t match format ' + r'"%d/%m/%Y", at position 1$' ), ): to_datetime(arr, dayfirst=True) @@ -2426,8 +2426,8 @@ def test_to_datetime_inconsistent_format(self, cache): data = ["01/01/2011 00:00:00", "01-02-2011 00:00:00", "2011-01-03T00:00:00"] ser = Series(np.array(data)) msg = ( - r'^time data "01-02-2011 00:00:00" at position 1 doesn\'t match format ' - r'"%m/%d/%Y %H:%M:%S"$' + r'^time data "01-02-2011 00:00:00" doesn\'t match format ' + r'"%m/%d/%Y %H:%M:%S", at position 1$' ) with pytest.raises(ValueError, match=msg): to_datetime(ser, cache=cache) @@ -2550,11 +2550,49 @@ def test_day_not_in_month_raise(self, cache): ): to_datetime("2015-02-29", errors="raise", cache=cache) - @pytest.mark.parametrize("arg", ["2015-02-29", "2015-02-32", "2015-04-31"]) - def test_day_not_in_month_raise_value(self, cache, arg): - msg = f'time data "{arg}" at position 0 doesn\'t match format "%Y-%m-%d"' + @pytest.mark.parametrize( + "arg, format, msg", + [ + ( + "2015-02-29", + "%Y-%m-%d", + '^time data "2015-02-29" doesn\'t match format "%Y-%m-%d", ' + "at position 0$", + ), + ( + "2015-29-02", + "%Y-%d-%m", + "^day is out of range for month, at position 0$", + ), + ( + "2015-02-32", + "%Y-%m-%d", + '^time data "2015-02-32" doesn\'t match format "%Y-%m-%d", ' + "at position 0$", + ), + ( + "2015-32-02", + "%Y-%d-%m", + '^time data "2015-32-02" doesn\'t match format "%Y-%d-%m", ' + "at position 0$", + ), + ( + "2015-04-31", + "%Y-%m-%d", + '^time data "2015-04-31" doesn\'t match format "%Y-%m-%d", ' + "at position 0$", + ), + ( + "2015-31-04", + "%Y-%d-%m", + "^day is out of range for month, at position 0$", + ), + ], + ) + def test_day_not_in_month_raise_value(self, cache, arg, format, msg): + # https://github.com/pandas-dev/pandas/issues/50462 with pytest.raises(ValueError, match=msg): - to_datetime(arg, errors="raise", format="%Y-%m-%d", cache=cache) + to_datetime(arg, errors="raise", format=format, cache=cache) @pytest.mark.parametrize( "expected, format, warning", @@ -2934,7 +2972,7 @@ def test_invalid_origins_tzinfo(self): def test_incorrect_value_exception(self): # GH47495 with pytest.raises( - ValueError, match="Unknown string format: yesterday present at position 1" + ValueError, match="Unknown string format: yesterday, at position 1" ): with tm.assert_produces_warning( UserWarning, match="Could not infer format" @@ -2952,8 +2990,7 @@ def test_incorrect_value_exception(self): def test_to_datetime_out_of_bounds_with_format_arg(self, format, warning): # see gh-23830 msg = ( - r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00 " - r"present at position 0$" + r"^Out of bounds nanosecond timestamp: 2417-10-10 00:00:00, at position 0$" ) with pytest.raises(OutOfBoundsDatetime, match=msg): with tm.assert_produces_warning(warning, match="Could not infer format"): diff --git a/pandas/tests/tslibs/test_array_to_datetime.py b/pandas/tests/tslibs/test_array_to_datetime.py index 80aa5d7fb1c19..63adb8427969d 100644 --- a/pandas/tests/tslibs/test_array_to_datetime.py +++ b/pandas/tests/tslibs/test_array_to_datetime.py @@ -126,7 +126,7 @@ def test_coerce_outside_ns_bounds(invalid_date, errors): kwargs = {"values": arr, "errors": errors} if errors == "raise": - msg = "Out of bounds .* present at position 0" + msg = "^Out of bounds nanosecond timestamp: .*, at position 0$" with pytest.raises(ValueError, match=msg): tslib.array_to_datetime(**kwargs) @@ -171,9 +171,7 @@ def test_to_datetime_barely_out_of_bounds(): # Close enough to bounds that dropping nanos # would result in an in-bounds datetime. arr = np.array(["2262-04-11 23:47:16.854775808"], dtype=object) - msg = ( - "Out of bounds nanosecond timestamp: 2262-04-11 23:47:16 present at position 0" - ) + msg = "^Out of bounds nanosecond timestamp: 2262-04-11 23:47:16, at position 0$" with pytest.raises(tslib.OutOfBoundsDatetime, match=msg): tslib.array_to_datetime(arr)