From b5ac0bee693e3852711525ac95cc55f1975e5ee0 Mon Sep 17 00:00:00 2001 From: dannyi96 Date: Sun, 7 Aug 2022 03:21:08 +0530 Subject: [PATCH 1/6] #16757: improvement of to_datetime errors --- pandas/_libs/tslib.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index b3e51191e8efa..8376e7f413ad8 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -595,7 +595,7 @@ cpdef array_to_datetime( continue elif is_raise: raise ValueError( - f"time data {val} doesn't match format specified" + f"time data \"{val}\" at position {i} doesn't match format specified" ) return values, tz_out @@ -607,11 +607,11 @@ cpdef array_to_datetime( # to check if all arguments have the same tzinfo tz = py_dt.utcoffset() - except (ValueError, OverflowError): + except (ValueError, OverflowError) as err: if is_coerce: iresult[i] = NPY_NAT continue - raise TypeError("invalid string coercion to datetime") + raise type(err)(f"invalid string coercion to datetime for \"{val}\" at position {i}") if tz is not None: seen_datetime_offset = True From 9b6c357d8480ff478316d5136840d77ca2f7b96a Mon Sep 17 00:00:00 2001 From: dannyi96 Date: Sat, 13 Aug 2022 00:23:47 +0530 Subject: [PATCH 2/6] The exceptions from to_datetime(errors='raise') could include information about the exception --- pandas/_libs/tslibs/parsing.pyx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 97a8f81094a8f..5efba513b3f38 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -286,7 +286,7 @@ def parse_datetime_string( datetime dt if not _does_string_look_like_datetime(date_string): - raise ValueError('Given date string not likely a datetime.') + raise ValueError(f'Given date string {date_string} not likely a datetime.') if does_string_look_like_time(date_string): # use current datetime as default, not pass _DEFAULT_DATETIME @@ -312,7 +312,7 @@ def parse_datetime_string( except TypeError: # following may be raised from dateutil # TypeError: 'NoneType' object is not iterable - raise ValueError('Given date string not likely a datetime.') + raise ValueError(f'Given date string {date_string} not likely a datetime.') return dt @@ -388,7 +388,7 @@ cdef parse_datetime_string_with_reso( int out_tzoffset if not _does_string_look_like_datetime(date_string): - raise ValueError('Given date string not likely a datetime.') + raise ValueError(f'Given date string {date_string} not likely a datetime.') parsed, reso = _parse_delimited_date(date_string, dayfirst) if parsed is not None: From ad1082197488380d0a5696ce7bc6788a3becd14c Mon Sep 17 00:00:00 2001 From: dannyi96 Date: Mon, 15 Aug 2022 23:14:14 +0530 Subject: [PATCH 3/6] testcase updates --- pandas/_libs/tslib.pyx | 2 +- pandas/_libs/tslibs/parsing.pyx | 6 +++--- pandas/tests/tools/test_to_datetime.py | 6 +++--- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 868757a2c9679..5a29a976da89b 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -611,7 +611,7 @@ cpdef array_to_datetime( if is_coerce: iresult[i] = NPY_NAT continue - raise type(err)(f"invalid string coercion to datetime for \"{val}\" at position {i}") + raise TypeError(f"invalid string coercion to datetime for \"{val}\" at position {i}") if tz is not None: seen_datetime_offset = True diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index f0d0613ce5e85..8c223020c4012 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -286,7 +286,7 @@ def parse_datetime_string( datetime dt if not _does_string_look_like_datetime(date_string): - raise ValueError(f'Given date string {date_string} not likely a datetime.') + raise ValueError(f'Given date string {date_string} not likely a datetime') if does_string_look_like_time(date_string): # use current datetime as default, not pass _DEFAULT_DATETIME @@ -320,7 +320,7 @@ def parse_datetime_string( except TypeError: # following may be raised from dateutil # TypeError: 'NoneType' object is not iterable - raise ValueError(f'Given date string {date_string} not likely a datetime.') + raise ValueError(f'Given date string {date_string} not likely a datetime') return dt @@ -396,7 +396,7 @@ cdef parse_datetime_string_with_reso( int out_tzoffset if not _does_string_look_like_datetime(date_string): - raise ValueError(f'Given date string {date_string} not likely a datetime.') + raise ValueError(f'Given date string {date_string} not likely a datetime') parsed, reso = _parse_delimited_date(date_string, dayfirst) if parsed is not None: diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 4050817b39b88..392a0b35d9b13 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -951,7 +951,7 @@ def test_datetime_invalid_scalar(self, value, format, infer): msg = ( "is a bad directive in format|" "second must be in 0..59|" - "Given date string not likely a datetime" + f"Given date string {value} not likely a datetime" ) with pytest.raises(ValueError, match=msg): to_datetime( @@ -1003,7 +1003,7 @@ def test_datetime_invalid_index(self, values, format, infer): msg = ( "is a bad directive in format|" - "Given date string not likely a datetime|" + f"Given date string {values[0]} not likely a datetime|" "second must be in 0..59" ) with pytest.raises(ValueError, match=msg): @@ -2220,7 +2220,7 @@ def test_day_not_in_month_raise(self, cache): @pytest.mark.parametrize("arg", ["2015-02-29", "2015-02-32", "2015-04-31"]) def test_day_not_in_month_raise_value(self, cache, arg): - msg = f"time data {arg} doesn't match format specified" + msg = f"time data \"{arg}\" at position 0 doesn't match format specified" with pytest.raises(ValueError, match=msg): to_datetime(arg, errors="raise", format="%Y-%m-%d", cache=cache) From c3bbb4f8daa897c3b0baf581fbeaeb47eeac0340 Mon Sep 17 00:00:00 2001 From: dannyi96 Date: Mon, 15 Aug 2022 23:56:15 +0530 Subject: [PATCH 4/6] testcase updates --- pandas/tests/tools/test_to_datetime.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 392a0b35d9b13..7e698b7a6b83d 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2220,7 +2220,7 @@ def test_day_not_in_month_raise(self, cache): @pytest.mark.parametrize("arg", ["2015-02-29", "2015-02-32", "2015-04-31"]) def test_day_not_in_month_raise_value(self, cache, arg): - msg = f"time data \"{arg}\" at position 0 doesn't match format specified" + msg = f'time data "{arg}" at position 0 doesn\'t match format specified' with pytest.raises(ValueError, match=msg): to_datetime(arg, errors="raise", format="%Y-%m-%d", cache=cache) From ca3c9a4f8d288ae2d7f5406d3a81713b4a35566b Mon Sep 17 00:00:00 2001 From: dannyi96 Date: Tue, 16 Aug 2022 10:39:20 +0530 Subject: [PATCH 5/6] testcase updates --- pandas/tests/scalar/period/test_period.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index 20d6b9e77a034..c9e28f8249c1b 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -301,7 +301,7 @@ def test_invalid_arguments(self): with pytest.raises(ValueError, match=msg): Period(month=1) - msg = "Given date string not likely a datetime" + msg = "Given date string -2000 not likely a datetime" with pytest.raises(ValueError, match=msg): Period("-2000", "A") msg = "day is out of range for month" From 7bc3367131c650cacc994cefd97afecfc9c8864c Mon Sep 17 00:00:00 2001 From: dannyi96 Date: Tue, 16 Aug 2022 10:42:01 +0530 Subject: [PATCH 6/6] testcase updates --- pandas/_libs/tslib.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index 5a29a976da89b..55057ff628619 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -607,7 +607,7 @@ cpdef array_to_datetime( # to check if all arguments have the same tzinfo tz = py_dt.utcoffset() - except (ValueError, OverflowError) as err: + except (ValueError, OverflowError): if is_coerce: iresult[i] = NPY_NAT continue