From 31ead07b49466f5c02dc2849274f405c86f31319 Mon Sep 17 00:00:00 2001 From: Petr Baev Date: Fri, 20 Dec 2019 15:20:16 +0300 Subject: [PATCH 1/7] DOC/ERR: better error message on unsuccessful datetime parsing #10720 --- pandas/core/arrays/datetimes.py | 9 +++++++++ pandas/core/tools/timedeltas.py | 6 ++++-- pandas/tests/indexes/datetimes/test_tools.py | 11 +++++++++++ pandas/tests/indexes/timedeltas/test_tools.py | 9 +++++++++ 4 files changed, 33 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e41f2a840d151..3a422afe2b8e1 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1970,6 +1970,15 @@ def objects_to_datetime64ns( # return them as i8 to distinguish from wall times return values.view("i8"), tz_parsed except (ValueError, TypeError): + # GH#10720. If we failed to parse datetime then notify + # that flag errors='coerce' could be used to NaT. + # Trying to distinguish exception based on message. + if "Unknown string format" in e.args[0]: + msg = ( + " ".join(e.args) + + ". You can coerce to NaT by passing errors='coerce'" + ) + e.args = (msg,) raise e if tz_parsed is not None: diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 3e185feaea38e..8e44226a8b069 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -113,9 +113,11 @@ def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): try: result = Timedelta(r, unit) - except ValueError: + except ValueError as e: if errors == "raise": - raise + msg = e.args[0] + ". You can coerce to NaT by passing errors='coerce'" + e.args = (msg,) + raise e elif errors == "ignore": return r diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 6e919571d1423..68e584160ad7b 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -480,6 +480,17 @@ def test_to_datetime_unparseable_ignore(self): s = "Month 1, 1999" assert pd.to_datetime(s, errors="ignore") == s + def test_to_datetime_unparseable_raise(self): + # GH#10720 + s = "Month 1, 1999" + expected_args = ( + f"Unknown string format: {s}. " + f"You can coerce to NaT by passing errors='coerce'" + ) + + with pytest.raises(ValueError, match=expected_args): + pd.to_datetime(s, errors="raise") + @td.skip_if_windows # `tm.set_timezone` does not work in windows def test_to_datetime_now(self): # See GH#18666 diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index 5bd7a2a583b84..31ef5c5bda093 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -111,6 +111,15 @@ def test_to_timedelta_invalid(self): invalid_data, to_timedelta(invalid_data, errors="ignore") ) + # GH#10720 + invalid_data = "some_nonesense" + expected_msg = ( + "unit abbreviation w/o a number. " + "You can coerce to NaT by passing errors='coerce'" + ) + with pytest.raises(ValueError, match=expected_msg): + to_timedelta(invalid_data, errors="raise") + def test_to_timedelta_via_apply(self): # GH 5458 expected = Series([np.timedelta64(1, "s")]) From ad0bb557b855f1dc5877fdac73181640a8539e1b Mon Sep 17 00:00:00 2001 From: Petr Baev Date: Sun, 5 Jan 2020 20:50:52 +0300 Subject: [PATCH 2/7] renamed 1 letter variable in tests --- pandas/tests/indexes/datetimes/test_tools.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 68e584160ad7b..b7ada04701e2b 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -482,14 +482,14 @@ def test_to_datetime_unparseable_ignore(self): def test_to_datetime_unparseable_raise(self): # GH#10720 - s = "Month 1, 1999" + invalid_data = "Month 1, 1999" expected_args = ( - f"Unknown string format: {s}. " + f"Unknown string format: {invalid_data}. " f"You can coerce to NaT by passing errors='coerce'" ) with pytest.raises(ValueError, match=expected_args): - pd.to_datetime(s, errors="raise") + pd.to_datetime(invalid_data, errors="raise") @td.skip_if_windows # `tm.set_timezone` does not work in windows def test_to_datetime_now(self): From a413f46c9fba3344e390cf207a49f0ba619ab18a Mon Sep 17 00:00:00 2001 From: Petr Baev Date: Mon, 6 Jan 2020 23:54:15 +0300 Subject: [PATCH 3/7] f-formatting ... raise from ... --- pandas/core/arrays/datetimes.py | 8 +++----- pandas/core/tools/timedeltas.py | 5 ++--- pandas/tests/indexes/datetimes/test_tools.py | 2 +- pandas/tests/indexes/timedeltas/test_tools.py | 2 +- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 3a422afe2b8e1..435dc5375646a 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1973,12 +1973,10 @@ def objects_to_datetime64ns( # GH#10720. If we failed to parse datetime then notify # that flag errors='coerce' could be used to NaT. # Trying to distinguish exception based on message. + msg = " ".join(e.args) if "Unknown string format" in e.args[0]: - msg = ( - " ".join(e.args) - + ". You can coerce to NaT by passing errors='coerce'" - ) - e.args = (msg,) + msg = f"{msg}. You can coerce to NaT by passing errors='coerce'" + raise ValueError(msg) from e raise e if tz_parsed is not None: diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 8e44226a8b069..8b95dbd0763e5 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -115,9 +115,8 @@ def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): result = Timedelta(r, unit) except ValueError as e: if errors == "raise": - msg = e.args[0] + ". You can coerce to NaT by passing errors='coerce'" - e.args = (msg,) - raise e + msg = f"{e.args[0]}. You can coerce to NaT by passing errors='coerce'" + raise ValueError(msg) from e elif errors == "ignore": return r diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index b7ada04701e2b..b218b8cc875b0 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -485,7 +485,7 @@ def test_to_datetime_unparseable_raise(self): invalid_data = "Month 1, 1999" expected_args = ( f"Unknown string format: {invalid_data}. " - f"You can coerce to NaT by passing errors='coerce'" + "You can coerce to NaT by passing errors='coerce'" ) with pytest.raises(ValueError, match=expected_args): diff --git a/pandas/tests/indexes/timedeltas/test_tools.py b/pandas/tests/indexes/timedeltas/test_tools.py index 31ef5c5bda093..2f2c86a13f040 100644 --- a/pandas/tests/indexes/timedeltas/test_tools.py +++ b/pandas/tests/indexes/timedeltas/test_tools.py @@ -118,7 +118,7 @@ def test_to_timedelta_invalid(self): "You can coerce to NaT by passing errors='coerce'" ) with pytest.raises(ValueError, match=expected_msg): - to_timedelta(invalid_data, errors="raise") + pd.to_timedelta(invalid_data, errors="raise") def test_to_timedelta_via_apply(self): # GH 5458 From 5ebe4988802ea2c209a3139d259163101fbbedac Mon Sep 17 00:00:00 2001 From: Petr Baev Date: Tue, 7 Jan 2020 01:08:37 +0300 Subject: [PATCH 4/7] tests From 3cca64b2609157992f2eaeaf62bba2a552f32bb6 Mon Sep 17 00:00:00 2001 From: Petr Baev Date: Tue, 7 Jan 2020 01:09:08 +0300 Subject: [PATCH 5/7] tests From e30c1469fa39da0e3350e0f9cb7f0bb3fd375e14 Mon Sep 17 00:00:00 2001 From: Petr Baev Date: Tue, 7 Jan 2020 16:59:38 +0300 Subject: [PATCH 6/7] e.args cast to str --- pandas/core/arrays/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 4e6c4c1e9c25d..1fb612dcacfc9 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1962,7 +1962,7 @@ def objects_to_datetime64ns( # GH#10720. If we failed to parse datetime then notify # that flag errors='coerce' could be used to NaT. # Trying to distinguish exception based on message. - msg = " ".join(e.args) + msg = " ".join([str(arg) for arg in e.args]) if "Unknown string format" in e.args[0]: msg = f"{msg}. You can coerce to NaT by passing errors='coerce'" raise ValueError(msg) from e From 8fe73c9f3b4c1519cd66d0d01954ac8605092d77 Mon Sep 17 00:00:00 2001 From: Petr Baev Date: Sat, 18 Jan 2020 05:54:33 +0300 Subject: [PATCH 7/7] workaround with err message --- pandas/core/arrays/datetimes.py | 14 +++++++------- pandas/core/tools/timedeltas.py | 7 ++++--- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 1fb612dcacfc9..d7feb194af11e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -1952,7 +1952,7 @@ def objects_to_datetime64ns( yearfirst=yearfirst, require_iso8601=require_iso8601, ) - except ValueError as e: + except ValueError as err: try: values, tz_parsed = conversion.datetime_to_datetime64(data) # If tzaware, these values represent unix timestamps, so we @@ -1960,13 +1960,13 @@ def objects_to_datetime64ns( return values.view("i8"), tz_parsed except (ValueError, TypeError): # GH#10720. If we failed to parse datetime then notify - # that flag errors='coerce' could be used to NaT. - # Trying to distinguish exception based on message. - msg = " ".join([str(arg) for arg in e.args]) - if "Unknown string format" in e.args[0]: + # that flag errors='coerce' could be used to NaT. + # Trying to distinguish exception based on message. + if "Unknown string format" in err.args[0]: + msg = f"Unknown string format: {err.args[1]}" msg = f"{msg}. You can coerce to NaT by passing errors='coerce'" - raise ValueError(msg) from e - raise e + raise ValueError(msg).with_traceback(err.__traceback__) + raise if tz_parsed is not None: # We can take a shortcut since the datetime64 numpy array diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 8b95dbd0763e5..1f205aff6db0a 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -113,10 +113,11 @@ def _coerce_scalar_to_timedelta_type(r, unit="ns", errors="raise"): try: result = Timedelta(r, unit) - except ValueError as e: + except ValueError as err: if errors == "raise": - msg = f"{e.args[0]}. You can coerce to NaT by passing errors='coerce'" - raise ValueError(msg) from e + # GH#10720 + msg = f"{err.args[0]}. You can coerce to NaT by passing errors='coerce'" + raise ValueError(msg).with_traceback(err.__traceback__) elif errors == "ignore": return r