From f6819fc98414824eb9d6db38618a859375ae8415 Mon Sep 17 00:00:00 2001 From: Aashish KC Date: Sat, 4 Feb 2023 14:44:13 +0545 Subject: [PATCH 1/2] catch InvalidOperation exception from dateutil --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/_libs/tslibs/parsing.pyx | 9 +++++++-- pandas/tests/tslibs/test_parsing.py | 1 + 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index bc1cf8d03ce98..4a34daa8921db 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -1108,6 +1108,7 @@ Datetimelike - Bug in :func:`to_datetime` with unit of "Y" or "M" giving incorrect results, not matching pointwise :class:`Timestamp` results (:issue:`50870`) - Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with datetime or timedelta dtypes incorrectly raising ``ValueError`` (:issue:`11312`) - Bug in :func:`to_datetime` was not returning input with ``errors='ignore'`` when input was out-of-bounds (:issue:`50587`) +- Bug in :func:`to_datetime` was raising ``decimal.InvalidOperation`` when parsing date strings with ``errors='coerce'`` (:issue:`51084`) - Timedelta diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index 3a5920b74412f..e48871c537310 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -40,6 +40,8 @@ cnp.import_array() # dateutil compat +from decimal import InvalidOperation + from dateutil.parser import ( DEFAULTPARSER, parse as du_parse, @@ -646,7 +648,10 @@ cdef datetime dateutil_parse( str reso = None dict repl = {} - res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst) + try: + res, _ = DEFAULTPARSER._parse(timestr, dayfirst=dayfirst, yearfirst=yearfirst) + except InvalidOperation: + res = None if res is None: raise DateParseError( @@ -891,7 +896,7 @@ def guess_datetime_format(dt_str: str, bint dayfirst=False) -> str | None: try: parsed_datetime = du_parse(dt_str, dayfirst=dayfirst) - except (ValueError, OverflowError): + except (ValueError, OverflowError, InvalidOperation): # In case the datetime can't be parsed, its format cannot be guessed return None diff --git a/pandas/tests/tslibs/test_parsing.py b/pandas/tests/tslibs/test_parsing.py index 701c11bb7f52d..6500afdf87beb 100644 --- a/pandas/tests/tslibs/test_parsing.py +++ b/pandas/tests/tslibs/test_parsing.py @@ -246,6 +246,7 @@ def test_guess_datetime_format_with_locale_specific_formats(string, fmt): "13/2019", "202001", # YYYYMM isn't ISO8601 "2020/01", # YYYY/MM isn't ISO8601 either + "87156549591102612381000001219H5", ], ) def test_guess_datetime_format_invalid_inputs(invalid_dt): From 3885971e133274049e51891dc1a871f7fe07b650 Mon Sep 17 00:00:00 2001 From: Aashish KC Date: Tue, 7 Feb 2023 17:45:34 +0545 Subject: [PATCH 2/2] add tests to verify decimal.InvalidOperation is caught --- pandas/tests/tools/test_to_datetime.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 7a93d2fe8b5ce..b930bbc2e3e69 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -2501,6 +2501,16 @@ def test_to_datetime_overflow(self): with pytest.raises(OutOfBoundsTimedelta, match=msg): date_range(start="1/1/1700", freq="B", periods=100000) + def test_string_invalid_operation(self, cache): + invalid = np.array(["87156549591102612381000001219H5"], dtype=object) + # GH #51084 + + with pytest.raises(ValueError, match="Unknown datetime string format"): + with tm.assert_produces_warning( + UserWarning, match="Could not infer format" + ): + to_datetime(invalid, errors="raise", cache=cache) + def test_string_na_nat_conversion(self, cache): # GH #999, #858