diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 9840e4e94d28c..bff0eaee96ff5 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -115,10 +115,10 @@ Datetimelike - Bug in :meth:`Series.__setitem__` incorrectly casting ``np.timedelta64("NaT")`` to ``np.datetime64("NaT")`` when inserting into a :class:`Series` with datetime64 dtype (:issue:`27311`) - Bug in :meth:`Series.dt` property lookups when the underlying data is read-only (:issue:`27529`) - Bug in ``HDFStore.__getitem__`` incorrectly reading tz attribute created in Python 2 (:issue:`26443`) +- Bug in :func:`to_datetime` where passing arrays of malformed ``str`` with ``errors="coerce"`` could incorrectly lead to raising ``ValueError`` (:issue:`28299`) - Bug in :meth:`pandas.core.groupby.SeriesGroupBy.nunique` where ``NaT`` values were interfering with the count of unique values (:issue:`27951`) - Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`) - Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`) -- Timedelta diff --git a/pandas/_libs/tslib.pyx b/pandas/_libs/tslib.pyx index dc06a30004d19..06525fbe98cf4 100644 --- a/pandas/_libs/tslib.pyx +++ b/pandas/_libs/tslib.pyx @@ -609,6 +609,10 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', py_dt = parse_datetime_string(val, dayfirst=dayfirst, yearfirst=yearfirst) + # If the dateutil parser returned tzinfo, capture it + # to check if all arguments have the same tzinfo + tz = py_dt.utcoffset() + except Exception: if is_coerce: iresult[i] = NPY_NAT @@ -616,9 +620,6 @@ cpdef array_to_datetime(ndarray[object] values, str errors='raise', raise TypeError("invalid string coercion to " "datetime") - # If the dateutil parser returned tzinfo, capture it - # to check if all 
arguments have the same tzinfo - tz = py_dt.utcoffset() if tz is not None: seen_datetime_offset = 1 # dateutil timezone objects cannot be hashed, so diff --git a/pandas/tests/indexes/datetimes/test_tools.py b/pandas/tests/indexes/datetimes/test_tools.py index 9af0f47f6dce9..4e1b5571f054e 100644 --- a/pandas/tests/indexes/datetimes/test_tools.py +++ b/pandas/tests/indexes/datetimes/test_tools.py @@ -901,6 +901,13 @@ def test_to_datetime_coerce(self): ) tm.assert_index_equal(result, expected) + def test_to_datetime_coerce_malformed(self): + # GH 28299 + ts_strings = ["200622-12-31", "111111-24-11"] + result = to_datetime(ts_strings, errors="coerce") + expected = Index([NaT, NaT]) + tm.assert_index_equal(result, expected) + def test_iso_8601_strings_with_same_offset(self): # GH 17697, 11736 ts_str = "2015-11-18 15:30:00+05:30"