diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 252c444b2e60c..25fe1d3d541c0 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -141,6 +141,7 @@ Other API changes - The ``other`` argument in :meth:`DataFrame.mask` and :meth:`Series.mask` now defaults to ``no_default`` instead of ``np.nan`` consistent with :meth:`DataFrame.where` and :meth:`Series.where`. Entries will be filled with the corresponding NULL value (``np.nan`` for numpy dtypes, ``pd.NA`` for extension dtypes). (:issue:`49111`) - When creating a :class:`Series` with a object-dtype :class:`Index` of datetime objects, pandas no longer silently converts the index to a :class:`DatetimeIndex` (:issue:`39307`, :issue:`23598`) - :meth:`Series.unique` with dtype "timedelta64[ns]" or "datetime64[ns]" now returns :class:`TimedeltaArray` or :class:`DatetimeArray` instead of ``numpy.ndarray`` (:issue:`49176`) +- Passing strings that cannot be parsed as datetimes to :class:`Series` or :class:`DataFrame` with ``dtype="datetime64[ns]"`` now raises ``ValueError`` instead of silently ignoring the ``dtype`` keyword and returning ``object`` dtype (:issue:`24435`) - .. --------------------------------------------------------------------------- diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 65112fc19ae56..54fa9629fecd4 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -20,7 +20,6 @@ ) import warnings -from dateutil.parser import ParserError import numpy as np from pandas._libs import lib @@ -1339,28 +1338,21 @@ def maybe_cast_to_datetime( if value.size or not is_dtype_equal(value.dtype, dtype): _disallow_mismatched_datetimelike(value, dtype) - try: - dta = sequence_to_datetimes(value) - # GH 25843: Remove tz information since the dtype - # didn't specify one - - if dta.tz is not None: - raise ValueError( - "Cannot convert timezone-aware data to " - "timezone-naive dtype. Use " - "pd.Series(values).dt.tz_localize(None) instead." 
- ) - - # TODO(2.0): Do this astype in sequence_to_datetimes to - # avoid potential extra copy? - dta = dta.astype(dtype, copy=False) - value = dta - - except OutOfBoundsDatetime: - raise - except ParserError: - # Note: this is dateutil's ParserError, not ours. - pass + dta = sequence_to_datetimes(value) + # GH 25843: Remove tz information since the dtype + # didn't specify one + + if dta.tz is not None: + raise ValueError( + "Cannot convert timezone-aware data to " + "timezone-naive dtype. Use " + "pd.Series(values).dt.tz_localize(None) instead." + ) + + # TODO(2.0): Do this astype in sequence_to_datetimes to + # avoid potential extra copy? + dta = dta.astype(dtype, copy=False) + value = dta elif getattr(vdtype, "kind", None) in ["m", "M"]: # we are already datetimelike and want to coerce to non-datetimelike; diff --git a/pandas/tests/frame/test_block_internals.py b/pandas/tests/frame/test_block_internals.py index ed9d7bced9253..57e498defccc1 100644 --- a/pandas/tests/frame/test_block_internals.py +++ b/pandas/tests/frame/test_block_internals.py @@ -16,7 +16,6 @@ DataFrame, Series, Timestamp, - compat, date_range, option_context, ) @@ -266,8 +265,8 @@ def f(dtype): f("float64") # 10822 - # invalid error message on dt inference - if not compat.is_platform_windows(): + msg = "Unknown string format: aa present at position 0" + with pytest.raises(ValueError, match=msg): f("M8[ns]") def test_pickle(self, float_string_frame, timezone_frame): diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 9817c758759d5..c492f894d4f7c 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -52,6 +52,16 @@ class TestSeriesConstructors: + def test_unparseable_strings_with_dt64_dtype(self): + # pre-2.0 these would be silently ignored and come back with object dtype + vals = ["aa"] + msg = "Unknown string format: aa present at position 0" + with pytest.raises(ValueError, match=msg): + 
Series(vals, dtype="datetime64[ns]") + + with pytest.raises(ValueError, match=msg): + Series(np.array(vals, dtype=object), dtype="datetime64[ns]") + @pytest.mark.parametrize( "constructor,check_index_type", [