diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index a6b74865f6619..a897f364d8066 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -600,6 +600,7 @@ Datetimelike - Bug in :meth:`isin` for datetimelike indexes; :class:`DatetimeIndex`, :class:`TimedeltaIndex` and :class:`PeriodIndex` where the ``levels`` parameter was ignored. (:issue:`26675`) - Bug in :func:`to_datetime` which raises ``TypeError`` for ``format='%Y%m%d'`` when called for invalid integer dates with length >= 6 digits with ``errors='ignore'`` - Bug when comparing a :class:`PeriodIndex` against a zero-dimensional numpy array (:issue:`26689`) +- Bug in constructing a ``Series`` or ``DataFrame`` from a numpy ``datetime64`` array with a non-ns unit and out-of-bound timestamps generating rubbish data, which will now correctly raise an ``OutOfBoundsDatetime`` error (:issue:`26206`). - Bug in :func:`date_range` with unnecessary ``OverflowError`` being raised for very large or very small dates (:issue:`26651`) Timedelta diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 2f66e9ed46aa0..c68d469d291e7 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1038,6 +1038,8 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): .tz_convert(dtype.tz)) elif is_timedelta64: value = to_timedelta(value, errors=errors)._values + except OutOfBoundsDatetime: + raise except (AttributeError, ValueError, TypeError): pass @@ -1063,7 +1065,7 @@ def maybe_cast_to_datetime(value, dtype, errors='raise'): dtype = value.dtype if dtype.kind == 'M' and dtype != _NS_DTYPE: - value = value.astype(_NS_DTYPE) + value = tslibs.conversion.ensure_datetime64ns(value) elif dtype.kind == 'm' and dtype != _TD_DTYPE: value = to_timedelta(value) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 2616f0aa97d0d..f564ac13dc41d 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -8,7 +8,7 @@ import numpy.ma as ma from pandas._libs import lib -from pandas._libs.tslibs import IncompatibleFrequency +from pandas._libs.tslibs import IncompatibleFrequency, OutOfBoundsDatetime from pandas.compat import raise_with_traceback from pandas.core.dtypes.cast import ( @@ -700,6 +700,9 @@ def _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure): elif not is_extension_type(subarr): subarr = construct_1d_ndarray_preserving_na(subarr, dtype, copy=copy) + except OutOfBoundsDatetime: + # in case of out of bound datetime64 -> always raise + raise except (ValueError, TypeError): if is_categorical_dtype(dtype): # We *do* allow casting to categorical, since we know diff --git a/pandas/tests/test_base.py b/pandas/tests/test_base.py index 3b4f85e680f6e..d24ed9433f4f7 100644 --- a/pandas/tests/test_base.py +++ b/pandas/tests/test_base.py @@ -1341,3 +1341,41 @@ def test_to_numpy_dtype(as_series): expected = np.array(['2000-01-01T05', '2001-01-01T05'], dtype='M8[ns]') tm.assert_numpy_array_equal(result, expected) + + +class TestConstruction: + # test certain constructor behaviours on dtype inference across Series, + # Index and DataFrame + + @pytest.mark.parametrize("klass", [ + Series, + lambda x, **kwargs: DataFrame({'a': x}, **kwargs)['a'], + pytest.param(lambda x, **kwargs: DataFrame(x, **kwargs)[0], + marks=pytest.mark.xfail), + Index, + ]) + @pytest.mark.parametrize("a", [ + np.array(['2263-01-01'], dtype='datetime64[D]'), + np.array([datetime(2263, 1, 1)], dtype=object), + np.array([np.datetime64('2263-01-01', 'D')], dtype=object), + np.array(["2263-01-01"], dtype=object) + ], ids=['datetime64[D]', 'object-datetime.datetime', + 'object-numpy-scalar', 'object-string']) + def test_constructor_datetime_outofbound(self, a, klass): + # GH-26853 (+ bug GH-26206 out of bound non-ns unit) + + # No dtype specified (dtype inference) + # datetime64[non-ns] raise error, other cases result in object dtype + # and preserve original data + if a.dtype.kind == 'M': + with pytest.raises(pd.errors.OutOfBoundsDatetime): + klass(a) + else: + result = klass(a) + assert result.dtype == 'object' + tm.assert_numpy_array_equal(result.to_numpy(), a) + + # Explicit dtype specified + # Forced conversion fails for all -> all cases raise error + with pytest.raises(pd.errors.OutOfBoundsDatetime): + klass(a, dtype='datetime64[ns]')