diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 8755abe642068..4db4cab9ea91c 100755 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -781,6 +781,7 @@ Datetimelike - Bug in :class:`Timestamp` subtraction when subtracting a :class:`Timestamp` from a ``np.datetime64`` object incorrectly raising ``TypeError`` (:issue:`28286`) - Addition and subtraction of integer or integer-dtype arrays with :class:`Timestamp` will now raise ``NullFrequencyError`` instead of ``ValueError`` (:issue:`28268`) - Bug in :class:`Series` and :class:`DataFrame` with integer dtype failing to raise ``TypeError`` when adding or subtracting a ``np.datetime64`` object (:issue:`28080`) +- Bug in :meth:`Series.astype`, :meth:`Index.astype`, and :meth:`DataFrame.astype` failing to raise ``ValueError`` when casting ``NaT`` values to an integer dtype (:issue:`28492`) - Bug in :class:`Week` with ``weekday`` incorrectly raising ``AttributeError`` instead of ``TypeError`` when adding or subtracting an invalid type (:issue:`28530`) - Bug in :class:`DataFrame` arithmetic operations when operating with a :class:`Series` with dtype `'timedelta64[ns]'` (:issue:`28049`) - Bug in :func:`pandas.core.groupby.generic.SeriesGroupBy.apply` raising ``ValueError`` when a column in the original DataFrame is a datetime and the column labels are not standard integers (:issue:`28247`) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index fa7b45ec4babd..0579c97747bae 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -823,6 +823,8 @@ def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False): if is_object_dtype(dtype): return tslib.ints_to_pydatetime(arr.view(np.int64)) elif dtype == np.int64: + if isna(arr).any(): + raise ValueError("Cannot convert NaT values to integer") return arr.view(dtype) # allow frequency conversions @@ -835,6 +837,8 @@ def astype_nansafe(arr, dtype, copy: bool = True, skipna: bool = False): if
is_object_dtype(dtype): return tslibs.ints_to_pytimedelta(arr.view(np.int64)) elif dtype == np.int64: + if isna(arr).any(): + raise ValueError("Cannot convert NaT values to integer") return arr.view(dtype) if dtype not in [_INT64_DTYPE, _TD_DTYPE]: diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 667ee467f2f29..45e41f4bc2228 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -5,6 +5,7 @@ import pandas.util._test_decorators as td +from pandas.core.dtypes.cast import astype_nansafe import pandas.core.dtypes.common as com from pandas.core.dtypes.dtypes import ( CategoricalDtype, @@ -13,6 +14,7 @@ IntervalDtype, PeriodDtype, ) +from pandas.core.dtypes.missing import isna import pandas as pd from pandas.conftest import ( @@ -721,3 +723,42 @@ def test__get_dtype_fails(input_param, expected_error_message): ) def test__is_dtype_type(input_param, result): assert com._is_dtype_type(input_param, lambda tipo: tipo == result) + + +@pytest.mark.parametrize("val", [np.datetime64("NaT"), np.timedelta64("NaT")]) +@pytest.mark.parametrize("typ", [np.int64]) +def test_astype_nansafe(val, typ): + arr = np.array([val]) + + msg = "Cannot convert NaT values to integer" + with pytest.raises(ValueError, match=msg): + astype_nansafe(arr, dtype=typ) + + +@pytest.mark.parametrize("from_type", [np.datetime64, np.timedelta64]) +@pytest.mark.parametrize( + "to_type", + [ + np.uint8, + np.uint16, + np.uint32, + np.int8, + np.int16, + np.int32, + np.float16, + np.float32, + ], +) +def test_astype_datetime64_bad_dtype_raises(from_type, to_type): + arr = np.array([from_type("2018")]) + + with pytest.raises(TypeError, match="cannot astype"): + astype_nansafe(arr, dtype=to_type) + + +@pytest.mark.parametrize("from_type", [np.datetime64, np.timedelta64]) +def test_astype_object_preserves_datetime_na(from_type): + arr = np.array([from_type("NaT")]) + result = astype_nansafe(arr, dtype="object") + + assert isna(result)[0]