From 2c9ced56d0b4680a6fd8bf21dbf17bc062fe2033 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Dec 2020 12:49:32 -0800 Subject: [PATCH 1/5] BUG: constructing DataFrame from OutOfBounds datetime scalar --- pandas/core/dtypes/cast.py | 6 +++++- pandas/tests/frame/test_constructors.py | 6 ++++++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 25259093f9fba..b3149b3fb9d80 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -725,7 +725,11 @@ def infer_dtype_from_scalar(val, pandas_dtype: bool = False) -> Tuple[DtypeObj, dtype = np.dtype(object) elif isinstance(val, (np.datetime64, datetime)): - val = Timestamp(val) + try: + val = Timestamp(val) + except OutOfBoundsDatetime: + return np.dtype(object), val + if val is NaT or val.tz is None: dtype = np.dtype("M8[ns]") else: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 94b2431650359..74bfd61cf5a1c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -3002,3 +3002,9 @@ def test_from_scalar_datetimelike_mismatched(self, constructor, cls, request): scalar = cls(4, "ns") with pytest.raises(TypeError, match="Cannot cast"): constructor(scalar, dtype=dtype) + + def test_from_out_of_bounds_datetime(self, constructor): + scalar = datetime(9999, 1, 1) + result = constructor(scalar) + + assert type(get1(result)) is datetime From 205b0edf8591dd2a3e2cdc64d7c1a5dfe285f20c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 29 Dec 2020 18:28:17 -0800 Subject: [PATCH 2/5] BUG: DataFrame(dt64data, dtype=td64) corner cases --- pandas/core/construction.py | 5 ++++- pandas/core/dtypes/cast.py | 4 +++- pandas/core/internals/construction.py | 4 +++- pandas/tests/frame/methods/test_to_records.py | 9 +++++++++ pandas/tests/frame/test_constructors.py | 18 +----------------- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 261b13e52777b..54a6f47ae1b38 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -615,9 +615,12 @@ def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bo except OutOfBoundsDatetime: # in case of out of bound datetime64 -> always raise raise - except (ValueError, TypeError): + except (ValueError, TypeError) as err: if dtype is not None and raise_cast_failure: raise + elif "Cannot cast" in str(err): + # via _disallow_mismatched_datetimelike + raise else: subarr = np.array(arr, dtype=object, copy=copy) return subarr diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b3149b3fb9d80..700d51202ea3b 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -170,7 +170,7 @@ def maybe_unbox_datetimelike(value: Scalar, dtype: DtypeObj) -> Scalar: return value -def _disallow_mismatched_datetimelike(value: DtypeObj, dtype: DtypeObj): +def _disallow_mismatched_datetimelike(value, dtype: DtypeObj): """ numpy allows np.array(dt64values, dtype="timedelta64[ns]") and vice-versa, but we do not want to allow this, so we need to @@ -1476,6 +1476,8 @@ def maybe_cast_to_datetime(value, dtype: Optional[DtypeObj]): # we have an array of datetime or timedeltas & nulls elif np.prod(value.shape) or not is_dtype_equal(value.dtype, dtype): + _disallow_mismatched_datetimelike(value, dtype) + try: if is_datetime64: value = to_datetime(value, errors="raise") diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index d59cfc436f13d..20bd022ea2136 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -271,7 +271,9 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None): nan_dtype = np.dtype(object) else: nan_dtype = dtype - val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) + + na_val = arrays[missing].iat[0] + val = construct_1d_arraylike_from_scalar(na_val, len(index), nan_dtype) arrays.loc[missing] = [val] * missing.sum() else: diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py index e83882be9c680..19ced1ce456e6 100644 --- a/pandas/tests/frame/methods/test_to_records.py +++ b/pandas/tests/frame/methods/test_to_records.py @@ -379,3 +379,12 @@ def test_to_records_datetimeindex_with_tz(self, tz): # both converted to UTC, so they are equal tm.assert_numpy_array_equal(result, expected) + + def test_to_records_timeseries(self): + index = date_range("1/1/2000", periods=10) + df = DataFrame(np.random.randn(10, 3), index=index, columns=["a", "b", "c"]) + + result = df.to_records() + result["index"].dtype == "M8[ns]" + + result = df.to_records(index=False) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 74bfd61cf5a1c..d8f59e24c403b 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2823,15 +2823,6 @@ def test_frame_datetime64_mixed_index_ctor_1681(self): d = DataFrame({"A": "foo", "B": ts}, index=dr) assert d["B"].isna().all() - def test_frame_timeseries_to_records(self): - index = date_range("1/1/2000", periods=10) - df = DataFrame(np.random.randn(10, 3), index=index, columns=["a", "b", "c"]) - - result = df.to_records() - result["index"].dtype == "M8[ns]" - - result = df.to_records(index=False) - def test_frame_timeseries_column(self): # GH19157 dr = date_range(start="20130101T10:00:00", periods=3, freq="T", tz="US/Eastern") @@ -2984,14 +2975,7 @@ def test_from_timedelta64_scalar_object(self, constructor, request): assert isinstance(get1(obj), np.timedelta64) @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64]) - def test_from_scalar_datetimelike_mismatched(self, constructor, cls, request): - node = request.node - params = node.callspec.params - if params["frame_or_series"] is DataFrame and params["constructor"] is not None: - mark = pytest.mark.xfail( - reason="DataFrame incorrectly allows mismatched datetimelike" - ) - node.add_marker(mark) + def test_from_scalar_datetimelike_mismatched(self, constructor, cls): scalar = cls("NaT", "ns") dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls] From c5f40a3360f62d8dfb35b2b2310cfad62e6e06de Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Dec 2020 18:14:23 -0800 Subject: [PATCH 3/5] revert unrelated test move --- pandas/tests/frame/methods/test_to_records.py | 9 --------- pandas/tests/frame/test_constructors.py | 9 +++++++++ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/tests/frame/methods/test_to_records.py b/pandas/tests/frame/methods/test_to_records.py index 19ced1ce456e6..e83882be9c680 100644 --- a/pandas/tests/frame/methods/test_to_records.py +++ b/pandas/tests/frame/methods/test_to_records.py @@ -379,12 +379,3 @@ def test_to_records_datetimeindex_with_tz(self, tz): # both converted to UTC, so they are equal tm.assert_numpy_array_equal(result, expected) - - def test_to_records_timeseries(self): - index = date_range("1/1/2000", periods=10) - df = DataFrame(np.random.randn(10, 3), index=index, columns=["a", "b", "c"]) - - result = df.to_records() - result["index"].dtype == "M8[ns]" - - result = df.to_records(index=False) diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index d8f59e24c403b..6d05d9135eeb9 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2823,6 +2823,15 @@ def test_frame_datetime64_mixed_index_ctor_1681(self): d = DataFrame({"A": "foo", "B": ts}, index=dr) assert d["B"].isna().all() + def test_frame_timeseries_to_records(self): + index = date_range("1/1/2000", periods=10) + df = DataFrame(np.random.randn(10, 3), index=index, columns=["a", "b", "c"]) + + result = df.to_records() + result["index"].dtype == "M8[ns]" + + result = df.to_records(index=False) + def test_frame_timeseries_column(self): # GH19157 dr = date_range(start="20130101T10:00:00", periods=3, freq="T", tz="US/Eastern") From 84eaea70fad4d117e341ba8e27e5a39de90b069e Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 30 Dec 2020 18:17:04 -0800 Subject: [PATCH 4/5] revert dict case --- pandas/core/internals/construction.py | 4 +--- pandas/tests/frame/test_constructors.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 20bd022ea2136..d59cfc436f13d 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -271,9 +271,7 @@ def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None): nan_dtype = np.dtype(object) else: nan_dtype = dtype - - na_val = arrays[missing].iat[0] - val = construct_1d_arraylike_from_scalar(na_val, len(index), nan_dtype) + val = construct_1d_arraylike_from_scalar(np.nan, len(index), nan_dtype) arrays.loc[missing] = [val] * missing.sum() else: diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 6d05d9135eeb9..dcade94e0186c 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -2984,8 +2984,14 @@ def test_from_timedelta64_scalar_object(self, constructor, request): assert isinstance(get1(obj), np.timedelta64) @pytest.mark.parametrize("cls", [np.datetime64, np.timedelta64]) - def test_from_scalar_datetimelike_mismatched(self, constructor, cls): - + def test_from_scalar_datetimelike_mismatched(self, constructor, cls, request): + node = request.node + params = node.callspec.params + if params["frame_or_series"] is DataFrame and params["constructor"] is dict: + mark = pytest.mark.xfail( + reason="DataFrame incorrectly allows mismatched datetimelike" + ) + node.add_marker(mark) scalar = cls("NaT", "ns") dtype = {np.datetime64: "m8[ns]", np.timedelta64: "M8[ns]"}[cls] From d310192cdf5328be4e1dbca4389478e00781a2a9 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 31 Dec 2020 10:57:45 -0800 Subject: [PATCH 5/5] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index f66098633b45e..d2269b8ef78e1 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -190,7 +190,8 @@ Datetimelike ^^^^^^^^^^^^ - Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`) - Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`) -- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`, :issue:`38764`) +- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`, :issue:`38764`, :issue:`38792`) +- Bug in constructing a :class:`Series` or :class:`DataFrame` with a ``datetime`` object out of bounds for ``datetime64[ns]`` dtype (:issue:`38792`) - Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`) - Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`) -