From cfbf6bc658b21796a0e3df99935f755bff31440e Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Dec 2020 10:56:59 -0800 Subject: [PATCH 1/3] BUG: Series(dt64values, dtype=Sparse[object]) --- pandas/core/dtypes/cast.py | 7 +++++++ pandas/tests/dtypes/test_common.py | 13 +++++++++++++ pandas/tests/series/test_constructors.py | 17 +++++++++++++++++ 3 files changed, 37 insertions(+) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 63445d0e1598d..60813d37ffa57 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -980,6 +980,13 @@ def astype_nansafe( elif not isinstance(dtype, np.dtype): raise ValueError("dtype must be np.dtype or ExtensionDtype") + if arr.dtype.kind in ["m", "M"] and dtype == object: + # make sure we wrap in Timedelta/Timestamp, not timedelta/datetime + from pandas.core.construction import ensure_wrapped_if_datetimelike + + arr = ensure_wrapped_if_datetimelike(arr) + return arr.astype(dtype, copy=copy) + if issubclass(dtype.type, str): return lib.ensure_string_array( arr.ravel(), skipna=skipna, convert_na_value=False diff --git a/pandas/tests/dtypes/test_common.py b/pandas/tests/dtypes/test_common.py index 0d0601aa542b4..7e8ce19d03ccd 100644 --- a/pandas/tests/dtypes/test_common.py +++ b/pandas/tests/dtypes/test_common.py @@ -754,6 +754,19 @@ def test_astype_object_preserves_datetime_na(from_type): assert isna(result)[0] +@pytest.mark.parametrize("from_dtype", ["datetime64[ns]", "timedelta64[ns]"]) +def test_astype_object_boxes_timestamps_timedeltas(from_dtype): + arr = np.array([3600], dtype=from_dtype) + result = astype_nansafe(arr, dtype=np.dtype("object")) + + assert result.dtype == object + + if from_dtype == "datetime64[ns]": + assert isinstance(result[0], pd.Timestamp) + else: + assert isinstance(result[0], pd.Timedelta) + + def test_validate_allhashable(): assert com.validate_all_hashable(1, "a") is None diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 5b13091470b09..ef857d9f1f0e3 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -24,6 +24,7 @@ Period, RangeIndex, Series, + Timedelta, Timestamp, date_range, isna, @@ -1513,6 +1514,22 @@ def test_constructor_sparse_datetime64(self, values): expected = Series(arr) tm.assert_series_equal(result, expected) + def test_constructor_datetimelike_to_sparse_object(self): + arr = np.arange(3).view("M8[D]").astype("M8[ns]") + arr2 = arr.view("m8[ns]") + + ser = Series(arr, dtype="Sparse[object]") + assert isinstance(ser[0], Timestamp) + + ser = Series(arr).astype("Sparse[object]") + assert isinstance(ser[0], Timestamp) + + ser2 = Series(arr2, dtype="Sparse[object]") + assert isinstance(ser2[0], Timedelta) + + ser2 = Series(arr2).astype("Sparse[object]") + assert isinstance(ser2[0], Timedelta) + def test_construction_from_ordered_collection(self): # https://github.com/pandas-dev/pandas/issues/36044 result = Series({"a": 1, "b": 2}.keys()) From 89906097bfd846add9e981fe68a6ac04d2dd2c75 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Dec 2020 11:15:42 -0800 Subject: [PATCH 2/3] CLN: remove unnecessary checks --- pandas/core/dtypes/cast.py | 9 ++------- pandas/core/internals/blocks.py | 10 ---------- 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 60813d37ffa57..d0b85dad24099 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -30,7 +30,6 @@ conversion, iNaT, ints_to_pydatetime, - ints_to_pytimedelta, ) from pandas._libs.tslibs.timezones import tz_compare from pandas._typing import AnyArrayLike, ArrayLike, Dtype, DtypeObj, Scalar @@ -993,9 +992,7 @@ def astype_nansafe( ).reshape(arr.shape) elif is_datetime64_dtype(arr): - if is_object_dtype(dtype): - return ints_to_pydatetime(arr.view(np.int64)) - elif dtype == np.int64: + if dtype == np.int64: if isna(arr).any(): raise ValueError("Cannot convert NaT values to integer") return arr.view(dtype) @@ -1007,9 +1004,7 @@ def astype_nansafe( raise TypeError(f"cannot astype a datetimelike from [{arr.dtype}] to [{dtype}]") elif is_timedelta64_dtype(arr): - if is_object_dtype(dtype): - return ints_to_pytimedelta(arr.view(np.int64)) - elif dtype == np.int64: + if dtype == np.int64: if isna(arr).any(): raise ValueError("Cannot convert NaT values to integer") return arr.view(dtype) diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index b9558daf05ad2..1765dab5a8455 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -681,16 +681,6 @@ def _astype(self, dtype: DtypeObj, copy: bool) -> ArrayLike: # StringDtype, this matches arr.astype(dtype), xref GH#36153 values = arr._format_native_types(na_rep="NaT") - elif is_object_dtype(dtype): - if values.dtype.kind in ["m", "M"]: - # Wrap in Timedelta/Timestamp - arr = pd_array(values) - values = arr.astype(object) - else: - values = values.astype(object) - # We still need to go through astype_nansafe for - # e.g. dtype = Sparse[object, 0] - # astype_nansafe works with 1-d only vals1d = values.ravel() values = astype_nansafe(vals1d, dtype, copy=True) From 2c44d66dcf1a2e1593c45270974635d6e51ee67c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Dec 2020 12:41:11 -0800 Subject: [PATCH 3/3] 32bit fixup --- pandas/tests/series/test_constructors.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index ef857d9f1f0e3..8d99467e5285a 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1515,7 +1515,7 @@ def test_constructor_sparse_datetime64(self, values): tm.assert_series_equal(result, expected) def test_constructor_datetimelike_to_sparse_object(self): - arr = np.arange(3).view("M8[D]").astype("M8[ns]") + arr = np.arange(3, dtype="i8").view("M8[D]").astype("M8[ns]") arr2 = arr.view("m8[ns]") ser = Series(arr, dtype="Sparse[object]")