Skip to content

Commit da2f408

Browse files
authored
BUG: DataFrame(ndarray[dt64], dtype=object) (#41599)
1 parent aa3bfc4 commit da2f408

File tree

4 files changed

+45
-14
lines changed

4 files changed

+45
-14
lines changed

doc/source/whatsnew/v1.3.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -819,6 +819,8 @@ Conversion
819819
- Bug in :func:`factorize` where, when given an array with a numeric numpy dtype lower than int64, uint64 and float64, the unique values did not keep their original dtype (:issue:`41132`)
820820
- Bug in :class:`DataFrame` construction with a dictionary containing an arraylike with ``ExtensionDtype`` and ``copy=True`` failing to make a copy (:issue:`38939`)
821821
- Bug in :meth:`qcut` raising an error when taking ``Float64Dtype`` as input (:issue:`40730`)
822+
- Bug in :class:`DataFrame` and :class:`Series` construction with ``datetime64[ns]`` data and ``dtype=object`` resulting in ``datetime`` objects instead of :class:`Timestamp` objects (:issue:`41599`)
823+
- Bug in :class:`DataFrame` and :class:`Series` construction with ``timedelta64[ns]`` data and ``dtype=object`` resulting in ``np.timedelta64`` objects instead of :class:`Timedelta` objects (:issue:`41599`)
822824

823825
Strings
824826
^^^^^^^

pandas/core/dtypes/cast.py

+7-13
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
Timedelta,
3232
Timestamp,
3333
conversion,
34-
ints_to_pydatetime,
3534
)
3635
from pandas._libs.tslibs.timedeltas import array_to_timedelta64
3736
from pandas._typing import (
@@ -1679,18 +1678,13 @@ def maybe_cast_to_datetime(
16791678
raise
16801679
pass
16811680

1682-
# coerce datetimelike to object
1683-
elif is_datetime64_dtype(vdtype) and not is_datetime64_dtype(dtype):
1684-
if is_object_dtype(dtype):
1685-
value = cast(np.ndarray, value)
1686-
1687-
if value.dtype != DT64NS_DTYPE:
1688-
value = value.astype(DT64NS_DTYPE)
1689-
ints = np.asarray(value).view("i8")
1690-
return ints_to_pydatetime(ints)
1691-
1692-
# we have a non-castable dtype that was passed
1693-
raise TypeError(f"Cannot cast datetime64 to {dtype}")
1681+
elif getattr(vdtype, "kind", None) in ["m", "M"]:
1682+
# we are already datetimelike and want to coerce to non-datetimelike;
1683+
# astype_nansafe will raise for anything other than object, then upcast.
1684+
# see test_datetimelike_values_with_object_dtype
1685+
# error: Argument 2 to "astype_nansafe" has incompatible type
1686+
# "Union[dtype[Any], ExtensionDtype]"; expected "dtype[Any]"
1687+
return astype_nansafe(value, dtype) # type: ignore[arg-type]
16941688

16951689
elif isinstance(value, np.ndarray):
16961690
if value.dtype.kind in ["M", "m"]:

pandas/core/generic.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -267,7 +267,7 @@ def _init_mgr(
267267
if (
268268
isinstance(mgr, BlockManager)
269269
and len(mgr.blocks) == 1
270-
and mgr.blocks[0].values.dtype == dtype
270+
and is_dtype_equal(mgr.blocks[0].values.dtype, dtype)
271271
):
272272
pass
273273
else:

pandas/tests/frame/test_constructors.py

+35
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from pandas.core.dtypes.dtypes import (
2525
DatetimeTZDtype,
2626
IntervalDtype,
27+
PandasDtype,
2728
PeriodDtype,
2829
)
2930

@@ -111,6 +112,40 @@ def test_array_of_dt64_nat_with_td64dtype_raises(self, frame_or_series):
111112
with pytest.raises(ValueError, match=msg):
112113
frame_or_series(arr, dtype="m8[ns]")
113114

115+
@pytest.mark.parametrize("kind", ["m", "M"])
116+
def test_datetimelike_values_with_object_dtype(self, kind, frame_or_series):
117+
# with dtype=object, we should cast dt64 values to Timestamps (not pydatetimes) and td64 values to Timedeltas (not np.timedelta64 objects)
118+
if kind == "M":
119+
dtype = "M8[ns]"
120+
scalar_type = Timestamp
121+
else:
122+
dtype = "m8[ns]"
123+
scalar_type = Timedelta
124+
125+
arr = np.arange(6, dtype="i8").view(dtype).reshape(3, 2)
126+
if frame_or_series is Series:
127+
arr = arr[:, 0]
128+
129+
obj = frame_or_series(arr, dtype=object)
130+
assert obj._mgr.arrays[0].dtype == object
131+
assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type)
132+
133+
# go through a different path in internals.construction
134+
obj = frame_or_series(frame_or_series(arr), dtype=object)
135+
assert obj._mgr.arrays[0].dtype == object
136+
assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type)
137+
138+
obj = frame_or_series(frame_or_series(arr), dtype=PandasDtype(object))
139+
assert obj._mgr.arrays[0].dtype == object
140+
assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type)
141+
142+
if frame_or_series is DataFrame:
143+
# other paths through internals.construction
144+
sers = [Series(x) for x in arr]
145+
obj = frame_or_series(sers, dtype=object)
146+
assert obj._mgr.arrays[0].dtype == object
147+
assert isinstance(obj._mgr.arrays[0].ravel()[0], scalar_type)
148+
114149
def test_series_with_name_not_matching_column(self):
115150
# GH#9232
116151
x = Series(range(5), name=1)

0 commit comments

Comments
 (0)