Skip to content

Commit ead9404

Browse files
[ArrayManager] DataFrame constructor from ndarray (#40441)
1 parent 5faa34c commit ead9404

File tree

3 files changed

+52
-6
lines changed

3 files changed

+52
-6
lines changed

pandas/core/internals/construction.py

+26 -4
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@
3434
from pandas.core.dtypes.common import (
3535
is_1d_only_ea_dtype,
3636
is_datetime64tz_dtype,
37+
is_datetime_or_timedelta_dtype,
3738
is_dtype_equal,
3839
is_extension_array_dtype,
3940
is_integer_dtype,
@@ -60,6 +61,7 @@
6061
TimedeltaArray,
6162
)
6263
from pandas.core.construction import (
64+
ensure_wrapped_if_datetimelike,
6365
extract_array,
6466
sanitize_array,
6567
)
@@ -316,10 +318,30 @@ def ndarray_to_mgr(
316318
index, columns = _get_axes(
317319
values.shape[0], values.shape[1], index=index, columns=columns
318320
)
319-
values = values.T
320321

321322
_check_values_indices_shape_match(values, index, columns)
322323

324+
if typ == "array":
325+
326+
if issubclass(values.dtype.type, str):
327+
values = np.array(values, dtype=object)
328+
329+
if dtype is None and is_object_dtype(values.dtype):
330+
arrays = [
331+
ensure_wrapped_if_datetimelike(
332+
maybe_infer_to_datetimelike(values[:, i].copy())
333+
)
334+
for i in range(values.shape[1])
335+
]
336+
else:
337+
if is_datetime_or_timedelta_dtype(values.dtype):
338+
values = ensure_wrapped_if_datetimelike(values)
339+
arrays = [values[:, i].copy() for i in range(values.shape[1])]
340+
341+
return ArrayManager(arrays, [index, columns], verify_integrity=False)
342+
343+
values = values.T
344+
323345
# if we don't have a dtype specified, then try to convert objects
324346
# on the entire block; this is to convert if we have datetimelike's
325347
# embedded in an object type
@@ -358,13 +380,13 @@ def _check_values_indices_shape_match(
358380
Check that the shape implied by our axes matches the actual shape of the
359381
data.
360382
"""
361-
if values.shape[0] != len(columns):
383+
if values.shape[1] != len(columns) or values.shape[0] != len(index):
362384
# Could let this raise in Block constructor, but we get a more
363385
# helpful exception message this way.
364-
if values.shape[1] == 0:
386+
if values.shape[0] == 0:
365387
raise ValueError("Empty data passed with indices specified.")
366388

367-
passed = values.T.shape
389+
passed = values.shape
368390
implied = (len(index), len(columns))
369391
raise ValueError(f"Shape of passed values is {passed}, indices imply {implied}")
370392

pandas/tests/frame/methods/test_astype.py

+18 -2
Original file line number | Diff line number | Diff line change
@@ -428,11 +428,27 @@ def test_astype_to_incorrect_datetimelike(self, unit):
428428
other = f"m8[{unit}]"
429429

430430
df = DataFrame(np.array([[1, 2, 3]], dtype=dtype))
431-
msg = fr"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]"
431+
msg = "|".join(
432+
[
433+
# BlockManager path
434+
fr"Cannot cast DatetimeArray to dtype timedelta64\[{unit}\]",
435+
# ArrayManager path
436+
"cannot astype a datetimelike from "
437+
fr"\[datetime64\[ns\]\] to \[timedelta64\[{unit}\]\]",
438+
]
439+
)
432440
with pytest.raises(TypeError, match=msg):
433441
df.astype(other)
434442

435-
msg = fr"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]"
443+
msg = "|".join(
444+
[
445+
# BlockManager path
446+
fr"Cannot cast TimedeltaArray to dtype datetime64\[{unit}\]",
447+
# ArrayManager path
448+
"cannot astype a timedelta from "
449+
fr"\[timedelta64\[ns\]\] to \[datetime64\[{unit}\]\]",
450+
]
451+
)
436452
df = DataFrame(np.array([[1, 2, 3]], dtype=other))
437453
with pytest.raises(TypeError, match=msg):
438454
df.astype(dtype)

pandas/tests/frame/test_constructors.py

+8
Original file line number | Diff line number | Diff line change
@@ -46,6 +46,7 @@
4646
)
4747
import pandas._testing as tm
4848
from pandas.arrays import (
49+
DatetimeArray,
4950
IntervalArray,
5051
PeriodArray,
5152
SparseArray,
@@ -2569,6 +2570,13 @@ def test_construction_from_set_raises(self, typ):
25692570
with pytest.raises(TypeError, match=msg):
25702571
Series(values)
25712572

2573+
def test_construction_from_ndarray_datetimelike(self):
2574+
# ensure the underlying arrays are properly wrapped as EA when
2575+
# constructed from 2D ndarray
2576+
arr = np.arange(0, 12, dtype="datetime64[ns]").reshape(4, 3)
2577+
df = DataFrame(arr)
2578+
assert all(isinstance(arr, DatetimeArray) for arr in df._mgr.arrays)
2579+
25722580

25732581
def get1(obj):
25742582
if isinstance(obj, Series):

0 commit comments

Comments
 (0)