Skip to content

Commit 40b0cea

Browse files
committed
BUG: 2D ndarray of dtype 'object' is always copied upon construction (pandas-dev#39263)
1 parent edbd450 commit 40b0cea

File tree

3 files changed

+36
-9
lines changed

3 files changed

+36
-9
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ Datetimelike
231231
- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)
232232
- Bug in :meth:`Series.where` incorrectly casting ``datetime64`` values to ``int64`` (:issue:`37682`)
233233
- Bug in :class:`Categorical` incorrectly typecasting ``datetime`` object to ``Timestamp`` (:issue:`38878`)
234+
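- Bug in :class:`DataFrame` constructor always copying a 2D object-dtype ``ndarray``: :func:`pandas.core.internals.construction.init_ndarray` unnecessarily copied the object rows after datetime inference, even when the inference left them unchanged (:issue:`39263`)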
234235

235236
Timedelta
236237
^^^^^^^^^

pandas/core/internals/construction.py

+27-9
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
constructors before passing them to a BlockManager.
44
"""
55
from collections import abc
6+
from itertools import groupby
67
from typing import (
78
TYPE_CHECKING,
89
Any,
@@ -240,18 +241,35 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
240241
if values.ndim == 2 and values.shape[0] != 1:
241242
# transpose and separate blocks
242243

243-
dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
244-
for n in range(len(dvals_list)):
245-
if isinstance(dvals_list[n], np.ndarray):
246-
dvals_list[n] = dvals_list[n].reshape(1, -1)
244+
dvals_list = (maybe_infer_to_datetimelike(row) for row in values)
247245

248246
from pandas.core.internals.blocks import make_block
249247

250-
# TODO: What about re-joining object columns?
251-
block_values = [
252-
make_block(dvals_list[n], placement=[n], ndim=2)
253-
for n in range(len(dvals_list))
254-
]
248+
i = 0
249+
block_values = []
250+
for is_object, group in groupby(
251+
dvals_list, lambda row: is_object_dtype(row.dtype)
252+
):
253+
dval_group = list(group)
254+
ei = i + len(dval_group)
255+
if is_object:
256+
block_values.append(
257+
make_block(
258+
values[i:ei],
259+
placement=slice(i, ei),
260+
ndim=2,
261+
)
262+
)
263+
else:
264+
block_values.extend(
265+
make_block(
266+
row.reshape(1, -1) if isinstance(row, np.ndarray) else row,
267+
placement=[i + incr],
268+
ndim=2,
269+
)
270+
for incr, row in enumerate(dval_group)
271+
)
272+
i = ei
255273

256274
else:
257275
datelike_vals = maybe_infer_to_datetimelike(values)

pandas/tests/frame/test_constructors.py

+8
Original file line numberDiff line numberDiff line change
@@ -2267,6 +2267,14 @@ def test_nested_dict_construction(self):
22672267
)
22682268
tm.assert_frame_equal(result, expected)
22692269

2270+
def test_object_array_does_not_copy(self):
2271+
a = np.array(["a", "b"], dtype="object")
2272+
b = np.array([["a", "b"], ["c", "d"]], dtype="object")
2273+
df = DataFrame(a)
2274+
assert np.shares_memory(df.values, a)
2275+
df2 = DataFrame(b)
2276+
assert np.shares_memory(df2.values, b)
2277+
22702278
def test_from_tzaware_object_array(self):
22712279
# GH#26825 2D object array of tzaware timestamps should not raise
22722280
dti = date_range("2016-04-05 04:30", periods=3, tz="UTC")

0 commit comments

Comments
 (0)