Skip to content

Commit ff2f271

Browse files
committed
BUG: 2D ndarray of dtype 'object' is always copied upon construction (pandas-dev#39263)
1 parent 2be78cc commit ff2f271

File tree

5 files changed

+39
-52
lines changed

5 files changed

+39
-52
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -280,6 +280,7 @@ Datetimelike
280280
- Bug in comparisons between :class:`Timestamp` object and ``datetime64`` objects just outside the implementation bounds for nanosecond ``datetime64`` (:issue:`39221`)
281281
- Bug in :meth:`Timestamp.round`, :meth:`Timestamp.floor`, :meth:`Timestamp.ceil` for values near the implementation bounds of :class:`Timestamp` (:issue:`39244`)
282282
- Bug in :func:`date_range` incorrectly creating :class:`DatetimeIndex` containing ``NaT`` instead of raising ``OutOfBoundsDatetime`` in corner cases (:issue:`24124`)
283+
- Bug in :class:`DataFrame` constructor always copying 2D ``object``-dtype arrays, even when no datetime-like type inference takes place (:issue:`39272`)
283284

284285
Timedelta
285286
^^^^^^^^^

pandas/core/internals/__init__.py

-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
BlockManager,
1919
SingleBlockManager,
2020
create_block_manager_from_arrays,
21-
create_block_manager_from_blocks,
2221
)
2322

2423
__all__ = [
@@ -40,5 +39,4 @@
4039
"concatenate_block_managers",
4140
# preserved here for downstream compatibility (GH-33892)
4241
"create_block_manager_from_arrays",
43-
"create_block_manager_from_blocks",
4442
]

pandas/core/internals/construction.py

+17-24
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@
6060
union_indexes,
6161
)
6262
from pandas.core.internals.managers import (
63+
create_block_manager_from_array,
6364
create_block_manager_from_arrays,
64-
create_block_manager_from_blocks,
6565
)
6666

6767
if TYPE_CHECKING:
@@ -230,36 +230,29 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
230230
index, columns = _get_axes(
231231
values.shape[0], values.shape[1], index=index, columns=columns
232232
)
233-
values = values.T
233+
234+
array = values.T
234235

235236
# if we don't have a dtype specified, then try to convert objects
236237
# on the entire block; this converts any datetimelike values
237238
# embedded in an object type
238-
if dtype is None and is_object_dtype(values.dtype):
239-
240-
if values.ndim == 2 and values.shape[0] != 1:
241-
# transpose and separate blocks
242-
243-
dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
244-
for n in range(len(dvals_list)):
245-
if isinstance(dvals_list[n], np.ndarray):
246-
dvals_list[n] = dvals_list[n].reshape(1, -1)
247-
248-
from pandas.core.internals.blocks import make_block
249-
250-
# TODO: What about re-joining object columns?
251-
block_values = [
252-
make_block(dvals_list[n], placement=[n], ndim=2)
253-
for n in range(len(dvals_list))
239+
if dtype is None and is_object_dtype(array.dtype):
240+
if array.ndim == 2 and array.shape[0] != 1:
241+
maybe_datetime = [
242+
maybe_infer_to_datetimelike(instance) for instance in array
254243
]
255-
244+
# don't convert (and copy) the objects if no type inference occurs
245+
if any(
246+
not is_dtype_equal(instance.dtype, array.dtype)
247+
for instance in maybe_datetime
248+
):
249+
return create_block_manager_from_arrays(
250+
maybe_datetime, columns, [columns, index]
251+
)
256252
else:
257-
datelike_vals = maybe_infer_to_datetimelike(values)
258-
block_values = [datelike_vals]
259-
else:
260-
block_values = [values]
253+
array = maybe_infer_to_datetimelike(array)
261254

262-
return create_block_manager_from_blocks(block_values, [columns, index])
255+
return create_block_manager_from_array(array, [columns, index])
263256

264257

265258
def init_dict(data: Dict, index, columns, dtype: Optional[DtypeObj] = None):

pandas/core/internals/managers.py

+13-26
Original file line numberDiff line numberDiff line change
@@ -1633,32 +1633,6 @@ def fast_xs(self, loc):
16331633
# Constructor Helpers
16341634

16351635

1636-
def create_block_manager_from_blocks(blocks, axes: List[Index]) -> BlockManager:
1637-
try:
1638-
if len(blocks) == 1 and not isinstance(blocks[0], Block):
1639-
# if blocks[0] is of length 0, return empty blocks
1640-
if not len(blocks[0]):
1641-
blocks = []
1642-
else:
1643-
# It's OK if a single block is passed as values, its placement
1644-
# is basically "all items", but if there're many, don't bother
1645-
# converting, it's an error anyway.
1646-
blocks = [
1647-
make_block(
1648-
values=blocks[0], placement=slice(0, len(axes[0])), ndim=2
1649-
)
1650-
]
1651-
1652-
mgr = BlockManager(blocks, axes)
1653-
mgr._consolidate_inplace()
1654-
return mgr
1655-
1656-
except ValueError as e:
1657-
blocks = [getattr(b, "values", b) for b in blocks]
1658-
tot_items = sum(b.shape[0] for b in blocks)
1659-
raise construction_error(tot_items, blocks[0].shape[1:], axes, e)
1660-
1661-
16621636
def create_block_manager_from_arrays(
16631637
arrays, names: Index, axes: List[Index]
16641638
) -> BlockManager:
@@ -1678,6 +1652,19 @@ def create_block_manager_from_arrays(
16781652
raise construction_error(len(arrays), arrays[0].shape, axes, e)
16791653

16801654

1655+
def create_block_manager_from_array(
1656+
array,
1657+
axes: List[Index],
1658+
) -> BlockManager:
1659+
try:
1660+
block = make_block(values=array, placement=slice(0, len(axes[0])), ndim=2)
1661+
mgr = BlockManager([block], axes)
1662+
mgr._consolidate_inplace()
1663+
except ValueError as e:
1664+
raise construction_error(array.shape[0], array.shape[1:], axes, e)
1665+
return mgr
1666+
1667+
16811668
def construction_error(tot_items, block_shape, axes, e=None):
16821669
""" raise a helpful message about our construction """
16831670
passed = tuple(map(int, [tot_items] + list(block_shape)))

pandas/tests/frame/test_constructors.py

+8
Original file line numberDiff line numberDiff line change
@@ -1909,6 +1909,14 @@ def test_constructor_series_copy(self, float_frame):
19091909

19101910
assert not (series["A"] == 5).all()
19111911

1912+
def test_object_array_does_not_copy(self):
1913+
a = np.array(["a", "b"], dtype="object")
1914+
b = np.array([["a", "b"], ["c", "d"]], dtype="object")
1915+
df = DataFrame(a)
1916+
assert np.shares_memory(df.values, a)
1917+
df2 = DataFrame(b)
1918+
assert np.shares_memory(df2.values, b)
1919+
19121920
def test_constructor_with_nas(self):
19131921
# GH 5016
19141922
# na's in indices

0 commit comments

Comments
 (0)