Skip to content

Commit 483202e

Browse files
committed
BUG: 2D ndarray of dtype 'object' is always copied upon construction (pandas-dev#39263)
1 parent c322b24 commit 483202e

File tree

3 files changed

+21
-15
lines changed

3 files changed

+21
-15
lines changed

doc/source/whatsnew/v1.3.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -237,6 +237,7 @@ Datetimelike
237237
- Bug in comparisons between :class:`Timestamp` object and ``datetime64`` objects just outside the implementation bounds for nanosecond ``datetime64`` (:issue:`39221`)
238238
- Bug in :meth:`Timestamp.round`, :meth:`Timestamp.floor`, :meth:`Timestamp.ceil` for values near the implementation bounds of :class:`Timestamp` (:issue:`39244`)
239239
- Bug in :func:`date_range` incorrectly creating :class:`DatetimeIndex` containing ``NaT`` instead of raising ``OutOfBoundsDatetime`` in corner cases (:issue:`24124`)
240+
- Bug in :class:`DataFrame` constructor always copying 2D object-dtype arrays, even when no datetime-like type inference occurs (:issue:`39263`)
240241

241242
Timedelta
242243
^^^^^^^^^

pandas/core/internals/construction.py

+12-15
Original file line number | Diff line number | Diff line change
@@ -236,23 +236,20 @@ def init_ndarray(values, index, columns, dtype: Optional[DtypeObj], copy: bool):
236236
# on the entire block; this is to convert if we have datetimelike's
237237
# embedded in an object type
238238
if dtype is None and is_object_dtype(values.dtype):
239-
240239
if values.ndim == 2 and values.shape[0] != 1:
241-
# transpose and separate blocks
242-
243-
dvals_list = [maybe_infer_to_datetimelike(row) for row in values]
244-
for n in range(len(dvals_list)):
245-
if isinstance(dvals_list[n], np.ndarray):
246-
dvals_list[n] = dvals_list[n].reshape(1, -1)
247-
248-
from pandas.core.internals.blocks import make_block
249-
250-
# TODO: What about re-joining object columns?
251-
block_values = [
252-
make_block(dvals_list[n], placement=[n], ndim=2)
253-
for n in range(len(dvals_list))
240+
maybe_datetime = [
241+
maybe_infer_to_datetimelike(instance) for instance in values
254242
]
255-
243+
# don't convert (and copy) the objects if no type inference occurs
244+
if any(
245+
not is_dtype_equal(instance.dtype, values.dtype)
246+
for instance in maybe_datetime
247+
):
248+
return create_block_manager_from_arrays(
249+
maybe_datetime, columns, [columns, index]
250+
)
251+
else:
252+
block_values = [values]
256253
else:
257254
datelike_vals = maybe_infer_to_datetimelike(values)
258255
block_values = [datelike_vals]

pandas/tests/frame/test_constructors.py

+8
Original file line number | Diff line number | Diff line change
@@ -1845,6 +1845,14 @@ def test_constructor_series_copy(self, float_frame):
18451845

18461846
assert not (series["A"] == 5).all()
18471847

1848+
def test_object_array_does_not_copy(self):
1849+
a = np.array(["a", "b"], dtype="object")
1850+
b = np.array([["a", "b"], ["c", "d"]], dtype="object")
1851+
df = DataFrame(a)
1852+
assert np.shares_memory(df.values, a)
1853+
df2 = DataFrame(b)
1854+
assert np.shares_memory(df2.values, b)
1855+
18481856
def test_constructor_with_nas(self):
18491857
# GH 5016
18501858
# na's in indices

0 commit comments

Comments
 (0)