Skip to content

Commit b9cfae1

Browse files
irgolic authored and feefladder committed
BUG: 2D ndarray of dtype 'object' is always copied upon construction (pandas-dev#39272)
1 parent 7aa5300 commit b9cfae1

File tree

3 files changed

+21
-11
lines changed

3 files changed

+21
-11
lines changed

doc/source/whatsnew/v1.4.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,7 @@ Categorical
180180
Datetimelike
181181
^^^^^^^^^^^^
182182
- Bug in :func:`to_datetime` returning pd.NaT for inputs that produce duplicated values, when ``cache=True`` (:issue:`42259`)
183+
- Bug in :class:`DataFrame` constructor unnecessarily copying non-datetimelike 2D object arrays (:issue:`39272`)
183184
-
184185

185186
Timedelta

pandas/core/internals/construction.py

+6-11
Original file line numberDiff line numberDiff line change
@@ -348,22 +348,17 @@ def ndarray_to_mgr(
348348
# on the entire block; this is to convert if we have datetimelike's
349349
# embedded in an object type
350350
if dtype is None and is_object_dtype(values.dtype):
351-
352-
if values.ndim == 2 and values.shape[0] != 1:
353-
# transpose and separate blocks
354-
355-
dtlike_vals = [maybe_infer_to_datetimelike(row) for row in values]
356-
dvals_list = [ensure_block_shape(dval, 2) for dval in dtlike_vals]
357-
358-
# TODO: What about re-joining object columns?
351+
obj_columns = list(values)
352+
maybe_datetime = [maybe_infer_to_datetimelike(x) for x in obj_columns]
353+
# don't convert (and copy) the objects if no type inference occurs
354+
if any(x is not y for x, y in zip(obj_columns, maybe_datetime)):
355+
dvals_list = [ensure_block_shape(dval, 2) for dval in maybe_datetime]
359356
block_values = [
360357
new_block(dvals_list[n], placement=n, ndim=2)
361358
for n in range(len(dvals_list))
362359
]
363-
364360
else:
365-
datelike_vals = maybe_infer_to_datetimelike(values)
366-
nb = new_block(datelike_vals, placement=slice(len(columns)), ndim=2)
361+
nb = new_block(values, placement=slice(len(columns)), ndim=2)
367362
block_values = [nb]
368363
else:
369364
nb = new_block(values, placement=slice(len(columns)), ndim=2)

pandas/tests/frame/test_constructors.py

+14
Original file line numberDiff line numberDiff line change
@@ -253,6 +253,20 @@ def test_constructor_dtype_nocast_view_2d_array(self):
253253
should_be_view[0][0] = 97
254254
assert df.values[0, 0] == 97
255255

256+
@td.skip_array_manager_invalid_test
257+
def test_1d_object_array_does_not_copy(self):
258+
# https://github.com/pandas-dev/pandas/issues/39272
259+
arr = np.array(["a", "b"], dtype="object")
260+
df = DataFrame(arr)
261+
assert np.shares_memory(df.values, arr)
262+
263+
@td.skip_array_manager_invalid_test
264+
def test_2d_object_array_does_not_copy(self):
265+
# https://github.com/pandas-dev/pandas/issues/39272
266+
arr = np.array([["a", "b"], ["c", "d"]], dtype="object")
267+
df = DataFrame(arr)
268+
assert np.shares_memory(df.values, arr)
269+
256270
def test_constructor_dtype_list_data(self):
257271
df = DataFrame([[1, "2"], [None, "a"]], dtype=object)
258272
assert df.loc[1, 0] is None

0 commit comments

Comments
 (0)