Skip to content

REF: avoid ravel/reshape in astype_nansafe, ndarray_to_mgr #45817

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Feb 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion pandas/core/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -786,7 +786,16 @@ def _try_cast(

elif dtype.kind == "U":
# TODO: test cases with arr.dtype.kind in ["m", "M"]
return lib.ensure_string_array(arr, convert_na_value=False, copy=copy)
if is_ndarray:
arr = cast(np.ndarray, arr)
shape = arr.shape
if arr.ndim > 1:
arr = arr.ravel()
else:
shape = (len(arr),)
return lib.ensure_string_array(arr, convert_na_value=False, copy=copy).reshape(
shape
)

elif dtype.kind in ["m", "M"]:
return maybe_cast_to_datetime(arr, dtype)
Expand Down
15 changes: 7 additions & 8 deletions pandas/core/dtypes/astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,12 +83,6 @@ def astype_nansafe(
ValueError
The dtype was a datetime64/timedelta64 dtype, but it had no unit.
"""
if arr.ndim > 1:
flat = arr.ravel()
result = astype_nansafe(flat, dtype, copy=copy, skipna=skipna)
# error: Item "ExtensionArray" of "Union[ExtensionArray, ndarray]" has no
# attribute "reshape"
return result.reshape(arr.shape) # type: ignore[union-attr]

# We get here with 0-dim from sparse
arr = np.atleast_1d(arr)
Expand All @@ -109,7 +103,12 @@ def astype_nansafe(
return arr.astype(dtype, copy=copy)

if issubclass(dtype.type, str):
return lib.ensure_string_array(arr, skipna=skipna, convert_na_value=False)
shape = arr.shape
if arr.ndim > 1:
arr = arr.ravel()
return lib.ensure_string_array(
arr, skipna=skipna, convert_na_value=False
).reshape(shape)

elif is_datetime64_dtype(arr.dtype):
if dtype == np.int64:
Expand Down Expand Up @@ -146,7 +145,7 @@ def astype_nansafe(
from pandas import to_datetime

return astype_nansafe(
to_datetime(arr).values,
to_datetime(arr.ravel()).values.reshape(arr.shape),
dtype,
copy=copy,
)
Expand Down
12 changes: 6 additions & 6 deletions pandas/core/internals/construction.py
Original file line number Diff line number Diff line change
Expand Up @@ -329,18 +329,18 @@ def ndarray_to_mgr(
values = _prep_ndarray(values, copy=copy_on_sanitize)

if dtype is not None and not is_dtype_equal(values.dtype, dtype):
shape = values.shape
flat = values.ravel()

# GH#40110 see similar check inside sanitize_array
rcf = not (is_integer_dtype(dtype) and values.dtype.kind == "f")

values = sanitize_array(
flat, None, dtype=dtype, copy=copy_on_sanitize, raise_cast_failure=rcf
values,
None,
dtype=dtype,
copy=copy_on_sanitize,
raise_cast_failure=rcf,
allow_2d=True,
)

values = values.reshape(shape)

# _prep_ndarray ensures that values.ndim == 2 at this point
index, columns = _get_axes(
values.shape[0], values.shape[1], index=index, columns=columns
Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,14 @@


class TestDataFrameConstructors:
def test_constructor_from_ndarray_with_str_dtype(self):
# Checks that building a DataFrame from a 2D integer ndarray with
# dtype=str gives the same frame as converting the array with
# arr.astype(str) before constructing.
# If we don't ravel/reshape around ensure_string_array, we end up
# with an array of strings each of which is e.g. "[0 1 2]"
arr = np.arange(12).reshape(4, 3)
df = DataFrame(arr, dtype=str)
expected = DataFrame(arr.astype(str))
tm.assert_frame_equal(df, expected)


def test_constructor_from_2d_datetimearray(self, using_array_manager):
dti = date_range("2016-01-01", periods=6, tz="US/Pacific")
dta = dti._data.reshape(3, 2)
Expand Down