diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 997611d7860db..0ac5c4bb8edc2 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -597,6 +597,15 @@ def sanitize_array( # e.g. test_constructor_floating_data_int_dtype # TODO: where is the discussion that documents the reason for this? subarr = np.array(data, copy=copy) + + elif dtype is None: + subarr = data + if data.dtype == object: + subarr = maybe_infer_to_datetimelike(data) + + if subarr is data and copy: + subarr = subarr.copy() + else: # we will try to copy by-definition here subarr = _try_cast(data, dtype, copy) @@ -754,7 +763,7 @@ def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike: def _try_cast( arr: list | np.ndarray, - dtype: np.dtype | None, + dtype: np.dtype, copy: bool, ) -> ArrayLike: """ @@ -764,7 +773,7 @@ def _try_cast( ---------- arr : ndarray or list Excludes: ExtensionArray, Series, Index. - dtype : np.dtype or None + dtype : np.dtype copy : bool If False, don't copy the data if not needed. @@ -774,30 +783,7 @@ def _try_cast( """ is_ndarray = isinstance(arr, np.ndarray) - if dtype is None: - # perf shortcut as this is the most common case - if is_ndarray: - arr = cast(np.ndarray, arr) - if arr.dtype != object: - if copy: - return arr.copy() - return arr - - out = maybe_infer_to_datetimelike(arr) - if out is arr and copy: - out = out.copy() - return out - - else: - # i.e. list - varr = np.array(arr, copy=False) - # filter out cases that we _dont_ want to go through - # maybe_infer_to_datetimelike - if varr.dtype != object or varr.size == 0: - return varr - return maybe_infer_to_datetimelike(varr) - - elif is_object_dtype(dtype): + if is_object_dtype(dtype): if not is_ndarray: subarr = construct_1d_object_array_from_listlike(arr) return subarr diff --git a/pandas/core/frame.py b/pandas/core/frame.py index abd08b14caaa8..0ae80ff2a2ee2 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -738,7 +738,7 @@ def __init__( # For data is list-like, or Iterable (will consume into list) elif is_list_like(data): - if not isinstance(data, (abc.Sequence, ExtensionArray)): + if not isinstance(data, abc.Sequence): if hasattr(data, "__array__"): # GH#44616 big perf improvement for e.g. pytorch tensor data = np.asarray(data) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index c1745630602ab..761a641ccb2f7 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -53,9 +53,7 @@ ) from pandas.core.arrays import ( Categorical, - DatetimeArray, ExtensionArray, - TimedeltaArray, ) from pandas.core.construction import ( ensure_wrapped_if_datetimelike, @@ -277,14 +275,20 @@ def ndarray_to_mgr( return arrays_to_mgr(values, columns, index, dtype=dtype, typ=typ) - elif is_extension_array_dtype(vdtype) and not is_1d_only_ea_dtype(vdtype): - # i.e. Datetime64TZ, PeriodDtype + elif is_extension_array_dtype(vdtype): + # i.e. Datetime64TZ, PeriodDtype; cases with is_1d_only_ea_dtype(vdtype) + # are already caught above values = extract_array(values, extract_numpy=True) if copy: values = values.copy() if values.ndim == 1: values = values.reshape(-1, 1) + elif isinstance(values, (np.ndarray, ExtensionArray, ABCSeries, Index)): + # drop subclass info + values = np.array(values, copy=copy_on_sanitize) + values = _ensure_2d(values) + else: # by definition an array here # the dtypes will be coerced to a single dtype @@ -496,51 +500,50 @@ def treat_as_nested(data) -> bool: # --------------------------------------------------------------------- -def _prep_ndarraylike( - values, copy: bool = True -) -> np.ndarray | DatetimeArray | TimedeltaArray: - if isinstance(values, TimedeltaArray) or ( - isinstance(values, DatetimeArray) and values.tz is None - ): - # By retaining DTA/TDA instead of unpacking, we end up retaining non-nano - pass - - elif not isinstance(values, (np.ndarray, ABCSeries, Index)): - if len(values) == 0: - return np.empty((0, 0), dtype=object) - elif isinstance(values, range): - arr = range_to_ndarray(values) - return arr[..., np.newaxis] - - def convert(v): - if not is_list_like(v) or isinstance(v, ABCDataFrame): - return v - - v = extract_array(v, extract_numpy=True) - res = maybe_convert_platform(v) - return res - - # we could have a 1-dim or 2-dim list here - # this is equiv of np.asarray, but does object conversion - # and platform dtype preservation - if is_list_like(values[0]): - values = np.array([convert(v) for v in values]) - elif isinstance(values[0], np.ndarray) and values[0].ndim == 0: - # GH#21861 see test_constructor_list_of_lists - values = np.array([convert(v) for v in values]) - else: - values = convert(values) - +def _prep_ndarraylike(values, copy: bool = True) -> np.ndarray: + # values is specifically _not_ ndarray, EA, Index, or Series + # We only get here with `not treat_as_nested(values)` + + if len(values) == 0: + return np.empty((0, 0), dtype=object) + elif isinstance(values, range): + arr = range_to_ndarray(values) + return arr[..., np.newaxis] + + def convert(v): + if not is_list_like(v) or isinstance(v, ABCDataFrame): + return v + + v = extract_array(v, extract_numpy=True) + res = maybe_convert_platform(v) + # We don't do maybe_infer_to_datetimelike here bc we will end up doing + # it column-by-column in ndarray_to_mgr + return res + + # we could have a 1-dim or 2-dim list here + # this is equiv of np.asarray, but does object conversion + # and platform dtype preservation + # does not convert e.g. [1, "a", True] to ["1", "a", "True"] like + # np.asarray would + if is_list_like(values[0]): + values = np.array([convert(v) for v in values]) + elif isinstance(values[0], np.ndarray) and values[0].ndim == 0: + # GH#21861 see test_constructor_list_of_lists + values = np.array([convert(v) for v in values]) else: + values = convert(values) - # drop subclass info - values = np.array(values, copy=copy) + return _ensure_2d(values) + +def _ensure_2d(values: np.ndarray) -> np.ndarray: + """ + Reshape 1D values, raise on anything else other than 2D. + """ if values.ndim == 1: values = values.reshape((values.shape[0], 1)) elif values.ndim != 2: raise ValueError(f"Must pass 2-d input. shape={values.shape}") - return values