diff --git a/pandas/core/construction.py b/pandas/core/construction.py index c7db58fe8c6a3..c381496164630 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -545,8 +545,25 @@ def sanitize_array( data = construct_1d_arraylike_from_scalar(data, len(index), dtype) return data + elif isinstance(data, ABCExtensionArray): + # it is already ensured above this is not a PandasArray + # Until GH#49309 is fixed this check needs to come before the + # ExtensionDtype check + if dtype is not None: + subarr = data.astype(dtype, copy=copy) + elif copy: + subarr = data.copy() + else: + subarr = data + + elif isinstance(dtype, ExtensionDtype): + # create an extension array from its dtype + _sanitize_non_ordered(data) + cls = dtype.construct_array_type() + subarr = cls._from_sequence(data, dtype=dtype, copy=copy) + # GH#846 - if isinstance(data, np.ndarray): + elif isinstance(data, np.ndarray): if isinstance(data, np.matrix): data = data.A @@ -556,7 +573,10 @@ def sanitize_array( # GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int # casting aligning with IntCastingNaNError below with np.errstate(invalid="ignore"): - subarr = _try_cast(data, dtype, copy) + # GH#15832: Check if we are requesting a numeric dtype and + # that we can convert the data to the requested dtype. + subarr = maybe_cast_to_integer_array(data, dtype) + except IntCastingNaNError: warnings.warn( "In a future version, passing float-dtype values containing NaN " @@ -582,28 +602,27 @@ def sanitize_array( # we will try to copy by-definition here subarr = _try_cast(data, dtype, copy) - elif isinstance(data, ABCExtensionArray): - # it is already ensured above this is not a PandasArray - subarr = data - - if dtype is not None: - subarr = subarr.astype(dtype, copy=copy) - elif copy: - subarr = subarr.copy() + elif hasattr(data, "__array__"): + # e.g. dask array GH#38645 + data = np.array(data, copy=copy) + return sanitize_array( + data, + index=index, + dtype=dtype, + copy=False, + allow_2d=allow_2d, + ) else: - if isinstance(data, (set, frozenset)): - # Raise only for unordered sets, e.g., not for dict_keys - raise TypeError(f"'{type(data).__name__}' type is unordered") - + _sanitize_non_ordered(data) # materialize e.g. generators, convert e.g. tuples, abc.ValueView - if hasattr(data, "__array__"): - # e.g. dask array GH#38645 - data = np.array(data, copy=copy) - else: - data = list(data) + data = list(data) - if dtype is not None or len(data) == 0: + if len(data) == 0 and dtype is None: + # We default to float64, matching numpy + subarr = np.array([], dtype=np.float64) + + elif dtype is not None: try: subarr = _try_cast(data, dtype, copy) except ValueError: @@ -658,6 +677,14 @@ def range_to_ndarray(rng: range) -> np.ndarray: return arr +def _sanitize_non_ordered(data) -> None: + """ + Raise only for unordered sets, e.g., not for dict_keys + """ + if isinstance(data, (set, frozenset)): + raise TypeError(f"'{type(data).__name__}' type is unordered") + + def _sanitize_ndim( result: ArrayLike, data, @@ -728,7 +755,7 @@ def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike: def _try_cast( arr: list | np.ndarray, - dtype: DtypeObj | None, + dtype: np.dtype | None, copy: bool, ) -> ArrayLike: """ @@ -738,7 +765,7 @@ def _try_cast( ---------- arr : ndarray or list Excludes: ExtensionArray, Series, Index. - dtype : np.dtype, ExtensionDtype or None + dtype : np.dtype or None copy : bool If False, don't copy the data if not needed. @@ -771,12 +798,6 @@ def _try_cast( return varr return maybe_infer_to_datetimelike(varr) - elif isinstance(dtype, ExtensionDtype): - # create an extension array from its dtype - array_type = dtype.construct_array_type()._from_sequence - subarr = array_type(arr, dtype=dtype, copy=copy) - return subarr - elif is_object_dtype(dtype): if not is_ndarray: subarr = construct_1d_object_array_from_listlike(arr) diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 8f3c93259f0c6..ee7026663b2b6 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -22,6 +22,7 @@ ArrayLike, DtypeObj, Manager, + npt, ) from pandas.util._exceptions import find_stack_level @@ -1032,7 +1033,7 @@ def _validate_or_indexify_columns( def _convert_object_array( - content: list[np.ndarray], dtype: DtypeObj | None + content: list[npt.NDArray[np.object_]], dtype: DtypeObj | None ) -> list[ArrayLike]: """ Internal function to convert object array. @@ -1059,6 +1060,7 @@ def convert(arr): arr = cls._from_sequence(arr, dtype=dtype, copy=False) else: arr = maybe_cast_to_datetime(arr, dtype) + return arr arrays = [convert(arr) for arr in content]