From 3f50bbf1825161c8efa34d4cf3984197ca242499 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 20 May 2021 13:31:01 -0700 Subject: [PATCH] REF: simplify sanitize_array --- pandas/core/construction.py | 35 ++++++++++++--------------- pandas/core/internals/construction.py | 6 ++--- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/pandas/core/construction.py b/pandas/core/construction.py index 0fef02b1489ac..e83aa02f25ada 100644 --- a/pandas/core/construction.py +++ b/pandas/core/construction.py @@ -6,7 +6,6 @@ """ from __future__ import annotations -from collections import abc from typing import ( TYPE_CHECKING, Any, @@ -501,6 +500,16 @@ def sanitize_array( if dtype is None: dtype = data.dtype data = lib.item_from_zerodim(data) + elif isinstance(data, range): + # GH#16804 + data = np.arange(data.start, data.stop, data.step, dtype="int64") + copy = False + + if not is_list_like(data): + if index is None: + raise ValueError("index must be specified when data is not list-like") + data = construct_1d_arraylike_from_scalar(data, len(index), dtype) + return data # GH#846 if isinstance(data, np.ndarray): @@ -525,14 +534,16 @@ def sanitize_array( subarr = subarr.copy() return subarr - elif isinstance(data, (list, tuple, abc.Set, abc.ValuesView)) and len(data) > 0: - # TODO: deque, array.array + else: if isinstance(data, (set, frozenset)): # Raise only for unordered sets, e.g., not for dict_keys raise TypeError(f"'{type(data).__name__}' type is unordered") + + # materialize e.g. generators, convert e.g. tuples, abc.ValuesView + # TODO: non-standard array-likes we can convert to ndarray more efficiently? 
data = list(data) - if dtype is not None: + if dtype is not None or len(data) == 0: subarr = _try_cast(data, dtype, copy, raise_cast_failure) else: subarr = maybe_convert_platform(data) @@ -541,22 +552,6 @@ def sanitize_array( # "ExtensionArray") subarr = maybe_cast_to_datetime(subarr, dtype) # type: ignore[assignment] - elif isinstance(data, range): - # GH#16804 - arr = np.arange(data.start, data.stop, data.step, dtype="int64") - subarr = _try_cast(arr, dtype, copy, raise_cast_failure) - - elif not is_list_like(data): - if index is None: - raise ValueError("index must be specified when data is not list-like") - subarr = construct_1d_arraylike_from_scalar(data, len(index), dtype) - - else: - # realize e.g. generators - # TODO: non-standard array-likes we can convert to ndarray more efficiently? - data = list(data) - subarr = _try_cast(data, dtype, copy, raise_cast_failure) - subarr = _sanitize_ndim(subarr, data, dtype, index) if not ( diff --git a/pandas/core/internals/construction.py b/pandas/core/internals/construction.py index 06d30d6ed72e8..fd8896a8a7e6e 100644 --- a/pandas/core/internals/construction.py +++ b/pandas/core/internals/construction.py @@ -554,10 +554,8 @@ def convert(v): else: - # drop subclass info, do not copy data - values = np.asarray(values) - if copy: - values = values.copy() + # drop subclass info + values = np.array(values, copy=copy) if values.ndim == 1: values = values.reshape((values.shape[0], 1))