Skip to content

Commit 6b4fa02

Browse files
authored
REF: simplify sanitize_array (#49347)
REF: simplify sanitize_array
1 parent bcb8346 commit 6b4fa02

File tree

2 files changed

+52
-29
lines changed

2 files changed

+52
-29
lines changed

pandas/core/construction.py

+49-28
Original file line number | Diff line number | Diff line change
@@ -545,8 +545,25 @@ def sanitize_array(
545545
data = construct_1d_arraylike_from_scalar(data, len(index), dtype)
546546
return data
547547

548+
elif isinstance(data, ABCExtensionArray):
549+
# it is already ensured above this is not a PandasArray
550+
# Until GH#49309 is fixed this check needs to come before the
551+
# ExtensionDtype check
552+
if dtype is not None:
553+
subarr = data.astype(dtype, copy=copy)
554+
elif copy:
555+
subarr = data.copy()
556+
else:
557+
subarr = data
558+
559+
elif isinstance(dtype, ExtensionDtype):
560+
# create an extension array from its dtype
561+
_sanitize_non_ordered(data)
562+
cls = dtype.construct_array_type()
563+
subarr = cls._from_sequence(data, dtype=dtype, copy=copy)
564+
548565
# GH#846
549-
if isinstance(data, np.ndarray):
566+
elif isinstance(data, np.ndarray):
550567
if isinstance(data, np.matrix):
551568
data = data.A
552569

@@ -556,7 +573,10 @@ def sanitize_array(
556573
# GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int
557574
# casting aligning with IntCastingNaNError below
558575
with np.errstate(invalid="ignore"):
559-
subarr = _try_cast(data, dtype, copy)
576+
# GH#15832: Check if we are requesting a numeric dtype and
577+
# that we can convert the data to the requested dtype.
578+
subarr = maybe_cast_to_integer_array(data, dtype)
579+
560580
except IntCastingNaNError:
561581
warnings.warn(
562582
"In a future version, passing float-dtype values containing NaN "
@@ -582,28 +602,27 @@ def sanitize_array(
582602
# we will try to copy by-definition here
583603
subarr = _try_cast(data, dtype, copy)
584604

585-
elif isinstance(data, ABCExtensionArray):
586-
# it is already ensured above this is not a PandasArray
587-
subarr = data
588-
589-
if dtype is not None:
590-
subarr = subarr.astype(dtype, copy=copy)
591-
elif copy:
592-
subarr = subarr.copy()
605+
elif hasattr(data, "__array__"):
606+
# e.g. dask array GH#38645
607+
data = np.array(data, copy=copy)
608+
return sanitize_array(
609+
data,
610+
index=index,
611+
dtype=dtype,
612+
copy=False,
613+
allow_2d=allow_2d,
614+
)
593615

594616
else:
595-
if isinstance(data, (set, frozenset)):
596-
# Raise only for unordered sets, e.g., not for dict_keys
597-
raise TypeError(f"'{type(data).__name__}' type is unordered")
598-
617+
_sanitize_non_ordered(data)
599618
# materialize e.g. generators, convert e.g. tuples, abc.ValueView
600-
if hasattr(data, "__array__"):
601-
# e.g. dask array GH#38645
602-
data = np.array(data, copy=copy)
603-
else:
604-
data = list(data)
619+
data = list(data)
605620

606-
if dtype is not None or len(data) == 0:
621+
if len(data) == 0 and dtype is None:
622+
# We default to float64, matching numpy
623+
subarr = np.array([], dtype=np.float64)
624+
625+
elif dtype is not None:
607626
try:
608627
subarr = _try_cast(data, dtype, copy)
609628
except ValueError:
@@ -658,6 +677,14 @@ def range_to_ndarray(rng: range) -> np.ndarray:
658677
return arr
659678

660679

680+
def _sanitize_non_ordered(data) -> None:
681+
"""
682+
Raise TypeError if data is a set or frozenset (unordered); other inputs, e.g. dict_keys, do not raise.
683+
"""
684+
if isinstance(data, (set, frozenset)):
685+
raise TypeError(f"'{type(data).__name__}' type is unordered")
686+
687+
661688
def _sanitize_ndim(
662689
result: ArrayLike,
663690
data,
@@ -728,7 +755,7 @@ def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
728755

729756
def _try_cast(
730757
arr: list | np.ndarray,
731-
dtype: DtypeObj | None,
758+
dtype: np.dtype | None,
732759
copy: bool,
733760
) -> ArrayLike:
734761
"""
@@ -738,7 +765,7 @@ def _try_cast(
738765
----------
739766
arr : ndarray or list
740767
Excludes: ExtensionArray, Series, Index.
741-
dtype : np.dtype, ExtensionDtype or None
768+
dtype : np.dtype or None
742769
copy : bool
743770
If False, don't copy the data if not needed.
744771
@@ -771,12 +798,6 @@ def _try_cast(
771798
return varr
772799
return maybe_infer_to_datetimelike(varr)
773800

774-
elif isinstance(dtype, ExtensionDtype):
775-
# create an extension array from its dtype
776-
array_type = dtype.construct_array_type()._from_sequence
777-
subarr = array_type(arr, dtype=dtype, copy=copy)
778-
return subarr
779-
780801
elif is_object_dtype(dtype):
781802
if not is_ndarray:
782803
subarr = construct_1d_object_array_from_listlike(arr)

pandas/core/internals/construction.py

+3-1
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,7 @@
2222
ArrayLike,
2323
DtypeObj,
2424
Manager,
25+
npt,
2526
)
2627
from pandas.util._exceptions import find_stack_level
2728

@@ -1032,7 +1033,7 @@ def _validate_or_indexify_columns(
10321033

10331034

10341035
def _convert_object_array(
1035-
content: list[np.ndarray], dtype: DtypeObj | None
1036+
content: list[npt.NDArray[np.object_]], dtype: DtypeObj | None
10361037
) -> list[ArrayLike]:
10371038
"""
10381039
Internal function to convert object array.
@@ -1059,6 +1060,7 @@ def convert(arr):
10591060
arr = cls._from_sequence(arr, dtype=dtype, copy=False)
10601061
else:
10611062
arr = maybe_cast_to_datetime(arr, dtype)
1063+
10621064
return arr
10631065

10641066
arrays = [convert(arr) for arr in content]

0 commit comments

Comments
 (0)