@@ -545,8 +545,25 @@ def sanitize_array(
545
545
data = construct_1d_arraylike_from_scalar (data , len (index ), dtype )
546
546
return data
547
547
548
+ elif isinstance (data , ABCExtensionArray ):
549
+ # it is already ensured above this is not a PandasArray
550
+ # Until GH#49309 is fixed this check needs to come before the
551
+ # ExtensionDtype check
552
+ if dtype is not None :
553
+ subarr = data .astype (dtype , copy = copy )
554
+ elif copy :
555
+ subarr = data .copy ()
556
+ else :
557
+ subarr = data
558
+
559
+ elif isinstance (dtype , ExtensionDtype ):
560
+ # create an extension array from its dtype
561
+ _sanitize_non_ordered (data )
562
+ cls = dtype .construct_array_type ()
563
+ subarr = cls ._from_sequence (data , dtype = dtype , copy = copy )
564
+
548
565
# GH#846
549
- if isinstance (data , np .ndarray ):
566
+ elif isinstance (data , np .ndarray ):
550
567
if isinstance (data , np .matrix ):
551
568
data = data .A
552
569
@@ -556,7 +573,10 @@ def sanitize_array(
556
573
# GH 47391 numpy > 1.24 will raise a RuntimeError for nan -> int
557
574
# casting aligning with IntCastingNaNError below
558
575
with np .errstate (invalid = "ignore" ):
559
- subarr = _try_cast (data , dtype , copy )
576
+ # GH#15832: Check if we are requesting a numeric dtype and
577
+ # that we can convert the data to the requested dtype.
578
+ subarr = maybe_cast_to_integer_array (data , dtype )
579
+
560
580
except IntCastingNaNError :
561
581
warnings .warn (
562
582
"In a future version, passing float-dtype values containing NaN "
@@ -582,28 +602,27 @@ def sanitize_array(
582
602
# we will try to copy by-definition here
583
603
subarr = _try_cast (data , dtype , copy )
584
604
585
- elif isinstance (data , ABCExtensionArray ):
586
- # it is already ensured above this is not a PandasArray
587
- subarr = data
588
-
589
- if dtype is not None :
590
- subarr = subarr .astype (dtype , copy = copy )
591
- elif copy :
592
- subarr = subarr .copy ()
605
+ elif hasattr (data , "__array__" ):
606
+ # e.g. dask array GH#38645
607
+ data = np .array (data , copy = copy )
608
+ return sanitize_array (
609
+ data ,
610
+ index = index ,
611
+ dtype = dtype ,
612
+ copy = False ,
613
+ allow_2d = allow_2d ,
614
+ )
593
615
594
616
else :
595
- if isinstance (data , (set , frozenset )):
596
- # Raise only for unordered sets, e.g., not for dict_keys
597
- raise TypeError (f"'{ type (data ).__name__ } ' type is unordered" )
598
-
617
+ _sanitize_non_ordered (data )
599
618
# materialize e.g. generators, convert e.g. tuples, abc.ValueView
600
- if hasattr (data , "__array__" ):
601
- # e.g. dask array GH#38645
602
- data = np .array (data , copy = copy )
603
- else :
604
- data = list (data )
619
+ data = list (data )
605
620
606
- if dtype is not None or len (data ) == 0 :
621
+ if len (data ) == 0 and dtype is None :
622
+ # We default to float64, matching numpy
623
+ subarr = np .array ([], dtype = np .float64 )
624
+
625
+ elif dtype is not None :
607
626
try :
608
627
subarr = _try_cast (data , dtype , copy )
609
628
except ValueError :
@@ -658,6 +677,14 @@ def range_to_ndarray(rng: range) -> np.ndarray:
658
677
return arr
659
678
660
679
680
+ def _sanitize_non_ordered (data ) -> None :
681
+ """
682
+ Raise only for unordered sets, e.g., not for dict_keys
683
+ """
684
+ if isinstance (data , (set , frozenset )):
685
+ raise TypeError (f"'{ type (data ).__name__ } ' type is unordered" )
686
+
687
+
661
688
def _sanitize_ndim (
662
689
result : ArrayLike ,
663
690
data ,
@@ -728,7 +755,7 @@ def _maybe_repeat(arr: ArrayLike, index: Index | None) -> ArrayLike:
728
755
729
756
def _try_cast (
730
757
arr : list | np .ndarray ,
731
- dtype : DtypeObj | None ,
758
+ dtype : np . dtype | None ,
732
759
copy : bool ,
733
760
) -> ArrayLike :
734
761
"""
@@ -738,7 +765,7 @@ def _try_cast(
738
765
----------
739
766
arr : ndarray or list
740
767
Excludes: ExtensionArray, Series, Index.
741
- dtype : np.dtype, ExtensionDtype or None
768
+ dtype : np.dtype or None
742
769
copy : bool
743
770
If False, don't copy the data if not needed.
744
771
@@ -771,12 +798,6 @@ def _try_cast(
771
798
return varr
772
799
return maybe_infer_to_datetimelike (varr )
773
800
774
- elif isinstance (dtype , ExtensionDtype ):
775
- # create an extension array from its dtype
776
- array_type = dtype .construct_array_type ()._from_sequence
777
- subarr = array_type (arr , dtype = dtype , copy = copy )
778
- return subarr
779
-
780
801
elif is_object_dtype (dtype ):
781
802
if not is_ndarray :
782
803
subarr = construct_1d_object_array_from_listlike (arr )
0 commit comments