@@ -508,11 +508,7 @@ def sanitize_array(
508
508
509
509
# the result that we want
510
510
elif subarr .ndim == 1 :
511
- if index is not None :
512
-
513
- # a 1-element ndarray
514
- if len (subarr ) != len (index ) and len (subarr ) == 1 :
515
- subarr = subarr .repeat (len (index ))
511
+ subarr = _maybe_repeat (subarr , index )
516
512
517
513
elif subarr .ndim > 1 :
518
514
if isinstance (data , np .ndarray ):
@@ -521,16 +517,7 @@ def sanitize_array(
521
517
subarr = com .asarray_tuplesafe (data , dtype = dtype )
522
518
523
519
if not (is_extension_array_dtype (subarr .dtype ) or is_extension_array_dtype (dtype )):
524
- # This is to prevent mixed-type Series getting all casted to
525
- # NumPy string type, e.g. NaN --> '-1#IND'.
526
- if issubclass (subarr .dtype .type , str ):
527
- # GH#16605
528
- # If not empty convert the data to dtype
529
- # GH#19853: If data is a scalar, subarr has already the result
530
- if not lib .is_scalar (data ):
531
- if not np .all (isna (data )):
532
- data = np .array (data , dtype = dtype , copy = False )
533
- subarr = np .array (data , dtype = object , copy = copy )
520
+ subarr = _sanitize_str_dtypes (subarr , data , dtype , copy )
534
521
535
522
is_object_or_str_dtype = is_object_dtype (dtype ) or is_string_dtype (dtype )
536
523
if is_object_dtype (subarr .dtype ) and not is_object_or_str_dtype :
@@ -541,6 +528,37 @@ def sanitize_array(
541
528
return subarr
542
529
543
530
531
def _sanitize_str_dtypes(
    result: np.ndarray, data, dtype: Optional[DtypeObj], copy: bool
) -> np.ndarray:
    """
    Ensure we have a dtype that is supported by pandas.

    If ``result`` has a NumPy string dtype and ``data`` is not a scalar,
    an object-dtype array built from ``data`` is returned instead.
    Otherwise ``result`` is returned unchanged.

    Parameters
    ----------
    result : np.ndarray
        The array constructed so far.
    data : object
        The original input that ``result`` was built from.
    dtype : DtypeObj or None
        Requested dtype; applied to ``data`` first unless every element
        of ``data`` is NA.
    copy : bool
        If True the returned object array is always a fresh copy; if False
        a copy is made only when the conversion requires one.

    Returns
    -------
    np.ndarray
    """
    # This is to prevent mixed-type Series getting all casted to
    # NumPy string type, e.g. NaN --> '-1#IND'.
    if not issubclass(result.dtype.type, str):
        return result

    # GH#19853: If data is a scalar, result has already the result
    if lib.is_scalar(data):
        return result

    # GH#16605
    # If not empty convert the data to dtype
    if not np.all(isna(data)):
        # np.asarray copies only when it has to.  np.array(..., copy=False)
        # means "never copy" on NumPy >= 2.0 and raises ValueError whenever
        # a copy is unavoidable (e.g. list input).
        data = np.asarray(data, dtype=dtype)

    if copy:
        return np.array(data, dtype=object, copy=True)
    # Same reasoning as above: copy=False here means "copy only if needed".
    return np.asarray(data, dtype=object)
549
+
550
+
551
def _maybe_repeat(arr: ArrayLike, index: Optional[Index]) -> ArrayLike:
    """
    Stretch a single-element array to match the length of ``index``.

    ``arr`` is returned untouched when ``index`` is None, when ``arr`` does
    not hold exactly one element, or when ``index`` also has length one.
    Otherwise the result of ``arr.repeat(len(index))`` is returned.
    """
    # Nothing to broadcast against, or arr is not a length-1 array.
    if index is None or len(arr) != 1:
        return arr

    expected_len = len(index)
    return arr if expected_len == 1 else arr.repeat(expected_len)
560
+
561
+
544
562
def _try_cast (arr , dtype : Optional [DtypeObj ], copy : bool , raise_cast_failure : bool ):
545
563
"""
546
564
Convert input to numpy ndarray and optionally cast to a given dtype.
0 commit comments