Skip to content

Commit 133d247

Browse files
jbrockmendel authored and luckyvs1 committed
REF: helpers for sanitize_array (pandas-dev#38553)
1 parent 9901c8c commit 133d247

File tree

1 file changed

+33
-15
lines changed

1 file changed

+33
-15
lines changed

pandas/core/construction.py

+33-15
Original file line numberDiff line numberDiff line change
@@ -508,11 +508,7 @@ def sanitize_array(
508508

509509
# the result that we want
510510
elif subarr.ndim == 1:
511-
if index is not None:
512-
513-
# a 1-element ndarray
514-
if len(subarr) != len(index) and len(subarr) == 1:
515-
subarr = subarr.repeat(len(index))
511+
subarr = _maybe_repeat(subarr, index)
516512

517513
elif subarr.ndim > 1:
518514
if isinstance(data, np.ndarray):
@@ -521,16 +517,7 @@ def sanitize_array(
521517
subarr = com.asarray_tuplesafe(data, dtype=dtype)
522518

523519
if not (is_extension_array_dtype(subarr.dtype) or is_extension_array_dtype(dtype)):
524-
# This is to prevent mixed-type Series getting all casted to
525-
# NumPy string type, e.g. NaN --> '-1#IND'.
526-
if issubclass(subarr.dtype.type, str):
527-
# GH#16605
528-
# If not empty convert the data to dtype
529-
# GH#19853: If data is a scalar, subarr has already the result
530-
if not lib.is_scalar(data):
531-
if not np.all(isna(data)):
532-
data = np.array(data, dtype=dtype, copy=False)
533-
subarr = np.array(data, dtype=object, copy=copy)
520+
subarr = _sanitize_str_dtypes(subarr, data, dtype, copy)
534521

535522
is_object_or_str_dtype = is_object_dtype(dtype) or is_string_dtype(dtype)
536523
if is_object_dtype(subarr.dtype) and not is_object_or_str_dtype:
@@ -541,6 +528,37 @@ def sanitize_array(
541528
return subarr
542529

543530

531+
def _sanitize_str_dtypes(
532+
result: np.ndarray, data, dtype: Optional[DtypeObj], copy: bool
533+
) -> np.ndarray:
534+
"""
535+
Ensure we have a dtype that is supported by pandas.
536+
"""
537+
538+
# This is to prevent mixed-type Series getting all casted to
539+
# NumPy string type, e.g. NaN --> '-1#IND'.
540+
if issubclass(result.dtype.type, str):
541+
# GH#16605
542+
# If not empty convert the data to dtype
543+
# GH#19853: If data is a scalar, result has already the result
544+
if not lib.is_scalar(data):
545+
if not np.all(isna(data)):
546+
data = np.array(data, dtype=dtype, copy=False)
547+
result = np.array(data, dtype=object, copy=copy)
548+
return result
549+
550+
551+
def _maybe_repeat(arr: ArrayLike, index: Optional[Index]) -> ArrayLike:
552+
"""
553+
If we have a length-1 array and an index describing how long we expect
554+
the result to be, repeat the array.
555+
"""
556+
if index is not None:
557+
if 1 == len(arr) != len(index):
558+
arr = arr.repeat(len(index))
559+
return arr
560+
561+
544562
def _try_cast(arr, dtype: Optional[DtypeObj], copy: bool, raise_cast_failure: bool):
545563
"""
546564
Convert input to numpy ndarray and optionally cast to a given dtype.

0 commit comments

Comments
 (0)