Skip to content

Commit cfb9bbe

Browse files
jorisvandenbossche authored and jreback committed
CLN: clean-up sanitize_array series construction (#26979)
1 parent a65b2e3 commit cfb9bbe

File tree

1 file changed

+36
-43
lines changed

1 file changed

+36
-43
lines changed

pandas/core/internals/construction.py

+36-43
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,8 @@
2121
is_extension_array_dtype, is_extension_type, is_float_dtype,
2222
is_integer_dtype, is_iterator, is_list_like, is_object_dtype, pandas_dtype)
2323
from pandas.core.dtypes.generic import (
24-
ABCDataFrame, ABCDatetimeIndex, ABCIndexClass, ABCPandasArray,
25-
ABCPeriodIndex, ABCSeries, ABCTimedeltaIndex)
24+
ABCDataFrame, ABCDatetimeIndex, ABCIndexClass, ABCPeriodIndex, ABCSeries,
25+
ABCTimedeltaIndex)
2626
from pandas.core.dtypes.missing import isna
2727

2828
from pandas.core import algorithms, common as com
@@ -570,59 +570,40 @@ def sanitize_array(data, index, dtype=None, copy=False,
570570
else:
571571
data = data.copy()
572572

573+
# extract ndarray or ExtensionArray, ensure we have no PandasArray
573574
data = extract_array(data, extract_numpy=True)
574575

575576
# GH#846
576577
if isinstance(data, np.ndarray):
577578

578-
if dtype is not None:
579-
subarr = np.array(data, copy=False)
580-
579+
if (dtype is not None
580+
and is_float_dtype(data.dtype) and is_integer_dtype(dtype)):
581581
# possibility of nan -> garbage
582-
if is_float_dtype(data.dtype) and is_integer_dtype(dtype):
583-
try:
584-
subarr = _try_cast(data, True, dtype, copy,
585-
True)
586-
except ValueError:
587-
if copy:
588-
subarr = data.copy()
589-
else:
590-
subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)
591-
elif isinstance(data, Index):
592-
# don't coerce Index types
593-
# e.g. indexes can have different conversions (so don't fast path
594-
# them)
595-
# GH#6140
596-
subarr = sanitize_index(data, index, copy=copy)
582+
try:
583+
subarr = _try_cast(data, dtype, copy, True)
584+
except ValueError:
585+
if copy:
586+
subarr = data.copy()
587+
else:
588+
subarr = np.array(data, copy=False)
597589
else:
598-
599590
# we will try to copy be-definition here
600-
subarr = _try_cast(data, True, dtype, copy, raise_cast_failure)
591+
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
601592

602593
elif isinstance(data, ExtensionArray):
603-
if isinstance(data, ABCPandasArray):
604-
# We don't want to let people put our PandasArray wrapper
605-
# (the output of Series/Index.array), into a Series. So
606-
# we explicitly unwrap it here.
607-
subarr = data.to_numpy()
608-
else:
609-
subarr = data
610-
611-
# everything else in this block must also handle ndarray's,
612-
# because we've unwrapped PandasArray into an ndarray.
594+
# it is already ensured above this is not a PandasArray
595+
subarr = data
613596

614597
if dtype is not None:
615-
subarr = data.astype(dtype)
616-
617-
if copy:
618-
subarr = data.copy()
598+
subarr = subarr.astype(dtype, copy=copy)
599+
elif copy:
600+
subarr = subarr.copy()
619601
return subarr
620602

621603
elif isinstance(data, (list, tuple)) and len(data) > 0:
622604
if dtype is not None:
623605
try:
624-
subarr = _try_cast(data, False, dtype, copy,
625-
raise_cast_failure)
606+
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
626607
except Exception:
627608
if raise_cast_failure: # pragma: no cover
628609
raise
@@ -637,9 +618,9 @@ def sanitize_array(data, index, dtype=None, copy=False,
637618
elif isinstance(data, range):
638619
# GH#16804
639620
arr = np.arange(data.start, data.stop, data.step, dtype='int64')
640-
subarr = _try_cast(arr, False, dtype, copy, raise_cast_failure)
621+
subarr = _try_cast(arr, dtype, copy, raise_cast_failure)
641622
else:
642-
subarr = _try_cast(data, False, dtype, copy, raise_cast_failure)
623+
subarr = _try_cast(data, dtype, copy, raise_cast_failure)
643624

644625
# scalar like, GH
645626
if getattr(subarr, 'ndim', 0) == 0:
@@ -701,10 +682,22 @@ def sanitize_array(data, index, dtype=None, copy=False,
701682
return subarr
702683

703684

704-
def _try_cast(arr, take_fast_path, dtype, copy, raise_cast_failure):
705-
685+
def _try_cast(arr, dtype, copy, raise_cast_failure):
686+
"""
687+
Convert input to numpy ndarray and optionally cast to a given dtype.
688+
689+
Parameters
690+
----------
691+
arr : array-like
692+
dtype : np.dtype, ExtensionDtype or None
693+
copy : bool
694+
If False, don't copy the data if not needed.
695+
raise_cast_failure : bool
696+
If True, and if a dtype is specified, raise errors during casting.
697+
Otherwise an object array is returned.
698+
"""
706699
# perf shortcut as this is the most common case
707-
if take_fast_path:
700+
if isinstance(arr, np.ndarray):
708701
if maybe_castable(arr) and not copy and dtype is None:
709702
return arr
710703

0 commit comments

Comments
 (0)