Skip to content

Commit 2caad13

Browse files
jbrockmendel authored and luckyvs1 committed
REF: simplify internals.construction (pandas-dev#38400)
1 parent 59ea171 commit 2caad13

File tree

1 file changed

+44
-44
lines changed

1 file changed

+44
-44
lines changed

pandas/core/internals/construction.py

+44-44
Original file line numberDiff line numberDiff line change
@@ -525,59 +525,49 @@ def to_arrays(
525525
if columns is not None:
526526
return [[]] * len(columns), columns
527527
return [], [] # columns if columns is not None else []
528-
if isinstance(data[0], (list, tuple)):
529-
return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype)
530-
elif isinstance(data[0], abc.Mapping):
531-
return _list_of_dict_to_arrays(
532-
data, columns, coerce_float=coerce_float, dtype=dtype
533-
)
534-
elif isinstance(data[0], ABCSeries):
535-
return _list_of_series_to_arrays(
536-
data, columns, coerce_float=coerce_float, dtype=dtype
537-
)
528+
538529
elif isinstance(data[0], Categorical):
539530
if columns is None:
540531
columns = ibase.default_index(len(data))
541532
return data, columns
542-
elif (
543-
isinstance(data, (np.ndarray, ABCSeries, Index))
544-
and data.dtype.names is not None
545-
):
546533

534+
elif isinstance(data, np.ndarray) and data.dtype.names is not None:
535+
# e.g. recarray
547536
columns = list(data.dtype.names)
548537
arrays = [data[k] for k in columns]
549538
return arrays, columns
539+
540+
if isinstance(data[0], (list, tuple)):
541+
content, columns = _list_to_arrays(data, columns)
542+
elif isinstance(data[0], abc.Mapping):
543+
content, columns = _list_of_dict_to_arrays(data, columns)
544+
elif isinstance(data[0], ABCSeries):
545+
content, columns = _list_of_series_to_arrays(data, columns)
550546
else:
551547
# last ditch effort
552548
data = [tuple(x) for x in data]
553-
return _list_to_arrays(data, columns, coerce_float=coerce_float, dtype=dtype)
549+
content, columns = _list_to_arrays(data, columns)
550+
551+
content, columns = _finalize_columns_and_data(content, columns, dtype, coerce_float)
552+
return content, columns
554553

555554

556555
def _list_to_arrays(
557556
data: List[Scalar],
558557
columns: Union[Index, List],
559-
coerce_float: bool = False,
560-
dtype: Optional[DtypeObj] = None,
561558
) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
562-
if len(data) > 0 and isinstance(data[0], tuple):
563-
content = list(lib.to_object_array_tuples(data).T)
559+
# Note: we already check len(data) > 0 before getting here
560+
if isinstance(data[0], tuple):
561+
content = lib.to_object_array_tuples(data)
564562
else:
565563
# list of lists
566-
content = list(lib.to_object_array(data).T)
567-
# gh-26429 do not raise user-facing AssertionError
568-
try:
569-
columns = _validate_or_indexify_columns(content, columns)
570-
result = _convert_object_array(content, dtype=dtype, coerce_float=coerce_float)
571-
except AssertionError as e:
572-
raise ValueError(e) from e
573-
return result, columns
564+
content = lib.to_object_array(data)
565+
return content, columns
574566

575567

576568
def _list_of_series_to_arrays(
577569
data: List,
578570
columns: Union[Index, List],
579-
coerce_float: bool = False,
580-
dtype: Optional[DtypeObj] = None,
581571
) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
582572
if columns is None:
583573
# We know pass_data is non-empty because data[0] is a Series
@@ -600,22 +590,14 @@ def _list_of_series_to_arrays(
600590
values = extract_array(s, extract_numpy=True)
601591
aligned_values.append(algorithms.take_1d(values, indexer))
602592

603-
values = np.vstack(aligned_values)
593+
content = np.vstack(aligned_values)
604594

605-
if values.dtype == np.object_:
606-
content = list(values.T)
607-
columns = _validate_or_indexify_columns(content, columns)
608-
content = _convert_object_array(content, dtype=dtype, coerce_float=coerce_float)
609-
return content, columns
610-
else:
611-
return values.T, columns
595+
return content, columns
612596

613597

614598
def _list_of_dict_to_arrays(
615599
data: List[Dict],
616600
columns: Union[Index, List],
617-
coerce_float: bool = False,
618-
dtype: Optional[DtypeObj] = None,
619601
) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
620602
"""
621603
Convert list of dicts to numpy arrays
@@ -630,8 +612,6 @@ def _list_of_dict_to_arrays(
630612
data : iterable
631613
collection of records (OrderedDict, dict)
632614
columns: iterables or None
633-
coerce_float : bool
634-
dtype : np.dtype
635615
636616
Returns
637617
-------
@@ -647,9 +627,29 @@ def _list_of_dict_to_arrays(
647627
# classes
648628
data = [(type(d) is dict) and d or dict(d) for d in data]
649629

650-
content = list(lib.dicts_to_array(data, list(columns)).T)
651-
columns = _validate_or_indexify_columns(content, columns)
652-
content = _convert_object_array(content, dtype=dtype, coerce_float=coerce_float)
630+
content = lib.dicts_to_array(data, list(columns))
631+
return content, columns
632+
633+
634+
def _finalize_columns_and_data(
635+
content: np.ndarray,
636+
columns: Optional[Union[Index, List]],
637+
dtype: Optional[DtypeObj],
638+
coerce_float: bool,
639+
) -> Tuple[List[np.ndarray], Union[Index, List[Axis]]]:
640+
"""
641+
Ensure we have valid columns, cast object dtypes if possible.
642+
"""
643+
content = list(content.T)
644+
645+
try:
646+
columns = _validate_or_indexify_columns(content, columns)
647+
except AssertionError as err:
648+
# GH#26429 do not raise user-facing AssertionError
649+
raise ValueError(err) from err
650+
651+
if len(content) and content[0].dtype == np.object_:
652+
content = _convert_object_array(content, dtype=dtype, coerce_float=coerce_float)
653653
return content, columns
654654

655655

0 commit comments

Comments
 (0)