@@ -525,59 +525,49 @@ def to_arrays(
525
525
if columns is not None :
526
526
return [[]] * len (columns ), columns
527
527
return [], [] # columns if columns is not None else []
528
- if isinstance (data [0 ], (list , tuple )):
529
- return _list_to_arrays (data , columns , coerce_float = coerce_float , dtype = dtype )
530
- elif isinstance (data [0 ], abc .Mapping ):
531
- return _list_of_dict_to_arrays (
532
- data , columns , coerce_float = coerce_float , dtype = dtype
533
- )
534
- elif isinstance (data [0 ], ABCSeries ):
535
- return _list_of_series_to_arrays (
536
- data , columns , coerce_float = coerce_float , dtype = dtype
537
- )
528
+
538
529
elif isinstance (data [0 ], Categorical ):
539
530
if columns is None :
540
531
columns = ibase .default_index (len (data ))
541
532
return data , columns
542
- elif (
543
- isinstance (data , (np .ndarray , ABCSeries , Index ))
544
- and data .dtype .names is not None
545
- ):
546
533
534
+ elif isinstance (data , np .ndarray ) and data .dtype .names is not None :
535
+ # e.g. recarray
547
536
columns = list (data .dtype .names )
548
537
arrays = [data [k ] for k in columns ]
549
538
return arrays , columns
539
+
540
+ if isinstance (data [0 ], (list , tuple )):
541
+ content , columns = _list_to_arrays (data , columns )
542
+ elif isinstance (data [0 ], abc .Mapping ):
543
+ content , columns = _list_of_dict_to_arrays (data , columns )
544
+ elif isinstance (data [0 ], ABCSeries ):
545
+ content , columns = _list_of_series_to_arrays (data , columns )
550
546
else :
551
547
# last ditch effort
552
548
data = [tuple (x ) for x in data ]
553
- return _list_to_arrays (data , columns , coerce_float = coerce_float , dtype = dtype )
549
+ content , columns = _list_to_arrays (data , columns )
550
+
551
+ content , columns = _finalize_columns_and_data (content , columns , dtype , coerce_float )
552
+ return content , columns
554
553
555
554
556
555
def _list_to_arrays (
557
556
data : List [Scalar ],
558
557
columns : Union [Index , List ],
559
- coerce_float : bool = False ,
560
- dtype : Optional [DtypeObj ] = None ,
561
558
) -> Tuple [List [Scalar ], Union [Index , List [Axis ]]]:
562
- if len (data ) > 0 and isinstance (data [0 ], tuple ):
563
- content = list (lib .to_object_array_tuples (data ).T )
559
+ # Note: we already check len(data) > 0 before getting hre
560
+ if isinstance (data [0 ], tuple ):
561
+ content = lib .to_object_array_tuples (data )
564
562
else :
565
563
# list of lists
566
- content = list (lib .to_object_array (data ).T )
567
- # gh-26429 do not raise user-facing AssertionError
568
- try :
569
- columns = _validate_or_indexify_columns (content , columns )
570
- result = _convert_object_array (content , dtype = dtype , coerce_float = coerce_float )
571
- except AssertionError as e :
572
- raise ValueError (e ) from e
573
- return result , columns
564
+ content = lib .to_object_array (data )
565
+ return content , columns
574
566
575
567
576
568
def _list_of_series_to_arrays (
577
569
data : List ,
578
570
columns : Union [Index , List ],
579
- coerce_float : bool = False ,
580
- dtype : Optional [DtypeObj ] = None ,
581
571
) -> Tuple [List [Scalar ], Union [Index , List [Axis ]]]:
582
572
if columns is None :
583
573
# We know pass_data is non-empty because data[0] is a Series
@@ -600,22 +590,14 @@ def _list_of_series_to_arrays(
600
590
values = extract_array (s , extract_numpy = True )
601
591
aligned_values .append (algorithms .take_1d (values , indexer ))
602
592
603
- values = np .vstack (aligned_values )
593
+ content = np .vstack (aligned_values )
604
594
605
- if values .dtype == np .object_ :
606
- content = list (values .T )
607
- columns = _validate_or_indexify_columns (content , columns )
608
- content = _convert_object_array (content , dtype = dtype , coerce_float = coerce_float )
609
- return content , columns
610
- else :
611
- return values .T , columns
595
+ return content , columns
612
596
613
597
614
598
def _list_of_dict_to_arrays (
615
599
data : List [Dict ],
616
600
columns : Union [Index , List ],
617
- coerce_float : bool = False ,
618
- dtype : Optional [DtypeObj ] = None ,
619
601
) -> Tuple [List [Scalar ], Union [Index , List [Axis ]]]:
620
602
"""
621
603
Convert list of dicts to numpy arrays
@@ -630,8 +612,6 @@ def _list_of_dict_to_arrays(
630
612
data : iterable
631
613
collection of records (OrderedDict, dict)
632
614
columns: iterables or None
633
- coerce_float : bool
634
- dtype : np.dtype
635
615
636
616
Returns
637
617
-------
@@ -647,9 +627,29 @@ def _list_of_dict_to_arrays(
647
627
# classes
648
628
data = [(type (d ) is dict ) and d or dict (d ) for d in data ]
649
629
650
- content = list (lib .dicts_to_array (data , list (columns )).T )
651
- columns = _validate_or_indexify_columns (content , columns )
652
- content = _convert_object_array (content , dtype = dtype , coerce_float = coerce_float )
630
+ content = lib .dicts_to_array (data , list (columns ))
631
+ return content , columns
632
+
633
+
634
def _finalize_columns_and_data(
    content: np.ndarray,
    columns: Optional[Union[Index, List]],
    dtype: Optional[DtypeObj],
    coerce_float: bool,
) -> Tuple[List[np.ndarray], Union[Index, List[Axis]]]:
    """
    Split ``content`` into per-column arrays, validate the column labels and
    convert object-dtype columns where possible.

    Parameters
    ----------
    content : np.ndarray
        Array whose transpose is iterated to obtain one array per column.
    columns : Index, list or None
        Labels handed to ``_validate_or_indexify_columns``.
    dtype : dtype or None
        Forwarded to ``_convert_object_array``.
    coerce_float : bool
        Forwarded to ``_convert_object_array``.

    Returns
    -------
    arrays : list of np.ndarray
    columns : Index or list

    Raises
    ------
    ValueError
        If column validation raises an AssertionError.
    """
    arrays = list(content.T)

    try:
        columns = _validate_or_indexify_columns(arrays, columns)
    except AssertionError as exc:
        # GH#26429 do not raise user-facing AssertionError
        raise ValueError(exc) from exc

    # Nothing to convert when there are no columns or the first column is
    # not object dtype.
    if not arrays or arrays[0].dtype != np.object_:
        return arrays, columns

    converted = _convert_object_array(arrays, dtype=dtype, coerce_float=coerce_float)
    return converted, columns
654
654
655
655
0 commit comments