3
3
constructors before passing them to a BlockManager.
4
4
"""
5
5
from collections import abc
6
- from typing import Tuple
6
+ from typing import Dict , List , Optional , Tuple , Union
7
7
8
8
import numpy as np
9
9
import numpy .ma as ma
10
10
11
11
from pandas ._libs import lib
12
+ from pandas ._typing import Axis , Dtype , Scalar
12
13
13
14
from pandas .core .dtypes .cast import (
14
15
construct_1d_arraylike_from_scalar ,
@@ -522,29 +523,38 @@ def to_arrays(data, columns, coerce_float=False, dtype=None):
522
523
return _list_to_arrays (data , columns , coerce_float = coerce_float , dtype = dtype )
523
524
524
525
525
def _list_to_arrays(
    data: List[Scalar],
    columns: Union[Index, List],
    coerce_float: bool = False,
    dtype: Optional[Dtype] = None,
) -> Tuple[List[Scalar], Union[Index, List[Axis]]]:
    """
    Turn a list of row records (tuples or lists) into per-column arrays.

    Parameters
    ----------
    data : list
        Row records. If the first record is a tuple the rows are unpacked with
        ``lib.to_object_array_tuples``, otherwise with ``lib.to_object_array``.
    columns : Index, list or None
        Column labels, handed to ``_validate_or_indexify_columns``.
    coerce_float : bool, default False
        Forwarded to ``_convert_object_array``.
    dtype : dtype, optional
        Forwarded to ``_convert_object_array``.

    Returns
    -------
    tuple
        ``(arrays, columns)`` as produced by ``_convert_object_array`` and
        ``_validate_or_indexify_columns`` respectively.

    Raises
    ------
    ValueError
        If either helper raises an AssertionError; the original error is
        chained as the cause.
    """
    rows_are_tuples = len(data) > 0 and isinstance(data[0], tuple)
    if rows_are_tuples:
        object_rows = lib.to_object_array_tuples(data)
    else:
        # list of lists
        object_rows = lib.to_object_array(data)

    # transpose so that each entry of ``content`` holds one column
    content = list(object_rows.T)

    # gh-26429 do not raise user-facing AssertionError
    try:
        columns = _validate_or_indexify_columns(content, columns)
        result = _convert_object_array(content, dtype=dtype, coerce_float=coerce_float)
    except AssertionError as err:
        raise ValueError(err) from err

    return result, columns
539
544
540
545
541
- def _list_of_series_to_arrays (data , columns , coerce_float = False , dtype = None ):
546
+ def _list_of_series_to_arrays (
547
+ data : List ,
548
+ columns : Union [Index , List ],
549
+ coerce_float : bool = False ,
550
+ dtype : Optional [Dtype ] = None ,
551
+ ) -> Tuple [List [Scalar ], Union [Index , List [Axis ]]]:
542
552
if columns is None :
543
553
# We know pass_data is non-empty because data[0] is a Series
544
554
pass_data = [x for x in data if isinstance (x , (ABCSeries , ABCDataFrame ))]
545
555
columns = get_objs_combined_axis (pass_data , sort = False )
546
556
547
- indexer_cache = {}
557
+ indexer_cache : Dict [ int , Scalar ] = {}
548
558
549
559
aligned_values = []
550
560
for s in data :
@@ -564,14 +574,19 @@ def _list_of_series_to_arrays(data, columns, coerce_float=False, dtype=None):
564
574
565
575
if values .dtype == np .object_ :
566
576
content = list (values .T )
567
- return _convert_object_array (
568
- content , columns , dtype = dtype , coerce_float = coerce_float
569
- )
577
+ columns = _validate_or_indexify_columns ( content , columns )
578
+ content = _convert_object_array ( content , dtype = dtype , coerce_float = coerce_float )
579
+ return content , columns
570
580
else :
571
581
return values .T , columns
572
582
573
583
574
- def _list_of_dict_to_arrays (data , columns , coerce_float = False , dtype = None ):
584
+ def _list_of_dict_to_arrays (
585
+ data : List ,
586
+ columns : Union [Index , List ],
587
+ coerce_float : bool = False ,
588
+ dtype : Optional [Dtype ] = None ,
589
+ ) -> Tuple [List [Scalar ], Union [Index , List [Axis ]]]:
575
590
"""
576
591
Convert list of dicts to numpy arrays
577
592
@@ -603,22 +618,85 @@ def _list_of_dict_to_arrays(data, columns, coerce_float=False, dtype=None):
603
618
data = [(type (d ) is dict ) and d or dict (d ) for d in data ]
604
619
605
620
content = list (lib .dicts_to_array (data , list (columns )).T )
606
- return _convert_object_array (
607
- content , columns , dtype = dtype , coerce_float = coerce_float
608
- )
621
+ columns = _validate_or_indexify_columns ( content , columns )
622
+ content = _convert_object_array ( content , dtype = dtype , coerce_float = coerce_float )
623
+ return content , columns
609
624
610
625
611
- def _convert_object_array (content , columns , coerce_float = False , dtype = None ):
626
+ def _validate_or_indexify_columns (
627
+ content : List , columns : Union [Index , List , None ]
628
+ ) -> Union [Index , List [Axis ]]:
629
+ """
630
+ If columns is None, make numbers as column names; Otherwise, validate that
631
+ columns have valid length.
632
+
633
+ Parameters
634
+ ----------
635
+ content: list of data
636
+ columns: Iterable or None
637
+
638
+ Returns
639
+ -------
640
+ columns: If columns is Iterable, return as is; If columns is None, assign
641
+ positional column index value as columns.
642
+
643
+ Raises
644
+ ------
645
+ 1. AssertionError when content is not composed of list of lists, and if
646
+ length of columns is not equal to length of content.
647
+ 2. ValueError when content is list of lists, but length of each sub-list
648
+ is not equal
649
+ 3. ValueError when content is list of lists, but length of sub-list is
650
+ not equal to length of content
651
+ """
612
652
if columns is None :
613
653
columns = ibase .default_index (len (content ))
614
654
else :
615
- if len (columns ) != len (content ): # pragma: no cover
655
+
656
+ # Add mask for data which is composed of list of lists
657
+ is_mi_list = isinstance (columns , list ) and all (
658
+ isinstance (col , list ) for col in columns
659
+ )
660
+
661
+ if not is_mi_list and len (columns ) != len (content ): # pragma: no cover
616
662
# caller's responsibility to check for this...
617
663
raise AssertionError (
618
664
f"{ len (columns )} columns passed, passed data had "
619
665
f"{ len (content )} columns"
620
666
)
667
+ elif is_mi_list :
668
+
669
+ # check if nested list column, length of each sub-list should be equal
670
+ if len ({len (col ) for col in columns }) > 1 :
671
+ raise ValueError (
672
+ "Length of columns passed for MultiIndex columns is different"
673
+ )
674
+
675
+ # if columns is not empty and length of sublist is not equal to content
676
+ elif columns and len (columns [0 ]) != len (content ):
677
+ raise ValueError (
678
+ f"{ len (columns [0 ])} columns passed, passed data had "
679
+ f"{ len (content )} columns"
680
+ )
681
+ return columns
682
+
683
+
684
+ def _convert_object_array (
685
+ content : List [Scalar ], coerce_float : bool = False , dtype : Optional [Dtype ] = None
686
+ ) -> List [Scalar ]:
687
+ """
688
+ Internal function to convert object array.
689
+
690
+ Parameters
691
+ ----------
692
+ content: list of processed data records
693
+ coerce_float: bool, to coerce floats or not, default is False
694
+ dtype: np.dtype, default is None
621
695
696
+ Returns
697
+ -------
698
+ arrays: casted content if not object dtype, otherwise return as is in list.
699
+ """
622
700
# provide soft conversion of object dtypes
623
701
def convert (arr ):
624
702
if dtype != object and dtype != np .object :
@@ -628,7 +706,7 @@ def convert(arr):
628
706
629
707
arrays = [convert (arr ) for arr in content ]
630
708
631
- return arrays , columns
709
+ return arrays
632
710
633
711
634
712
# ---------------------------------------------------------------------
0 commit comments