11
11
List ,
12
12
Optional ,
13
13
Sequence ,
14
+ Tuple ,
14
15
Type ,
15
16
TypeVar ,
16
17
Union ,
@@ -2642,13 +2643,11 @@ def recode_for_categories(
2642
2643
return new_codes
2643
2644
2644
2645
2645
- def factorize_from_iterable (values ):
2646
+ def factorize_from_iterable (values ) -> Tuple [ np . ndarray , Index ] :
2646
2647
"""
2647
2648
Factorize an input `values` into `categories` and `codes`. Preserves
2648
2649
categorical dtype in `categories`.
2649
2650
2650
- *This is an internal function*
2651
-
2652
2651
Parameters
2653
2652
----------
2654
2653
values : list-like
@@ -2660,6 +2659,8 @@ def factorize_from_iterable(values):
2660
2659
If `values` has a categorical dtype, then `categories` is
2661
2660
a CategoricalIndex keeping the categories and order of `values`.
2662
2661
"""
2662
+ from pandas import CategoricalIndex
2663
+
2663
2664
if not is_list_like (values ):
2664
2665
raise TypeError ("Input must be list-like" )
2665
2666
@@ -2668,7 +2669,8 @@ def factorize_from_iterable(values):
2668
2669
# The Categorical we want to build has the same categories
2669
2670
# as values but its codes are by def [0, ..., len(n_categories) - 1]
2670
2671
cat_codes = np .arange (len (values .categories ), dtype = values .codes .dtype )
2671
- categories = Categorical .from_codes (cat_codes , dtype = values .dtype )
2672
+ cat = Categorical .from_codes (cat_codes , dtype = values .dtype )
2673
+ categories = CategoricalIndex (cat )
2672
2674
codes = values .codes
2673
2675
else :
2674
2676
# The value of ordered is irrelevant since we don't use cat as such,
@@ -2680,26 +2682,26 @@ def factorize_from_iterable(values):
2680
2682
return codes , categories
2681
2683
2682
2684
2683
- def factorize_from_iterables (iterables ):
2685
+ def factorize_from_iterables (iterables ) -> Tuple [ List [ np . ndarray ], List [ Index ]] :
2684
2686
"""
2685
2687
A higher-level wrapper over `factorize_from_iterable`.
2686
2688
2687
- *This is an internal function*
2688
-
2689
2689
Parameters
2690
2690
----------
2691
2691
iterables : list-like of list-likes
2692
2692
2693
2693
Returns
2694
2694
-------
2695
- codes_list : list of ndarrays
2696
- categories_list : list of Indexes
2695
+ codes : list of ndarrays
2696
+ categories : list of Indexes
2697
2697
2698
2698
Notes
2699
2699
-----
2700
2700
See `factorize_from_iterable` for more info.
2701
2701
"""
2702
2702
if len (iterables ) == 0 :
2703
- # For consistency, it should return a list of 2 lists.
2704
- return [[], []]
2705
- return map (list , zip (* (factorize_from_iterable (it ) for it in iterables )))
2703
+ # For consistency, it should return two empty lists.
2704
+ return [], []
2705
+
2706
+ codes , categories = zip (* (factorize_from_iterable (it ) for it in iterables ))
2707
+ return list (codes ), list (categories )
0 commit comments