Skip to content

Commit 986c5ec

Browse files
authored
TYP/CLN: factorize_from_iterable(s) (#40775)
1 parent 8c9621d commit 986c5ec

File tree

2 files changed

+15
-13
lines changed

2 files changed

+15
-13
lines changed

pandas/core/arrays/categorical.py

+14-12
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
List,
1212
Optional,
1313
Sequence,
14+
Tuple,
1415
Type,
1516
TypeVar,
1617
Union,
@@ -2642,13 +2643,11 @@ def recode_for_categories(
26422643
return new_codes
26432644

26442645

2645-
def factorize_from_iterable(values):
2646+
def factorize_from_iterable(values) -> Tuple[np.ndarray, Index]:
26462647
"""
26472648
Factorize an input `values` into `categories` and `codes`. Preserves
26482649
categorical dtype in `categories`.
26492650
2650-
*This is an internal function*
2651-
26522651
Parameters
26532652
----------
26542653
values : list-like
@@ -2660,6 +2659,8 @@ def factorize_from_iterable(values):
26602659
If `values` has a categorical dtype, then `categories` is
26612660
a CategoricalIndex keeping the categories and order of `values`.
26622661
"""
2662+
from pandas import CategoricalIndex
2663+
26632664
if not is_list_like(values):
26642665
raise TypeError("Input must be list-like")
26652666

@@ -2668,7 +2669,8 @@ def factorize_from_iterable(values):
26682669
# The Categorical we want to build has the same categories
26692670
# as values but its codes are by definition [0, ..., n_categories - 1]
26702671
cat_codes = np.arange(len(values.categories), dtype=values.codes.dtype)
2671-
categories = Categorical.from_codes(cat_codes, dtype=values.dtype)
2672+
cat = Categorical.from_codes(cat_codes, dtype=values.dtype)
2673+
categories = CategoricalIndex(cat)
26722674
codes = values.codes
26732675
else:
26742676
# The value of ordered is irrelevant since we don't use cat as such,
@@ -2680,26 +2682,26 @@ def factorize_from_iterable(values):
26802682
return codes, categories
26812683

26822684

2683-
def factorize_from_iterables(iterables):
2685+
def factorize_from_iterables(iterables) -> Tuple[List[np.ndarray], List[Index]]:
26842686
"""
26852687
A higher-level wrapper over `factorize_from_iterable`.
26862688
2687-
*This is an internal function*
2688-
26892689
Parameters
26902690
----------
26912691
iterables : list-like of list-likes
26922692
26932693
Returns
26942694
-------
2695-
codes_list : list of ndarrays
2696-
categories_list : list of Indexes
2695+
codes : list of ndarrays
2696+
categories : list of Indexes
26972697
26982698
Notes
26992699
-----
27002700
See `factorize_from_iterable` for more info.
27012701
"""
27022702
if len(iterables) == 0:
2703-
# For consistency, it should return a list of 2 lists.
2704-
return [[], []]
2705-
return map(list, zip(*(factorize_from_iterable(it) for it in iterables)))
2703+
# For consistency, it should return two empty lists.
2704+
return [], []
2705+
2706+
codes, categories = zip(*(factorize_from_iterable(it) for it in iterables))
2707+
return list(codes), list(categories)

pandas/core/reshape/reshape.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -991,7 +991,7 @@ def get_empty_frame(data) -> DataFrame:
991991
if prefix is None:
992992
dummy_cols = levels
993993
else:
994-
dummy_cols = [f"{prefix}{prefix_sep}{level}" for level in levels]
994+
dummy_cols = Index([f"{prefix}{prefix_sep}{level}" for level in levels])
995995

996996
index: Optional[Index]
997997
if isinstance(data, Series):

0 commit comments

Comments
 (0)