Skip to content

Commit 9e51ab0

Browse files
committed
REF: Unify use of factorize() in groupby
Grouping._codes_and_uniques and BaseGrouper._get_compressed_codes were both doing semantically the same thing as core.algorithms.factorize(), so rename both to _factorize to make the code a bit easier to follow. Per a review comment in pandas-dev#46207.
1 parent 3e718e3 commit 9e51ab0

File tree

2 files changed

+8
-7
lines changed

2 files changed

+8
-7
lines changed

pandas/core/groupby/grouper.py

+5-4
Original file line numberDiff line numberDiff line change
@@ -619,7 +619,7 @@ def codes(self) -> npt.NDArray[np.signedinteger]:
619619
# _codes is set in __init__ for MultiIndex cases
620620
return self._codes
621621

622-
return self._codes_and_uniques[0]
622+
return self._factorize[0]
623623

624624
@cache_readonly
625625
def group_arraylike(self) -> ArrayLike:
@@ -635,7 +635,7 @@ def group_arraylike(self) -> ArrayLike:
635635
# retain dtype for categories, including unobserved ones
636636
return self.result_index._values
637637

638-
return self._codes_and_uniques[1]
638+
return self._factorize[1]
639639

640640
@cache_readonly
641641
def result_index(self) -> Index:
@@ -653,11 +653,12 @@ def group_index(self) -> Index:
653653
# _group_index is set in __init__ for MultiIndex cases
654654
return self._group_index
655655

656-
uniques = self._codes_and_uniques[1]
656+
uniques = self._factorize[1]
657657
return Index._with_infer(uniques, name=self.name)
658658

659659
@cache_readonly
660-
def _codes_and_uniques(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
660+
def _factorize(self) -> tuple[npt.NDArray[np.signedinteger], ArrayLike]:
661+
"""Analogous to core.algorithms.factorize"""
661662
if self._passed_categorical:
662663
# we make a CategoricalIndex out of the cat grouper
663664
# preserving the categories / ordered attributes

pandas/core/groupby/ops.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -878,7 +878,7 @@ def has_dropped_na(self) -> bool:
878878

879879
@cache_readonly
880880
def group_info(self) -> tuple[npt.NDArray[np.intp], npt.NDArray[np.intp], int]:
881-
comp_ids, obs_group_ids = self._get_compressed_codes()
881+
comp_ids, obs_group_ids = self._factorize()
882882

883883
ngroups = len(obs_group_ids)
884884
comp_ids = ensure_platform_int(comp_ids)
@@ -899,10 +899,10 @@ def codes_info(self) -> npt.NDArray[np.intp]:
899899
return ids
900900

901901
@final
902-
def _get_compressed_codes(
902+
def _factorize(
903903
self,
904904
) -> tuple[npt.NDArray[np.signedinteger], npt.NDArray[np.intp]]:
905-
# The first returned ndarray may have any signed integer dtype
905+
"""Analogous to core.algorithms.factorize"""
906906
if len(self.groupings) > 1:
907907
group_index = get_group_index(self.codes, self.shape, sort=True, xnull=True)
908908
return compress_group_index(group_index, sort=self._sort)

0 commit comments

Comments
 (0)