diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e633d6b28a8c5..9b0c85abc7ebd 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -148,6 +148,7 @@ from pandas.core.ops.invalid import make_invalid_op from pandas.core.sorting import ( ensure_key_mapped, + get_group_index_sorter, nargsort, ) from pandas.core.strings import StringMethods @@ -4098,9 +4099,7 @@ def _get_leaf_sorter(labels): return np.empty(0, dtype="int64") if len(labels) == 1: - lab = ensure_int64(labels[0]) - sorter, _ = libalgos.groupsort_indexer(lab, 1 + lab.max()) - return sorter + return get_group_index_sorter(labels[0]) # find indexers of beginning of each set of # same-key labels w.r.t all but last level diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 543bf44e61216..271bb2ca8dd75 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -9,7 +9,6 @@ import numpy as np -import pandas._libs.algos as libalgos import pandas._libs.reshape as libreshape from pandas._libs.sparse import IntIndex from pandas._typing import Dtype @@ -42,6 +41,7 @@ decons_obs_group_ids, get_compressed_ids, get_group_index, + get_group_index_sorter, ) @@ -139,8 +139,7 @@ def _indexer_and_to_sort(self): comp_index, obs_ids = get_compressed_ids(to_sort, sizes) ngroups = len(obs_ids) - indexer = libalgos.groupsort_indexer(comp_index, ngroups)[0] - indexer = ensure_platform_int(indexer) + indexer = get_group_index_sorter(comp_index, ngroups) return indexer, to_sort diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py index 0195969de1f17..973fed2c1436f 100644 --- a/pandas/core/sorting.py +++ b/pandas/core/sorting.py @@ -567,7 +567,9 @@ def get_indexer_dict( # sorting levels...cleverly? -def get_group_index_sorter(group_index, ngroups: int): +def get_group_index_sorter( + group_index: np.ndarray, ngroups: int | None = None +) -> np.ndarray: """ algos.groupsort_indexer implements `counting sort` and it is at least O(ngroups), where @@ -581,6 +583,8 @@ def get_group_index_sorter(group_index, ngroups: int): groupby operations. e.g. consider: df.groupby(key)[col].transform('first') """ + if ngroups is None: + ngroups = 1 + group_index.max() count = len(group_index) alpha = 0.0 # taking complexities literally; there may be beta = 1.0 # some room for fine-tuning these parameters