Skip to content

Commit b33389f

Browse files
REF: don't call libalgos.groupsort_indexer directly for simple argsort usage (#40105)
1 parent 0b671be commit b33389f

File tree

3 files changed

+9
-7
lines changed

3 files changed

+9
-7
lines changed

pandas/core/indexes/base.py

+2 -3
Original file line number | Diff line number | Diff line change
@@ -148,6 +148,7 @@
148148
from pandas.core.ops.invalid import make_invalid_op
149149
from pandas.core.sorting import (
150150
ensure_key_mapped,
151+
get_group_index_sorter,
151152
nargsort,
152153
)
153154
from pandas.core.strings import StringMethods
@@ -4098,9 +4099,7 @@ def _get_leaf_sorter(labels):
40984099
return np.empty(0, dtype="int64")
40994100

41004101
if len(labels) == 1:
4101-
lab = ensure_int64(labels[0])
4102-
sorter, _ = libalgos.groupsort_indexer(lab, 1 + lab.max())
4103-
return sorter
4102+
return get_group_index_sorter(labels[0])
41044103

41054104
# find indexers of beginning of each set of
41064105
# same-key labels w.r.t all but last level

pandas/core/reshape/reshape.py

+2 -3
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99

1010
import numpy as np
1111

12-
import pandas._libs.algos as libalgos
1312
import pandas._libs.reshape as libreshape
1413
from pandas._libs.sparse import IntIndex
1514
from pandas._typing import Dtype
@@ -42,6 +41,7 @@
4241
decons_obs_group_ids,
4342
get_compressed_ids,
4443
get_group_index,
44+
get_group_index_sorter,
4545
)
4646

4747

@@ -139,8 +139,7 @@ def _indexer_and_to_sort(self):
139139
comp_index, obs_ids = get_compressed_ids(to_sort, sizes)
140140
ngroups = len(obs_ids)
141141

142-
indexer = libalgos.groupsort_indexer(comp_index, ngroups)[0]
143-
indexer = ensure_platform_int(indexer)
142+
indexer = get_group_index_sorter(comp_index, ngroups)
144143

145144
return indexer, to_sort
146145

pandas/core/sorting.py

+5 -1
Original file line number | Diff line number | Diff line change
@@ -567,7 +567,9 @@ def get_indexer_dict(
567567
# sorting levels...cleverly?
568568

569569

570-
def get_group_index_sorter(group_index, ngroups: int):
570+
def get_group_index_sorter(
571+
group_index: np.ndarray, ngroups: int | None = None
572+
) -> np.ndarray:
571573
"""
572574
algos.groupsort_indexer implements `counting sort` and it is at least
573575
O(ngroups), where
@@ -581,6 +583,8 @@ def get_group_index_sorter(group_index, ngroups: int):
581583
groupby operations. e.g. consider:
582584
df.groupby(key)[col].transform('first')
583585
"""
586+
if ngroups is None:
587+
ngroups = 1 + group_index.max()
584588
count = len(group_index)
585589
alpha = 0.0 # taking complexities literally; there may be
586590
beta = 1.0 # some room for fine-tuning these parameters

0 commit comments

Comments
 (0)