File tree 3 files changed +9
-7
lines changed
3 files changed +9
-7
lines changed Original file line number Diff line number Diff line change 148
148
from pandas .core .ops .invalid import make_invalid_op
149
149
from pandas .core .sorting import (
150
150
ensure_key_mapped ,
151
+ get_group_index_sorter ,
151
152
nargsort ,
152
153
)
153
154
from pandas .core .strings import StringMethods
@@ -4098,9 +4099,7 @@ def _get_leaf_sorter(labels):
4098
4099
return np .empty (0 , dtype = "int64" )
4099
4100
4100
4101
if len (labels ) == 1 :
4101
- lab = ensure_int64 (labels [0 ])
4102
- sorter , _ = libalgos .groupsort_indexer (lab , 1 + lab .max ())
4103
- return sorter
4102
+ return get_group_index_sorter (labels [0 ])
4104
4103
4105
4104
# find indexers of beginning of each set of
4106
4105
# same-key labels w.r.t all but last level
Original file line number Diff line number Diff line change 9
9
10
10
import numpy as np
11
11
12
- import pandas ._libs .algos as libalgos
13
12
import pandas ._libs .reshape as libreshape
14
13
from pandas ._libs .sparse import IntIndex
15
14
from pandas ._typing import Dtype
42
41
decons_obs_group_ids ,
43
42
get_compressed_ids ,
44
43
get_group_index ,
44
+ get_group_index_sorter ,
45
45
)
46
46
47
47
@@ -139,8 +139,7 @@ def _indexer_and_to_sort(self):
139
139
comp_index , obs_ids = get_compressed_ids (to_sort , sizes )
140
140
ngroups = len (obs_ids )
141
141
142
- indexer = libalgos .groupsort_indexer (comp_index , ngroups )[0 ]
143
- indexer = ensure_platform_int (indexer )
142
+ indexer = get_group_index_sorter (comp_index , ngroups )
144
143
145
144
return indexer , to_sort
146
145
Original file line number Diff line number Diff line change @@ -567,7 +567,9 @@ def get_indexer_dict(
567
567
# sorting levels...cleverly?
568
568
569
569
570
- def get_group_index_sorter (group_index , ngroups : int ):
570
+ def get_group_index_sorter (
571
+ group_index : np .ndarray , ngroups : int | None = None
572
+ ) -> np .ndarray :
571
573
"""
572
574
algos.groupsort_indexer implements `counting sort` and it is at least
573
575
O(ngroups), where
@@ -581,6 +583,8 @@ def get_group_index_sorter(group_index, ngroups: int):
581
583
groupby operations. e.g. consider:
582
584
df.groupby(key)[col].transform('first')
583
585
"""
586
+ if ngroups is None :
587
+ ngroups = 1 + group_index .max ()
584
588
count = len (group_index )
585
589
alpha = 0.0 # taking complexities literally; there may be
586
590
beta = 1.0 # some room for fine-tuning these parameters
You can’t perform that action at this time.
0 commit comments