@@ -83,11 +83,14 @@ def get_indexer_indexer(
83
83
84
84
if level is not None :
85
85
_ , indexer = target .sortlevel (
86
- level , ascending = ascending , sort_remaining = sort_remaining
86
+ level ,
87
+ ascending = ascending ,
88
+ sort_remaining = sort_remaining ,
89
+ na_position = na_position ,
87
90
)
88
91
elif isinstance (target , ABCMultiIndex ):
89
92
indexer = lexsort_indexer (
90
- target ._get_codes_for_sorting () , orders = ascending , na_position = na_position
93
+ target .codes , orders = ascending , na_position = na_position , codes_given = True
91
94
)
92
95
else :
93
96
# Check monotonicity before sorting an index (GH 11080)
@@ -302,7 +305,11 @@ def indexer_from_factorized(
302
305
303
306
304
307
def lexsort_indexer (
305
- keys , orders = None , na_position : str = "last" , key : Callable | None = None
308
+ keys ,
309
+ orders = None ,
310
+ na_position : str = "last" ,
311
+ key : Callable | None = None ,
312
+ codes_given : bool = False ,
306
313
) -> npt .NDArray [np .intp ]:
307
314
"""
308
315
Performs lexical sorting on a set of keys
@@ -321,6 +328,8 @@ def lexsort_indexer(
321
328
Determines placement of NA elements in the sorted list ("last" or "first")
322
329
key : Callable, optional
323
330
Callable key function applied to every element in keys before sorting
331
+ codes_given : bool, default False
332
+ If True, treat each key as already-computed integer codes (-1 marks a missing value) and skip materializing a Categorical from it.
324
333
325
334
Returns
326
335
-------
@@ -338,15 +347,27 @@ def lexsort_indexer(
338
347
keys = [ensure_key_mapped (k , key ) for k in keys ]
339
348
340
349
for k , order in zip (keys , orders ):
341
- cat = Categorical (k , ordered = True )
342
-
343
350
if na_position not in ["last" , "first" ]:
344
351
raise ValueError (f"invalid na_position: { na_position } " )
345
352
346
- n = len (cat .categories )
347
- codes = cat .codes .copy ()
353
+ if codes_given :
354
+ mask = k == - 1
355
+ codes = k .copy ()
356
+ n = len (codes )
357
+ mask_n = n
358
+ if mask .any ():
359
+ n -= 1
360
+
361
+ else :
362
+ cat = Categorical (k , ordered = True )
363
+ n = len (cat .categories )
364
+ codes = cat .codes .copy ()
365
+ mask = cat .codes == - 1
366
+ if mask .any ():
367
+ mask_n = n + 1
368
+ else :
369
+ mask_n = n
348
370
349
- mask = cat .codes == - 1
350
371
if order : # ascending
351
372
if na_position == "last" :
352
373
codes = np .where (mask , n , codes )
@@ -357,10 +378,8 @@ def lexsort_indexer(
357
378
codes = np .where (mask , n , n - codes - 1 )
358
379
elif na_position == "first" :
359
380
codes = np .where (mask , 0 , n - codes )
360
- if mask .any ():
361
- n += 1
362
381
363
- shape .append (n )
382
+ shape .append (mask_n )
364
383
labels .append (codes )
365
384
366
385
return indexer_from_factorized (labels , tuple (shape ))
0 commit comments