84
84
get_unanimous_names ,
85
85
)
86
86
from pandas .core .indexes .frozen import FrozenList
87
- from pandas .core .indexes .numeric import Int64Index
88
87
from pandas .core .ops .invalid import make_invalid_op
89
88
from pandas .core .sorting import (
90
89
get_group_index ,
@@ -3160,47 +3159,37 @@ def maybe_mi_droplevels(indexer, levels):
3160
3159
return indexer , result_index
3161
3160
3162
3161
def _get_level_indexer (
3163
- self , key , level : int = 0 , indexer : Int64Index | None = None
3162
+ self , key , level : int = 0 , indexer : npt . NDArray [ np . bool_ ] | None = None
3164
3163
):
3165
3164
# `level` is _always_ an integer position, never a level name
3166
- # return an indexer, boolean array or a slice showing where the key is
3165
+ # return a boolean array or slice showing where the key is
3167
3166
# in the totality of values
3168
3167
# if the indexer is provided, then use this
3169
3168
3170
3169
level_index = self .levels [level ]
3171
3170
level_codes = self .codes [level ]
3172
3171
3173
3172
def convert_indexer (start , stop , step , indexer = indexer , codes = level_codes ):
3174
- # given the inputs and the codes/indexer, compute an indexer set
3175
- # if we have a provided indexer, then this need not consider
3176
- # the entire labels set
3177
- r = np .arange (start , stop , step )
3178
-
3179
- if indexer is not None and len (indexer ) != len (codes ):
3180
-
3181
- # we have an indexer which maps the locations in the labels
3182
- # that we have already selected (and is not an indexer for the
3183
- # entire set) otherwise this is wasteful so we only need to
3184
- # examine locations that are in this set the only magic here is
3185
- # that the result are the mappings to the set that we have
3186
- # selected
3187
- from pandas import Series
3188
-
3189
- mapper = Series (indexer )
3190
- indexer = codes .take (ensure_platform_int (indexer ))
3191
- result = Series (Index (indexer ).isin (r ).nonzero ()[0 ])
3192
- m = result .map (mapper )
3193
- # error: Incompatible types in assignment (expression has type
3194
- # "ndarray", variable has type "Series")
3195
- m = np .asarray (m ) # type: ignore[assignment]
3196
-
3173
+ # Compute a bool indexer to identify the positions to take.
3174
+ # If we have an existing indexer, we only need to examine the
3175
+ # subset of positions where the existing indexer is True.
3176
+ if indexer is not None :
3177
+ # we only need to look at the subset of codes where the
3178
+ # existing indexer equals True
3179
+ codes = codes [indexer ]
3180
+
3181
+ if step is None or step == 1 :
3182
+ new_indexer = (codes >= start ) & (codes < stop )
3197
3183
else :
3198
- # error: Incompatible types in assignment (expression has type
3199
- # "ndarray", variable has type "Series")
3200
- m = np .zeros (len (codes ), dtype = bool ) # type: ignore[assignment]
3201
- m [np .in1d (codes , r , assume_unique = Index (codes ).is_unique )] = True
3184
+ r = np .arange (start , stop , step , dtype = codes .dtype )
3185
+ new_indexer = algos .isin (codes , r )
3186
+
3187
+ if indexer is None :
3188
+ return new_indexer
3202
3189
3203
- return m
3190
+ indexer = indexer .copy ()
3191
+ indexer [indexer ] = new_indexer
3192
+ return indexer
3204
3193
3205
3194
if isinstance (key , slice ):
3206
3195
# handle a slice, returning a slice if we can
@@ -3327,62 +3316,41 @@ def get_locs(self, seq):
3327
3316
f"on levels { true_slices } , lexsort depth { self ._lexsort_depth } "
3328
3317
)
3329
3318
3330
- n = len (self )
3331
- # indexer is the list of all positions that we want to take; it
3332
- # is created on the first entry in seq and narrowed down as we
3333
- # look at remaining entries
3334
- indexer = None
3335
-
3336
3319
if any (x is Ellipsis for x in seq ):
3337
3320
raise NotImplementedError (
3338
3321
"MultiIndex does not support indexing with Ellipsis"
3339
3322
)
3340
3323
3341
- def _convert_to_indexer (r ) -> Int64Index :
3342
- # return an indexer
3343
- if isinstance (r , slice ):
3344
- m = np .zeros (n , dtype = bool )
3345
- m [r ] = True
3346
- r = m .nonzero ()[0 ]
3347
- elif com .is_bool_indexer (r ):
3348
- if len (r ) != n :
3349
- raise ValueError (
3350
- "cannot index with a boolean indexer "
3351
- "that is not the same length as the "
3352
- "index"
3353
- )
3354
- r = r .nonzero ()[0 ]
3355
- return Int64Index (r )
3324
+ n = len (self )
3356
3325
3357
- def _update_indexer (idxr : Index , indexer : Index | None ) -> Index :
3358
- if indexer is None :
3359
- return idxr
3360
- indexer_intersection = indexer .intersection (idxr )
3361
- if indexer_intersection .empty and not idxr .empty and not indexer .empty :
3362
- raise KeyError (seq )
3363
- return indexer_intersection
3326
+ def _to_bool_indexer (indexer ) -> npt .NDArray [np .bool_ ]:
3327
+ if isinstance (indexer , slice ):
3328
+ new_indexer = np .zeros (n , dtype = np .bool_ )
3329
+ new_indexer [indexer ] = True
3330
+ return new_indexer
3331
+ return indexer
3332
+
3333
+ # a bool indexer for the positions we want to take
3334
+ indexer : npt .NDArray [np .bool_ ] | None = None
3364
3335
3365
3336
for i , k in enumerate (seq ):
3366
3337
3338
+ lvl_indexer : npt .NDArray [np .bool_ ] | slice | None = None
3339
+
3367
3340
if com .is_bool_indexer (k ):
3368
- # a boolean indexer, must be the same length!
3369
- k = np .asarray (k )
3370
- lvl_indexer = _convert_to_indexer (k )
3371
- indexer = _update_indexer (lvl_indexer , indexer = indexer )
3341
+ if len (k ) != n :
3342
+ raise ValueError (
3343
+ "cannot index with a boolean indexer that "
3344
+ "is not the same length as the index"
3345
+ )
3346
+ lvl_indexer = np .asarray (k )
3372
3347
3373
3348
elif is_list_like (k ):
3374
- # a collection of labels to include from this level (these
3375
- # are or'd)
3376
-
3377
- indexers : Int64Index | None = None
3349
+ # a collection of labels to include from this level (these are or'd)
3378
3350
3379
3351
# GH#27591 check if this is a single tuple key in the level
3380
3352
try :
3381
- # Argument "indexer" to "_get_level_indexer" of "MultiIndex"
3382
- # has incompatible type "Index"; expected "Optional[Int64Index]"
3383
- lev_loc = self ._get_level_indexer (
3384
- k , level = i , indexer = indexer # type: ignore[arg-type]
3385
- )
3353
+ lvl_indexer = self ._get_level_indexer (k , level = i , indexer = indexer )
3386
3354
except (InvalidIndexError , TypeError , KeyError ) as err :
3387
3355
# InvalidIndexError e.g. non-hashable, fall back to treating
3388
3356
# this as a sequence of labels
@@ -3394,11 +3362,8 @@ def _update_indexer(idxr: Index, indexer: Index | None) -> Index:
3394
3362
# e.g. slice
3395
3363
raise err
3396
3364
try :
3397
- # Argument "indexer" to "_get_level_indexer" of "MultiIndex"
3398
- # has incompatible type "Index"; expected
3399
- # "Optional[Int64Index]"
3400
- item_lvl_indexer = self ._get_level_indexer (
3401
- x , level = i , indexer = indexer # type: ignore[arg-type]
3365
+ item_indexer = self ._get_level_indexer (
3366
+ x , level = i , indexer = indexer
3402
3367
)
3403
3368
except KeyError :
3404
3369
# ignore labels that are not found; see discussion in GH#39424
@@ -3418,82 +3383,63 @@ def _update_indexer(idxr: Index, indexer: Index | None) -> Index:
3418
3383
)
3419
3384
continue
3420
3385
else :
3421
- idxrs = _convert_to_indexer ( item_lvl_indexer )
3422
-
3423
- if indexers is None :
3424
- indexers = idxrs
3386
+ if lvl_indexer is None :
3387
+ lvl_indexer = _to_bool_indexer ( item_indexer )
3388
+ elif isinstance ( item_indexer , slice ) :
3389
+ lvl_indexer [ item_indexer ] = True # type: ignore[index]
3425
3390
else :
3426
- indexers = indexers .union (idxrs , sort = False )
3427
-
3428
- else :
3429
- idxrs = _convert_to_indexer (lev_loc )
3430
- if indexers is None :
3431
- indexers = idxrs
3432
- else :
3433
- indexers = indexers .union (idxrs , sort = False )
3391
+ lvl_indexer |= item_indexer
3434
3392
3435
- if indexers is not None :
3436
- indexer = _update_indexer (indexers , indexer = indexer )
3437
- else :
3393
+ if lvl_indexer is None :
3438
3394
# no matches, so we are done
3439
3395
# test_loc_getitem_duplicates_multiindex_empty_indexer
3440
3396
return np .array ([], dtype = np .intp )
3441
3397
3442
3398
elif com .is_null_slice (k ):
3443
3399
# null slice: nothing to filter on this level
3444
- if indexer is None :
3445
- indexer = Index (np .arange (n ))
3400
+ if indexer is None and i == len (seq ) - 1 :
3401
+ return np .arange (n , dtype = np .intp )
3402
+ continue
3446
3403
3447
- elif isinstance (k , slice ):
3404
+ else :
3405
+ # a slice or a single label
3406
+ lvl_indexer = self ._get_level_indexer (k , level = i , indexer = indexer )
3448
3407
3449
- # a slice, include BOTH of the labels
3450
- # Argument "indexer" to "_get_level_indexer" of "MultiIndex" has
3451
- # incompatible type "Index"; expected "Optional[Int64Index]"
3452
- lvl_indexer = self ._get_level_indexer (
3453
- k ,
3454
- level = i ,
3455
- indexer = indexer , # type: ignore[arg-type]
3456
- )
3457
- indexer = _update_indexer (
3458
- _convert_to_indexer (lvl_indexer ),
3459
- indexer = indexer ,
3460
- )
3408
+ # update indexer
3409
+ lvl_indexer = _to_bool_indexer (lvl_indexer )
3410
+ if indexer is None :
3411
+ indexer = lvl_indexer
3461
3412
else :
3462
- # a single label
3463
- lvl_indexer = self ._get_loc_level (k , level = i )[0 ]
3464
- indexer = _update_indexer (
3465
- _convert_to_indexer (lvl_indexer ),
3466
- indexer = indexer ,
3467
- )
3413
+ indexer &= lvl_indexer
3414
+ if not np .any (indexer ) and np .any (lvl_indexer ):
3415
+ raise KeyError (seq )
3468
3416
3469
3417
# empty indexer
3470
3418
if indexer is None :
3471
3419
return np .array ([], dtype = np .intp )
3472
3420
3473
- assert isinstance (indexer , Int64Index ), type (indexer )
3474
- indexer = self ._reorder_indexer (seq , indexer )
3475
-
3476
- return indexer ._values .astype (np .intp , copy = False )
3421
+ pos_indexer = indexer .nonzero ()[0 ]
3422
+ return self ._reorder_indexer (seq , pos_indexer )
3477
3423
3478
3424
# --------------------------------------------------------------------
3479
3425
3480
3426
def _reorder_indexer (
3481
3427
self ,
3482
3428
seq : tuple [Scalar | Iterable | AnyArrayLike , ...],
3483
- indexer : Int64Index ,
3484
- ) -> Int64Index :
3429
+ indexer : npt . NDArray [ np . intp ] ,
3430
+ ) -> npt . NDArray [ np . intp ] :
3485
3431
"""
3486
- Reorder an indexer of a MultiIndex (self) so that the label are in the
3432
+ Reorder an indexer of a MultiIndex (self) so that the labels are in the
3487
3433
same order as given in seq
3488
3434
3489
3435
Parameters
3490
3436
----------
3491
3437
seq : label/slice/list/mask or a sequence of such
3492
- indexer: an Int64Index indexer of self
3438
+ indexer: a position indexer of self
3493
3439
3494
3440
Returns
3495
3441
-------
3496
- indexer : a sorted Int64Index indexer of self ordered as seq
3442
+ indexer : a sorted position indexer of self ordered as seq
3497
3443
"""
3498
3444
# If the index is lexsorted and the list-like labels in seq are sorted
3499
3445
# then we do not need to sort
0 commit comments