Skip to content

Commit 214bdfb

Browse files
authored
CLN: simplify MultiIndex.get_locs (pandas-dev#42245)
1 parent 96f01ac commit 214bdfb

File tree

2 files changed

+42
-34
lines changed

2 files changed

+42
-34
lines changed

pandas/core/indexes/multi.py

Lines changed: 41 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -3047,7 +3047,9 @@ def maybe_mi_droplevels(indexer, levels):
30473047
indexer = self._get_level_indexer(key, level=level)
30483048
return indexer, maybe_mi_droplevels(indexer, [level])
30493049

3050-
def _get_level_indexer(self, key, level: int = 0, indexer=None):
3050+
def _get_level_indexer(
3051+
self, key, level: int = 0, indexer: Int64Index | None = None
3052+
):
30513053
# `level` kwarg is _always_ positional, never name
30523054
# return an indexer, boolean array or a slice showing where the key is
30533055
# in the totality of values
@@ -3200,10 +3202,12 @@ def get_locs(self, seq):
32003202
"MultiIndex slicing requires the index to be lexsorted: slicing "
32013203
f"on levels {true_slices}, lexsort depth {self._lexsort_depth}"
32023204
)
3203-
# indexer
3204-
# this is the list of all values that we want to select
3205+
32053206
n = len(self)
3206-
indexer = None
3207+
# indexer is the list of all positions that we want to take; we
3208+
# start with it being everything and narrow it down as we look at each
3209+
# entry in `seq`
3210+
indexer = Index(np.arange(n))
32073211

32083212
def _convert_to_indexer(r) -> Int64Index:
32093213
# return an indexer
@@ -3221,78 +3225,84 @@ def _convert_to_indexer(r) -> Int64Index:
32213225
r = r.nonzero()[0]
32223226
return Int64Index(r)
32233227

3224-
def _update_indexer(idxr: Index | None, indexer: Index | None, key) -> Index:
3225-
if indexer is None:
3226-
indexer = Index(np.arange(n))
3227-
if idxr is None:
3228-
return indexer
3228+
def _update_indexer(idxr: Index, indexer: Index) -> Index:
32293229
indexer_intersection = indexer.intersection(idxr)
32303230
if indexer_intersection.empty and not idxr.empty and not indexer.empty:
3231-
raise KeyError(key)
3231+
raise KeyError(seq)
32323232
return indexer_intersection
32333233

32343234
for i, k in enumerate(seq):
32353235

32363236
if com.is_bool_indexer(k):
32373237
# a boolean indexer, must be the same length!
32383238
k = np.asarray(k)
3239-
indexer = _update_indexer(
3240-
_convert_to_indexer(k), indexer=indexer, key=seq
3241-
)
3239+
lvl_indexer = _convert_to_indexer(k)
3240+
indexer = _update_indexer(lvl_indexer, indexer=indexer)
32423241

32433242
elif is_list_like(k):
32443243
# a collection of labels to include from this level (these
32453244
# are or'd)
3245+
32463246
indexers: Int64Index | None = None
32473247
for x in k:
32483248
try:
3249-
idxrs = _convert_to_indexer(
3250-
self._get_level_indexer(x, level=i, indexer=indexer)
3251-
)
3252-
indexers = (idxrs if indexers is None else indexers).union(
3253-
idxrs, sort=False
3249+
# Argument "indexer" to "_get_level_indexer" of "MultiIndex"
3250+
# has incompatible type "Index"; expected "Optional[Int64Index]"
3251+
item_lvl_indexer = self._get_level_indexer(
3252+
x, level=i, indexer=indexer # type: ignore[arg-type]
32543253
)
32553254
except KeyError:
3256-
3257-
# ignore not founds
3255+
# ignore not founds; see discussion in GH#39424
32583256
continue
3257+
else:
3258+
idxrs = _convert_to_indexer(item_lvl_indexer)
3259+
3260+
if indexers is None:
3261+
indexers = idxrs
3262+
else:
3263+
indexers = indexers.union(idxrs, sort=False)
32593264

32603265
if indexers is not None:
3261-
indexer = _update_indexer(indexers, indexer=indexer, key=seq)
3266+
indexer = _update_indexer(indexers, indexer=indexer)
32623267
else:
32633268
# no matches we are done
3264-
return np.array([], dtype=np.int64)
3269+
# test_loc_getitem_duplicates_multiindex_empty_indexer
3270+
return np.array([], dtype=np.intp)
32653271

32663272
elif com.is_null_slice(k):
32673273
# empty slice
3268-
indexer = _update_indexer(None, indexer=indexer, key=seq)
3274+
pass
32693275

32703276
elif isinstance(k, slice):
32713277

32723278
# a slice, include BOTH of the labels
3279+
# Argument "indexer" to "_get_level_indexer" of "MultiIndex" has
3280+
# incompatible type "Index"; expected "Optional[Int64Index]"
3281+
lvl_indexer = self._get_level_indexer(
3282+
k,
3283+
level=i,
3284+
indexer=indexer, # type: ignore[arg-type]
3285+
)
32733286
indexer = _update_indexer(
3274-
_convert_to_indexer(
3275-
self._get_level_indexer(k, level=i, indexer=indexer)
3276-
),
3287+
_convert_to_indexer(lvl_indexer),
32773288
indexer=indexer,
3278-
key=seq,
32793289
)
32803290
else:
32813291
# a single label
3292+
lvl_indexer = self._get_loc_level(k, level=i)[0]
32823293
indexer = _update_indexer(
3283-
_convert_to_indexer(self._get_loc_level(k, level=i)[0]),
3294+
_convert_to_indexer(lvl_indexer),
32843295
indexer=indexer,
3285-
key=seq,
32863296
)
32873297

32883298
# empty indexer
32893299
if indexer is None:
3290-
return np.array([], dtype=np.int64)
3300+
return np.array([], dtype=np.intp)
32913301

32923302
assert isinstance(indexer, Int64Index), type(indexer)
32933303
indexer = self._reorder_indexer(seq, indexer)
32943304

3295-
return indexer._values
3305+
return indexer._values.astype(np.intp, copy=False)
32963306

32973307
# --------------------------------------------------------------------
32983308

pandas/core/indexing.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1244,9 +1244,7 @@ def _convert_to_indexer(self, key, axis: int, is_setter: bool = False):
12441244
return {"key": key}
12451245

12461246
if is_nested_tuple(key, labels):
1247-
if isinstance(self.obj, ABCSeries) and any(
1248-
isinstance(k, tuple) for k in key
1249-
):
1247+
if self.ndim == 1 and any(isinstance(k, tuple) for k in key):
12501248
# GH#35349 Raise if tuple in tuple for series
12511249
raise ValueError("Too many indices")
12521250
return labels.get_locs(key)

0 commit comments

Comments
 (0)