Skip to content

Commit 9d6d587

Browse files
authored
PERF: MultiIndex.get_locs (#45931)
1 parent 1f54efe commit 9d6d587

File tree

3 files changed

+39
-12
lines changed

3 files changed

+39
-12
lines changed

asv_bench/benchmarks/multiindex_object.py

+23
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,29 @@ def time_small_get_loc_warm(self):
4747
self.mi_small.get_loc((99, "A", "A"))
4848

4949

50+
class GetLocs:
    """Benchmarks timing ``MultiIndex.get_locs`` on three-level indexes.

    ``setup`` builds a large, a medium and a small index; each ``time_*``
    method looks up a single fully-specified key in one of them.
    """

    def setup(self):
        """Create the ``mi_large``, ``mi_med`` and ``mi_small`` fixtures."""

        def build(*levels):
            # Every fixture is a cartesian product sharing the same level names.
            return MultiIndex.from_product(
                list(levels), names=["one", "two", "three"]
            )

        self.mi_large = build(
            np.arange(1000), np.arange(20), list(string.ascii_letters)
        )
        self.mi_med = build(np.arange(1000), np.arange(10), list("A"))
        self.mi_small = build(np.arange(100), list("A"), list("A"))

    def time_large_get_locs(self):
        """Time a lookup of the key ``[999, 19, "Z"]`` in the large index."""
        key = [999, 19, "Z"]
        self.mi_large.get_locs(key)

    def time_med_get_locs(self):
        """Time a lookup of the key ``[999, 9, "A"]`` in the medium index."""
        key = [999, 9, "A"]
        self.mi_med.get_locs(key)

    def time_small_get_locs(self):
        """Time a lookup of the key ``[99, "A", "A"]`` in the small index."""
        key = [99, "A", "A"]
        self.mi_small.get_locs(key)
71+
72+
5073
class Duplicates:
5174
def setup(self):
5275
size = 65536

doc/source/whatsnew/v1.5.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -253,6 +253,7 @@ Performance improvements
253253
- Performance improvement in :meth:`DataFrame.duplicated` when subset consists of only one column (:issue:`45236`)
254254
- Performance improvement in :meth:`.GroupBy.transform` when broadcasting values for user-defined functions (:issue:`45708`)
255255
- Performance improvement in :meth:`.GroupBy.transform` for user-defined functions when only a single group exists (:issue:`44977`)
256+
- Performance improvement in :meth:`MultiIndex.get_locs` (:issue:`45681`)
256257
- Performance improvement in :func:`merge` when left and/or right are empty (:issue:`45838`)
257258
- Performance improvement in :class:`DataFrame` and :class:`Series` constructors for extension dtype scalars (:issue:`45854`)
258259
-

pandas/core/indexes/multi.py

+15 -12
Original file line number | Diff line number | Diff line change
@@ -3224,8 +3224,8 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
32243224
return convert_indexer(start, stop + 1, step)
32253225
else:
32263226
# sorted, so can return slice object -> view
3227-
i = level_codes.searchsorted(start, side="left")
3228-
j = level_codes.searchsorted(stop, side="right")
3227+
i = algos.searchsorted(level_codes, start, side="left")
3228+
j = algos.searchsorted(level_codes, stop, side="right")
32293229
return slice(i, j, step)
32303230

32313231
else:
@@ -3248,12 +3248,12 @@ def convert_indexer(start, stop, step, indexer=indexer, codes=level_codes):
32483248

32493249
if isinstance(idx, slice):
32503250
# e.g. test_partial_string_timestamp_multiindex
3251-
start = level_codes.searchsorted(idx.start, side="left")
3251+
start = algos.searchsorted(level_codes, idx.start, side="left")
32523252
# NB: "left" here bc of slice semantics
3253-
end = level_codes.searchsorted(idx.stop, side="left")
3253+
end = algos.searchsorted(level_codes, idx.stop, side="left")
32543254
else:
3255-
start = level_codes.searchsorted(idx, side="left")
3256-
end = level_codes.searchsorted(idx, side="right")
3255+
start = algos.searchsorted(level_codes, idx, side="left")
3256+
end = algos.searchsorted(level_codes, idx, side="right")
32573257

32583258
if start == end:
32593259
# The label is present in self.levels[level] but unused:
@@ -3304,10 +3304,10 @@ def get_locs(self, seq):
33043304
)
33053305

33063306
n = len(self)
3307-
# indexer is the list of all positions that we want to take; we
3308-
# start with it being everything and narrow it down as we look at each
3309-
# entry in `seq`
3310-
indexer = Index(np.arange(n))
3307+
# indexer is the list of all positions that we want to take; it
3308+
# is created on the first entry in seq and narrowed down as we
3309+
# look at remaining entries
3310+
indexer = None
33113311

33123312
if any(x is Ellipsis for x in seq):
33133313
raise NotImplementedError(
@@ -3330,7 +3330,9 @@ def _convert_to_indexer(r) -> Int64Index:
33303330
r = r.nonzero()[0]
33313331
return Int64Index(r)
33323332

3333-
def _update_indexer(idxr: Index, indexer: Index) -> Index:
3333+
def _update_indexer(idxr: Index, indexer: Index | None) -> Index:
3334+
if indexer is None:
3335+
return idxr
33343336
indexer_intersection = indexer.intersection(idxr)
33353337
if indexer_intersection.empty and not idxr.empty and not indexer.empty:
33363338
raise KeyError(seq)
@@ -3415,7 +3417,8 @@ def _update_indexer(idxr: Index, indexer: Index) -> Index:
34153417

34163418
elif com.is_null_slice(k):
34173419
# empty slice
3418-
pass
3420+
if indexer is None:
3421+
indexer = Index(np.arange(n))
34193422

34203423
elif isinstance(k, slice):
34213424

0 commit comments

Comments
 (0)