Skip to content

Commit dbfcd24

Browse files
author
Jean-Francois Zinque
committed
Improve performance of intersection of MultiIndex
1 parent c68e4d8 commit dbfcd24

File tree

2 files changed

+34
-2
lines changed

2 files changed

+34
-2
lines changed

asv_bench/benchmarks/multiindex_object.py

+26
Original file line number | Diff line number | Diff line change
@@ -160,4 +160,30 @@ def time_equals_non_object_index(self):
160160
self.mi_large_slow.equals(self.idx_non_object)
161161

162162

163+
class SetOperations:
164+
165+
params = [
166+
("monotonic", "non_monotonic"),
167+
("intersection", "union", "symmetric_difference"),
168+
]
169+
param_names = ["index_structure", "method"]
170+
171+
def setup(self, index_structure, method):
172+
N = 10 ** 5
173+
values = np.arange(N)
174+
175+
if index_structure == "monotonic":
176+
mi = MultiIndex.from_arrays([values, values])
177+
else:
178+
mi = MultiIndex.from_arrays(
179+
[np.concatenate([values[::2], values[1::2]]), values]
180+
)
181+
182+
self.left = mi
183+
self.right = mi[:-1]
184+
185+
def time_operation(self, index_structure, method):
186+
getattr(self.left, method)(self.right)
187+
188+
163189
from .pandas_vb_common import setup # noqa: F401 isort:skip

pandas/core/indexes/multi.py

+8 -2
Original file line number | Diff line number | Diff line change
@@ -3324,10 +3324,16 @@ def intersection(self, other, sort=False):
33243324
if self.equals(other):
33253325
return self
33263326

3327-
uniq_other = set(other.values)
3327+
lvals = self._ndarray_values
3328+
rvals = other._ndarray_values
3329+
3330+
if self.is_monotonic and other.is_monotonic:
3331+
return self._inner_indexer(lvals, rvals)[0]
3332+
3333+
other_uniq = set(rvals)
33283334
seen = set()
33293335
uniq_tuples = [
3330-
x for x in self.values if x in uniq_other and not (x in seen or seen.add(x))
3336+
x for x in lvals if x in other_uniq and not (x in seen or seen.add(x))
33313337
]
33323338

33333339
if sort is None:

0 commit comments

Comments
 (0)