Skip to content

Commit c0f6428

Browse files
tlaytongoogle authored and jreback committed
Improve performance of equality comparison between a simple Index and a MultiIndex (pandas-dev#29134)
1 parent c138864 commit c0f6428

File tree

5 files changed

+38
-1
lines changed

5 files changed

+38
-1
lines changed

asv_bench/benchmarks/index_object.py

+13
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@
77
Float64Index,
88
Index,
99
IntervalIndex,
10+
MultiIndex,
1011
RangeIndex,
1112
Series,
1213
date_range,
@@ -111,6 +112,18 @@ def time_get_loc_dec(self):
111112
self.idx_dec.get_loc(100000)
112113

113114

115+
class IndexEquals:
116+
def setup(self):
117+
idx_large_fast = RangeIndex(100000)
118+
idx_small_slow = date_range(start="1/1/2012", periods=1)
119+
self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
120+
121+
self.idx_non_object = RangeIndex(1)
122+
123+
def time_non_object_equals_multiindex(self):
124+
self.idx_non_object.equals(self.mi_large_slow)
125+
126+
114127
class IndexAppend:
115128
def setup(self):
116129

asv_bench/benchmarks/multiindex_object.py

+13-1
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22

33
import numpy as np
44

5-
from pandas import DataFrame, MultiIndex, date_range
5+
from pandas import DataFrame, MultiIndex, RangeIndex, date_range
66
import pandas.util.testing as tm
77

88

@@ -147,4 +147,16 @@ def time_categorical_level(self):
147147
self.df.set_index(["a", "b"])
148148

149149

150+
class Equals:
151+
def setup(self):
152+
idx_large_fast = RangeIndex(100000)
153+
idx_small_slow = date_range(start="1/1/2012", periods=1)
154+
self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
155+
156+
self.idx_non_object = RangeIndex(1)
157+
158+
def time_equals_non_object_index(self):
159+
self.mi_large_slow.equals(self.idx_non_object)
160+
161+
150162
from .pandas_vb_common import setup # noqa: F401 isort:skip

doc/source/whatsnew/v1.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -628,6 +628,7 @@ Performance improvements
628628
- Performance improvement when comparing a :class:`Categorical` with a scalar and the scalar is not found in the categories (:issue:`29750`)
629629
- Performance improvement when checking if values in a :class:`Categorical` are equal to, greater than or equal to, or greater than a given scalar.
630630
The improvement is not present when checking if the :class:`Categorical` is less than, or less than or equal to, the scalar (:issue:`29820`)
631+
- Performance improvement in :meth:`Index.equals` and :meth:`MultiIndex.equals` (:issue:`29134`)
631632

632633
.. _whatsnew_1000.bug_fixes:
633634

pandas/core/indexes/base.py

+6
Original file line number | Diff line number | Diff line change
@@ -4172,6 +4172,12 @@ def equals(self, other) -> bool:
41724172
# if other is not object, use other's logic for coercion
41734173
return other.equals(self)
41744174

4175+
if isinstance(other, ABCMultiIndex):
4176+
# d-level MultiIndex can equal d-tuple Index
4177+
if not is_object_dtype(self.dtype):
4178+
if self.nlevels != other.nlevels:
4179+
return False
4180+
41754181
return array_equivalent(
41764182
com.values_from_object(self), com.values_from_object(other)
41774183
)

pandas/core/indexes/multi.py

+5
Original file line number | Diff line number | Diff line change
@@ -3053,6 +3053,11 @@ def equals(self, other) -> bool:
30533053
return False
30543054

30553055
if not isinstance(other, MultiIndex):
3056+
# d-level MultiIndex can equal d-tuple Index
3057+
if not is_object_dtype(other.dtype):
3058+
if self.nlevels != other.nlevels:
3059+
return False
3060+
30563061
other_vals = com.values_from_object(ensure_index(other))
30573062
return array_equivalent(self._ndarray_values, other_vals)
30583063

0 commit comments

Comments
 (0)