Skip to content

Commit 12566a7

Browse files
lukemanley authored and pmhatre1 committed
PERF: MultiIndex.equals for equal length indexes (pandas-dev#56990)
* avoid densifying level values in MultiIndex.equals
* whatsnew
1 parent 34f26d4 commit 12566a7

File tree

3 files changed

+22
-30
lines changed

3 files changed

+22
-30
lines changed

asv_bench/benchmarks/multiindex_object.py

+11 -5
Original file line number | Diff line number | Diff line change
@@ -223,14 +223,20 @@ def time_categorical_level(self):
223223

224224
class Equals:
225225
def setup(self):
226-
idx_large_fast = RangeIndex(100000)
227-
idx_small_slow = date_range(start="1/1/2012", periods=1)
228-
self.mi_large_slow = MultiIndex.from_product([idx_large_fast, idx_small_slow])
229-
226+
self.mi = MultiIndex.from_product(
227+
[
228+
date_range("2000-01-01", periods=1000),
229+
RangeIndex(1000),
230+
]
231+
)
232+
self.mi_deepcopy = self.mi.copy(deep=True)
230233
self.idx_non_object = RangeIndex(1)
231234

235+
def time_equals_deepcopy(self):
236+
self.mi.equals(self.mi_deepcopy)
237+
232238
def time_equals_non_object_index(self):
233-
self.mi_large_slow.equals(self.idx_non_object)
239+
self.mi.equals(self.idx_non_object)
234240

235241

236242
class SetOperations:

doc/source/whatsnew/v3.0.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,7 @@ Performance improvements
106106
- Performance improvement in :meth:`DataFrame.join` with ``how="left"`` or ``how="right"`` and ``sort=True`` (:issue:`56919`)
107107
- Performance improvement in :meth:`DataFrameGroupBy.ffill`, :meth:`DataFrameGroupBy.bfill`, :meth:`SeriesGroupBy.ffill`, and :meth:`SeriesGroupBy.bfill` (:issue:`56902`)
108108
- Performance improvement in :meth:`Index.take` when ``indices`` is a full range indexer from zero to length of index (:issue:`56806`)
109+
- Performance improvement in :meth:`MultiIndex.equals` for equal length indexes (:issue:`56990`)
109110
-
110111

111112
.. ---------------------------------------------------------------------------

pandas/core/indexes/multi.py

+10 -25
Original file line number | Diff line number | Diff line change
@@ -3726,31 +3726,16 @@ def equals(self, other: object) -> bool:
37263726
other_mask = other_codes == -1
37273727
if not np.array_equal(self_mask, other_mask):
37283728
return False
3729-
self_codes = self_codes[~self_mask]
3730-
self_values = self.levels[i]._values.take(self_codes)
3731-
3732-
other_codes = other_codes[~other_mask]
3733-
other_values = other.levels[i]._values.take(other_codes)
3734-
3735-
# since we use NaT both datetime64 and timedelta64 we can have a
3736-
# situation where a level is typed say timedelta64 in self (IOW it
3737-
# has other values than NaT) but types datetime64 in other (where
3738-
# its all NaT) but these are equivalent
3739-
if len(self_values) == 0 and len(other_values) == 0:
3740-
continue
3741-
3742-
if not isinstance(self_values, np.ndarray):
3743-
# i.e. ExtensionArray
3744-
if not self_values.equals(other_values):
3745-
return False
3746-
elif not isinstance(other_values, np.ndarray):
3747-
# i.e. other is ExtensionArray
3748-
if not other_values.equals(self_values):
3749-
return False
3750-
else:
3751-
if not array_equivalent(self_values, other_values):
3752-
return False
3753-
3729+
self_level = self.levels[i]
3730+
other_level = other.levels[i]
3731+
new_codes = recode_for_categories(
3732+
other_codes, other_level, self_level, copy=False
3733+
)
3734+
if not np.array_equal(self_codes, new_codes):
3735+
return False
3736+
if not self_level[:0].equals(other_level[:0]):
3737+
# e.g. Int64 != int64
3738+
return False
37543739
return True
37553740

37563741
def equal_levels(self, other: MultiIndex) -> bool:

0 commit comments

Comments
 (0)