Skip to content

Commit 5b9ca44

Browse files
authored
PERF/REGR: restore IntervalIndex._intersection_non_unique (pandas-dev#42334)
1 parent 29094b6 commit 5b9ca44

File tree

1 file changed

+31
-1
lines changed

1 file changed

+31
-1
lines changed

pandas/core/indexes/interval.py

+31-1
Original file line number | Diff line number | Diff line change
@@ -803,7 +803,8 @@ def _intersection(self, other, sort):
803803
# multiple NaNs
804804
taken = other._intersection_unique(self)
805805
else:
806-
return super()._intersection(other, sort)
806+
# duplicates
807+
taken = self._intersection_non_unique(other)
807808

808809
if sort is None:
809810
taken = taken.sort_values()
@@ -832,6 +833,35 @@ def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex:
832833

833834
return self.take(indexer)
834835

836+
def _intersection_non_unique(self, other: IntervalIndex) -> IntervalIndex:
    """
    Return the intersection with another IntervalIndex.

    Used when the IntervalIndex does have some common endpoints,
    on either side.

    Parameters
    ----------
    other : IntervalIndex
        Index whose ``(left, right)`` endpoint pairs are looked up.

    Returns
    -------
    IntervalIndex
        The elements of ``self``, in their original order, whose
        ``(left, right)`` pair also occurs in ``other``. Every matching
        position in ``self`` is kept, so duplicates are retained. When both
        indexes contain missing values, the first missing entry of ``self``
        is kept as well.
    """
    # Note: this is about 3.25x faster than super()._intersection(other)
    #  in IntervalIndexMethod.time_intersection_both_duplicate(1000)
    mask = np.zeros(len(self), dtype=bool)

    if self.hasnans and other.hasnans:
        # Missing intervals are handled explicitly here rather than left to
        # the tuple lookup below: flag only the first one found in ``self``.
        first_nan_loc = np.flatnonzero(self.isna())[0]
        mask[first_nan_loc] = True

    # Build the lookup set once so each membership test below is O(1).
    other_tups = set(zip(other.left, other.right))
    for i, tup in enumerate(zip(self.left, self.right)):
        if tup in other_tups:
            mask[i] = True

    return self[mask]
864+
835865
# --------------------------------------------------------------------
836866

837867
@property

0 commit comments

Comments
 (0)