Skip to content

Commit 17ecb56

Browse files
authored
PERF/REGR: IntervalIndex.intersection, PeriodIndex.get_loc (#42293)
1 parent 8d5d67b commit 17ecb56

File tree

2 files changed

+55
-3
lines changed

2 files changed

+55
-3
lines changed

pandas/core/indexes/interval.py

+45
Original file line number | Diff line number | Diff line change
@@ -61,6 +61,7 @@
6161
from pandas.core.dtypes.dtypes import IntervalDtype
6262
from pandas.core.dtypes.missing import is_valid_na_for_dtype
6363

64+
from pandas.core.algorithms import unique
6465
from pandas.core.arrays.interval import (
6566
IntervalArray,
6667
_interval_shared_docs,
@@ -787,6 +788,50 @@ def _format_data(self, name=None) -> str:
787788
# name argument is unused here; just for compat with base / categorical
788789
return self._data._format_data() + "," + self._format_space()
789790

791+
# --------------------------------------------------------------------
792+
# Set Operations
793+
794+
def _intersection(self, other, sort):
    """
    Compute the intersection with ``other`` when the dtypes already match.

    Uses the endpoint-indexer based ``_intersection_unique`` when one of the
    operands has unique left and unique right endpoints, and otherwise
    defers to the parent class implementation.
    """
    # Matching dtypes mean other.closed == self.closed for IntervalIndex.
    if self.left.is_unique and self.right.is_unique:
        result = self._intersection_unique(other)
    else:
        swappable = (
            other.left.is_unique
            and other.right.is_unique
            and self.isna().sum() <= 1
        )
        if not swappable:
            # Neither fast path applies; the parent's result is returned
            # directly, bypassing the sort step below.
            return super()._intersection(other, sort)
        # other has unique endpoints and self holds at most one NaN, so
        # the roles of the two operands are exchanged.
        result = other._intersection_unique(self)

    if sort is not None:
        return result
    return result.sort_values()
812+
813+
def _intersection_unique(self, other: IntervalIndex) -> IntervalIndex:
    """
    Return the intersection with another IntervalIndex.

    Used when the IntervalIndex does not have any common endpoint,
    no matter left or right.

    Parameters
    ----------
    other : IntervalIndex

    Returns
    -------
    IntervalIndex
    """
    # Note: this is much more performant than super()._intersection(other)
    left_pos = self.left.get_indexer(other.left)
    right_pos = self.right.get_indexer(other.right)

    # An element of ``other`` matches only if both of its endpoints were
    # found (indexer is not -1) at the very same position in ``self``.
    found = left_pos != -1
    same_slot = left_pos == right_pos
    positions = unique(left_pos[same_slot & found])

    return self.take(positions)
834+
790835
# --------------------------------------------------------------------
791836

792837
@property

pandas/core/indexes/period.py

+10-3
Original file line number | Diff line number | Diff line change
@@ -438,10 +438,17 @@ def get_loc(self, key, method=None, tolerance=None):
438438
else:
439439
key = Period(parsed, freq=self.freq)
440440

441-
elif isinstance(key, Period) and key.freq != self.freq:
442-
raise KeyError(key)
443441
elif isinstance(key, Period):
444-
pass
442+
sfreq = self.freq
443+
kfreq = key.freq
444+
if not (
445+
sfreq.n == kfreq.n
446+
and sfreq._period_dtype_code == kfreq._period_dtype_code
447+
):
448+
# GH#42247 For the subset of DateOffsets that can be Period freqs,
449+
# checking these two attributes is sufficient to check equality,
450+
# and much more performant than `self.freq == key.freq`
451+
raise KeyError(key)
445452
elif isinstance(key, datetime):
446453
try:
447454
key = Period(key, freq=self.freq)

0 commit comments

Comments
 (0)