Skip to content

Commit 9163297

Browse files
jbrockmendel authored and luckyvs1 committed
PERF: IntervalIndex.isin (pandas-dev#38353)
1 parent a1e2e2d commit 9163297

File tree

3 files changed

+48
-4
lines changed

3 files changed

+48
-4
lines changed

doc/source/whatsnew/v1.3.0.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ Deprecations
133133

134134
Performance improvements
135135
~~~~~~~~~~~~~~~~~~~~~~~~
136-
136+
- Performance improvement in :meth:`IntervalIndex.isin` (:issue:`38353`)
137137
-
138138
-
139139

pandas/core/algorithms.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
is_float_dtype,
3737
is_integer,
3838
is_integer_dtype,
39+
is_interval_dtype,
3940
is_list_like,
4041
is_numeric_dtype,
4142
is_object_dtype,
@@ -63,7 +64,7 @@
6364

6465
if TYPE_CHECKING:
6566
from pandas import Categorical, DataFrame, Index, Series
66-
from pandas.core.arrays import DatetimeArray, TimedeltaArray
67+
from pandas.core.arrays import DatetimeArray, IntervalArray, TimedeltaArray
6768

6869
_shared_docs: Dict[str, str] = {}
6970

@@ -453,7 +454,10 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
453454
# handle categoricals
454455
return cast("Categorical", comps).isin(values)
455456

456-
if needs_i8_conversion(comps.dtype):
457+
elif is_interval_dtype(comps.dtype):
458+
return cast("IntervalArray", comps).isin(values)
459+
460+
elif needs_i8_conversion(comps.dtype):
457461
# Dispatch to DatetimeLikeArrayMixin.isin
458462
return array(comps).isin(values)
459463
elif needs_i8_conversion(values.dtype) and not is_object_dtype(comps.dtype):

pandas/core/arrays/interval.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,13 +14,15 @@
1414
intervals_to_interval_bounds,
1515
)
1616
from pandas._libs.missing import NA
17+
from pandas._typing import ArrayLike
1718
from pandas.compat.numpy import function as nv
1819
from pandas.util._decorators import Appender
1920

2021
from pandas.core.dtypes.cast import maybe_convert_platform
2122
from pandas.core.dtypes.common import (
2223
is_categorical_dtype,
2324
is_datetime64_any_dtype,
25+
is_dtype_equal,
2426
is_float_dtype,
2527
is_integer_dtype,
2628
is_interval_dtype,
@@ -29,6 +31,7 @@
2931
is_scalar,
3032
is_string_dtype,
3133
is_timedelta64_dtype,
34+
needs_i8_conversion,
3235
pandas_dtype,
3336
)
3437
from pandas.core.dtypes.dtypes import IntervalDtype
@@ -40,7 +43,7 @@
4043
)
4144
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna, notna
4245

43-
from pandas.core.algorithms import take, value_counts
46+
from pandas.core.algorithms import isin, take, value_counts
4447
from pandas.core.arrays.base import ExtensionArray, _extension_array_shared_docs
4548
from pandas.core.arrays.categorical import Categorical
4649
import pandas.core.common as com
@@ -1435,6 +1438,43 @@ def contains(self, other):
14351438
other < self._right if self.open_right else other <= self._right
14361439
)
14371440

1441+
def isin(self, values) -> np.ndarray:
    """
    Compute a boolean mask marking which intervals of ``self`` occur in ``values``.

    Parameters
    ----------
    values : array-like
        Candidates to match against. Objects lacking a ``dtype`` attribute
        are first converted with ``np.array``.

    Returns
    -------
    np.ndarray
        Boolean array. When ``values`` holds intervals that cannot be
        compared with ``self`` (different ``closed`` side, or exactly one
        side needing i8 conversion) the result is all-False with shape
        ``self.shape``.
    """
    if not hasattr(values, "dtype"):
        values = np.array(values)
    values = extract_array(values, extract_numpy=True)

    if is_interval_dtype(values.dtype):
        if self.closed != values.closed:
            # not comparable -> no overlap
            return np.zeros(self.shape, dtype=bool)

        if is_dtype_equal(self.dtype, values.dtype):
            # GH#38353 instead of casting to object, operating on a
            # complex128 ndarray is much more performant.

            # error: "ArrayLike" has no attribute "view"  [attr-defined]
            left = self._combined.view("complex128")  # type:ignore[attr-defined]
            right = values._combined.view("complex128")
            # np.in1d is deprecated as of NumPy 2.0 in favour of np.isin.
            # np.isin preserves the shape of ``left`` whereas np.in1d
            # flattened it, so ravel to keep returning a 1-D mask.
            return np.isin(left, right).ravel()

        elif needs_i8_conversion(self.left.dtype) ^ needs_i8_conversion(
            values.left.dtype
        ):
            # not comparable -> no overlap
            return np.zeros(self.shape, dtype=bool)

    # Generic fallback: compare element-wise as Python objects.
    return isin(self.astype(object), values.astype(object))
1467+
1468+
@property
def _combined(self) -> ArrayLike:
    """
    Left and right bounds placed side by side as a two-column array.

    Each bound's ``_values`` is reshaped into a single column and the two
    columns are joined along axis 1. Bounds whose dtype needs i8 conversion
    are joined with their own ``_concat_same_type``; everything else goes
    through ``np.concatenate``.
    """
    lower = self.left._values.reshape(-1, 1)
    upper = self.right._values.reshape(-1, 1)
    columns = [lower, upper]

    if not needs_i8_conversion(lower.dtype):
        return np.concatenate(columns, axis=1)
    return lower._concat_same_type(columns, axis=1)
1477+
14381478

14391479
def maybe_convert_platform_interval(values):
14401480
"""

0 commit comments

Comments
 (0)