Skip to content

Commit bbe9dcb

Browse files
jbrockmendel and meeseeksmachine
authored and committed
Backport PR pandas-dev#42197: PERF: IntervalArray.unique, IntervalIndex.intersection
1 parent 2219e45 commit bbe9dcb

File tree

3 files changed

+33
-15
lines changed

3 files changed

+33
-15
lines changed

pandas/core/algorithms.py

+7 -11
Original file line number | Diff line number | Diff line change
@@ -11,11 +11,7 @@
1111
Union,
1212
cast,
1313
)
14-
from warnings import (
15-
catch_warnings,
16-
simplefilter,
17-
warn,
18-
)
14+
from warnings import warn
1915

2016
import numpy as np
2117

@@ -159,12 +155,10 @@ def _ensure_data(values: ArrayLike) -> tuple[np.ndarray, DtypeObj]:
159155
return np.asarray(values), values.dtype
160156

161157
elif is_complex_dtype(values.dtype):
162-
# ignore the fact that we are casting to float
163-
# which discards complex parts
164-
with catch_warnings():
165-
simplefilter("ignore", np.ComplexWarning)
166-
values = ensure_float64(values)
167-
return values, np.dtype("float64")
158+
# Incompatible return value type (got "Tuple[Union[Any, ExtensionArray,
159+
# ndarray[Any, Any]], Union[Any, ExtensionDtype]]", expected
160+
# "Tuple[ndarray[Any, Any], Union[dtype[Any], ExtensionDtype]]")
161+
return values, values.dtype # type: ignore[return-value]
168162

169163
# datetimelike
170164
elif needs_i8_conversion(values.dtype):
@@ -246,6 +240,8 @@ def _ensure_arraylike(values) -> ArrayLike:
246240

247241

248242
_hashtables = {
243+
"complex128": htable.Complex128HashTable,
244+
"complex64": htable.Complex64HashTable,
249245
"float64": htable.Float64HashTable,
250246
"float32": htable.Float32HashTable,
251247
"uint64": htable.UInt64HashTable,

pandas/core/arrays/interval.py

+24
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,7 @@
6464
from pandas.core.algorithms import (
6565
isin,
6666
take,
67+
unique,
6768
value_counts,
6869
)
6970
from pandas.core.arrays.base import (
@@ -1610,6 +1611,29 @@ def _combined(self) -> ArrayLike:
16101611
comb = np.concatenate([left, right], axis=1)
16111612
return comb
16121613

1614+
def _from_combined(self, combined: np.ndarray) -> IntervalArray:
1615+
"""
1616+
Create a new IntervalArray with our dtype from a 1D complex128 ndarray.
1617+
"""
1618+
nc = combined.view("i8").reshape(-1, 2)
1619+
1620+
dtype = self._left.dtype
1621+
if needs_i8_conversion(dtype):
1622+
new_left = type(self._left)._from_sequence(nc[:, 0], dtype=dtype)
1623+
new_right = type(self._right)._from_sequence(nc[:, 1], dtype=dtype)
1624+
else:
1625+
new_left = nc[:, 0].view(dtype)
1626+
new_right = nc[:, 1].view(dtype)
1627+
return self._shallow_copy(left=new_left, right=new_right)
1628+
1629+
def unique(self) -> IntervalArray:
1630+
# Invalid index type "Tuple[slice, int]" for "Union[ExtensionArray,
1631+
# ndarray[Any, Any]]"; expected type "Union[int, integer[Any], slice,
1632+
# Sequence[int], ndarray[Any, Any]]"
1633+
nc = unique(self._combined.view("complex128")[:, 0]) # type: ignore[index]
1634+
nc = nc[:, None]
1635+
return self._from_combined(nc)
1636+
16131637

16141638
def _maybe_convert_platform_interval(values) -> ArrayLike:
16151639
"""

pandas/core/indexes/base.py

+2 -4
Original file line number | Diff line number | Diff line change
@@ -3128,10 +3128,8 @@ def _intersection_via_get_indexer(self, other: Index, sort) -> ArrayLike:
31283128
np.ndarray or ExtensionArray
31293129
The returned array will be unique.
31303130
"""
3131-
# Note: drop_duplicates vs unique matters for MultiIndex, though
3132-
# it should not, see GH#41823
3133-
left_unique = self.drop_duplicates()
3134-
right_unique = other.drop_duplicates()
3131+
left_unique = self.unique()
3132+
right_unique = other.unique()
31353133

31363134
# even though we are unique, we need get_indexer_for for IntervalIndex
31373135
indexer = left_unique.get_indexer_for(right_unique)

0 commit comments

Comments
 (0)