Skip to content

Commit 3da2423

Browse files
jbrockmendel authored and JulianWgs committed
REF: de-duplicate Index._intersection + MultiIndex._intersection (pandas-dev#41824)
1 parent 6de84ab commit 3da2423

File tree

2 files changed

+26
-15
lines changed

2 files changed

+26
-15
lines changed

pandas/core/indexes/base.py

+25 -12
Original file line number | Diff line number | Diff line change
@@ -3098,7 +3098,6 @@ def _intersection(self, other: Index, sort=False):
30983098
intersection specialized to the case with matching dtypes.
30993099
"""
31003100
# TODO(EA): setops-refactor, clean all this up
3101-
lvals = self._values
31023101

31033102
if self.is_monotonic and other.is_monotonic:
31043103
try:
@@ -3110,21 +3109,35 @@ def _intersection(self, other: Index, sort=False):
31103109
res = algos.unique1d(result)
31113110
return ensure_wrapped_if_datetimelike(res)
31123111

3113-
try:
3114-
indexer = other.get_indexer(lvals)
3115-
except InvalidIndexError:
3116-
# InvalidIndexError raised by get_indexer if non-unique
3117-
indexer, _ = other.get_indexer_non_unique(lvals)
3112+
res_values = self._intersection_via_get_indexer(other, sort=sort)
3113+
res_values = _maybe_try_sort(res_values, sort)
3114+
return res_values
31183115

3119-
mask = indexer != -1
3120-
indexer = indexer.take(mask.nonzero()[0])
3116+
def _intersection_via_get_indexer(self, other: Index, sort) -> ArrayLike:
3117+
"""
3118+
Find the intersection of two Indexes using get_indexer.
3119+
3120+
Returns
3121+
-------
3122+
np.ndarray or ExtensionArray
3123+
The returned array will be unique.
3124+
"""
3125+
# Note: drop_duplicates vs unique matters for MultiIndex, though
3126+
# it should not, see GH#41823
3127+
left_unique = self.drop_duplicates()
3128+
right_unique = other.drop_duplicates()
31213129

3122-
result = other.take(indexer).unique()._values
3123-
result = _maybe_try_sort(result, sort)
3130+
indexer = left_unique.get_indexer(right_unique)
3131+
3132+
mask = indexer != -1
31243133

3125-
# Intersection has to be unique
3126-
assert Index(result).is_unique
3134+
taker = indexer.take(mask.nonzero()[0])
3135+
if sort is False:
3136+
# sort bc we want the elements in the same order they are in self
3137+
# unnecessary in the case with sort=None bc we will sort later
3138+
taker = np.sort(taker)
31273139

3140+
result = left_unique.take(taker)._values
31283141
return result
31293142

31303143
@final

pandas/core/indexes/multi.py

+1 -3
Original file line number | Diff line number | Diff line change
@@ -3612,9 +3612,7 @@ def _intersection(self, other, sort=False) -> MultiIndex:
36123612
uniq_tuples = algos.unique(inner_tuples)
36133613

36143614
if uniq_tuples is None:
3615-
left_unique = self.drop_duplicates()
3616-
indexer = left_unique.get_indexer(other.drop_duplicates())
3617-
uniq_tuples = left_unique.take(np.sort(indexer[indexer != -1]))
3615+
uniq_tuples = self._intersection_via_get_indexer(other, sort)
36183616

36193617
if sort is None:
36203618
uniq_tuples = sorted(uniq_tuples)

0 commit comments

Comments
 (0)