Skip to content

Commit 0a9b608

Browse files
jbrockmendel authored and simonjayhawkins committed
PERF/REGR: symmetric_difference revert most of pandas-dev#41833 (pandas-dev#42318)
* PERF/REGR: symmetric_difference revert most of 41833
* mypy fixup

Co-authored-by: Simon Hawkins <[email protected]>
1 parent e31d50a commit 0a9b608

File tree

3 files changed

+56
-15
lines changed

3 files changed

+56
-15
lines changed

pandas/core/indexes/base.py

+38-14
Original file line numberDiff line numberDiff line change
@@ -3209,7 +3209,6 @@ def _wrap_difference_result(self, other, result):
32093209
# We will override for MultiIndex to handle empty results
32103210
return self._wrap_setop_result(other, result)
32113211

3212-
@final
32133212
def symmetric_difference(self, other, result_name=None, sort=None):
32143213
"""
32153214
Compute the symmetric difference of two Index objects.
@@ -3251,21 +3250,46 @@ def symmetric_difference(self, other, result_name=None, sort=None):
32513250
if result_name is None:
32523251
result_name = result_name_update
32533252

3254-
left = self.difference(other, sort=False)
3255-
right = other.difference(self, sort=False)
3256-
result = left.union(right, sort=sort)
3253+
if not self._should_compare(other):
3254+
return self.union(other, sort=sort).rename(result_name)
32573255

3258-
if result_name is not None:
3259-
result = result.rename(result_name)
3256+
elif not is_dtype_equal(self.dtype, other.dtype):
3257+
dtype = self._find_common_type_compat(other)
3258+
this = self.astype(dtype, copy=False)
3259+
that = other.astype(dtype, copy=False)
3260+
return this.symmetric_difference(that, sort=sort).rename(result_name)
32603261

3261-
if self._is_multi and len(result) == 0:
3262-
# On equal symmetric_difference MultiIndexes the difference is empty.
3263-
# Therefore, an empty MultiIndex is returned GH#13490
3264-
return type(self)(
3265-
levels=[[] for _ in range(self.nlevels)],
3266-
codes=[[] for _ in range(self.nlevels)],
3267-
names=result.names,
3268-
)
3262+
this = self.unique()
3263+
other = other.unique()
3264+
indexer = this.get_indexer_for(other)
3265+
3266+
# {this} minus {other}
3267+
common_indexer = indexer.take((indexer != -1).nonzero()[0])
3268+
left_indexer = np.setdiff1d(
3269+
np.arange(this.size), common_indexer, assume_unique=True
3270+
)
3271+
left_diff = this._values.take(left_indexer)
3272+
3273+
# {other} minus {this}
3274+
right_indexer = (indexer == -1).nonzero()[0]
3275+
right_diff = other._values.take(right_indexer)
3276+
3277+
res_values = concat_compat([left_diff, right_diff])
3278+
res_values = _maybe_try_sort(res_values, sort)
3279+
3280+
result = Index(res_values, name=result_name)
3281+
3282+
if self._is_multi:
3283+
self = cast("MultiIndex", self)
3284+
if len(result) == 0:
3285+
# On equal symmetric_difference MultiIndexes the difference is empty.
3286+
# Therefore, an empty MultiIndex is returned GH#13490
3287+
return type(self)(
3288+
levels=[[] for _ in range(self.nlevels)],
3289+
codes=[[] for _ in range(self.nlevels)],
3290+
names=result.name,
3291+
)
3292+
return type(self).from_tuples(result, names=result.name)
32693293

32703294
return result
32713295

pandas/core/indexes/datetimelike.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
NDArrayBackedExtensionIndex,
6565
inherit_names,
6666
)
67+
from pandas.core.indexes.numeric import Int64Index
6768
from pandas.core.tools.timedeltas import to_timedelta
6869

6970
if TYPE_CHECKING:
@@ -680,4 +681,8 @@ def _union(self, other, sort):
680681
# that result.freq == self.freq
681682
return result
682683
else:
683-
return super()._union(other, sort=sort)._with_freq("infer")
684+
i8self = Int64Index._simple_new(self.asi8)
685+
i8other = Int64Index._simple_new(other.asi8)
686+
i8result = i8self._union(i8other, sort=sort)
687+
result = type(self)(i8result, dtype=self.dtype, freq="infer")
688+
return result

pandas/core/indexes/range.py

+12
Original file line numberDiff line numberDiff line change
@@ -731,6 +731,18 @@ def _difference(self, other, sort=None):
731731
new_index = new_index[::-1]
732732
return new_index
733733

734+
def symmetric_difference(self, other, result_name: Hashable = None, sort=None):
735+
if not isinstance(other, RangeIndex) or sort is not None:
736+
return super().symmetric_difference(other, result_name, sort)
737+
738+
left = self.difference(other)
739+
right = other.difference(self)
740+
result = left.union(right)
741+
742+
if result_name is not None:
743+
result = result.rename(result_name)
744+
return result
745+
734746
# --------------------------------------------------------------------
735747

736748
def _concat(self, indexes: list[Index], name: Hashable) -> Index:

0 commit comments

Comments
 (0)