Skip to content

Commit 061642a

Browse files
Backport PR #42318: PERF/REGR: symmetric_difference revert most of #41833 (#42324)
* Backport PR #42318: PERF/REGR: symmetric_difference revert most of #41833
* remove if self._is_multi block
* remove MultiIndex symmetric_difference

Co-authored-by: jbrockmendel <[email protected]>
1 parent f1d1367 commit 061642a

File tree

4 files changed

+58
-18
lines changed

4 files changed

+58
-18
lines changed

pandas/core/indexes/base.py

+40-5
Original file line numberDiff line numberDiff line change
@@ -3254,12 +3254,47 @@ def symmetric_difference(self, other, result_name=None, sort=None):
32543254
if result_name is None:
32553255
result_name = result_name_update
32563256

3257-
left = self.difference(other, sort=False)
3258-
right = other.difference(self, sort=False)
3259-
result = left.union(right, sort=sort)
3257+
if not self._should_compare(other):
3258+
return self.union(other, sort=sort).rename(result_name)
3259+
3260+
elif not is_dtype_equal(self.dtype, other.dtype):
3261+
dtype = self._find_common_type_compat(other)
3262+
this = self.astype(dtype, copy=False)
3263+
that = other.astype(dtype, copy=False)
3264+
return this.symmetric_difference(that, sort=sort).rename(result_name)
3265+
3266+
this = self.unique()
3267+
other = other.unique()
3268+
indexer = this.get_indexer_for(other)
3269+
3270+
# {this} minus {other}
3271+
common_indexer = indexer.take((indexer != -1).nonzero()[0])
3272+
left_indexer = np.setdiff1d(
3273+
np.arange(this.size), common_indexer, assume_unique=True
3274+
)
3275+
left_diff = this._values.take(left_indexer)
3276+
3277+
# {other} minus {this}
3278+
right_indexer = (indexer == -1).nonzero()[0]
3279+
right_diff = other._values.take(right_indexer)
3280+
3281+
res_values = concat_compat([left_diff, right_diff])
3282+
res_values = _maybe_try_sort(res_values, sort)
3283+
3284+
result = Index(res_values, name=result_name)
3285+
3286+
if self._is_multi:
3287+
self = cast("MultiIndex", self)
3288+
if len(result) == 0:
3289+
# On equal symmetric_difference MultiIndexes the difference is empty.
3290+
# Therefore, an empty MultiIndex is returned GH#13490
3291+
return type(self)(
3292+
levels=[[] for _ in range(self.nlevels)],
3293+
codes=[[] for _ in range(self.nlevels)],
3294+
names=result.name,
3295+
)
3296+
return type(self).from_tuples(result, names=result.name)
32603297

3261-
if result_name is not None:
3262-
result = result.rename(result_name)
32633298
return result
32643299

32653300
@final

pandas/core/indexes/datetimelike.py

+6-1
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
inherit_names,
6464
make_wrapped_arith_op,
6565
)
66+
from pandas.core.indexes.numeric import Int64Index
6667
from pandas.core.tools.timedeltas import to_timedelta
6768

6869
if TYPE_CHECKING:
@@ -784,7 +785,11 @@ def _union(self, other, sort):
784785
# that result.freq == self.freq
785786
return result
786787
else:
787-
return super()._union(other, sort=sort)._with_freq("infer")
788+
i8self = Int64Index._simple_new(self.asi8)
789+
i8other = Int64Index._simple_new(other.asi8)
790+
i8result = i8self._union(i8other, sort=sort)
791+
result = type(self)(i8result, dtype=self.dtype, freq="infer")
792+
return result
788793

789794
# --------------------------------------------------------------------
790795
# Join Methods

pandas/core/indexes/multi.py

-12
Original file line numberDiff line numberDiff line change
@@ -3636,18 +3636,6 @@ def _convert_can_do_setop(self, other):
36363636

36373637
return other, result_names
36383638

3639-
def symmetric_difference(self, other, result_name=None, sort=None):
3640-
# On equal symmetric_difference MultiIndexes the difference is empty.
3641-
# Therefore, an empty MultiIndex is returned GH13490
3642-
tups = Index.symmetric_difference(self, other, result_name, sort)
3643-
if len(tups) == 0:
3644-
return type(self)(
3645-
levels=[[] for _ in range(self.nlevels)],
3646-
codes=[[] for _ in range(self.nlevels)],
3647-
names=tups.names,
3648-
)
3649-
return tups
3650-
36513639
# --------------------------------------------------------------------
36523640

36533641
@doc(Index.astype)

pandas/core/indexes/range.py

+12
Original file line numberDiff line numberDiff line change
@@ -730,6 +730,18 @@ def _difference(self, other, sort=None):
730730
new_index = new_index[::-1]
731731
return new_index
732732

733+
def symmetric_difference(self, other, result_name: Hashable = None, sort=None):
    """
    Compute the symmetric difference of this index and ``other``.

    When ``other`` is a ``RangeIndex`` and ``sort`` is None, the result is
    assembled from the two one-sided ``difference`` calls joined with
    ``union``. Every other combination is delegated to the parent class.

    Parameters
    ----------
    other : object
        The second operand of the set operation.
    result_name : Hashable, optional
        Name for the result. Forwarded unchanged to the parent
        implementation on the fallback path; otherwise applied with
        ``rename`` only when it is not None.
    sort : optional
        Forwarded to the parent implementation. Any value other than
        None forces the fallback path.

    Returns
    -------
    The combined result of the two one-sided differences, or whatever the
    parent implementation returns on the fallback path.
    """
    takes_range_path = isinstance(other, RangeIndex) and sort is None
    if not takes_range_path:
        return super().symmetric_difference(other, result_name, sort)

    # Elements exclusive to each side, merged into a single index.
    only_in_self = self.difference(other)
    only_in_other = other.difference(self)
    combined = only_in_self.union(only_in_other)

    if result_name is None:
        return combined
    return combined.rename(result_name)
744+
733745
# --------------------------------------------------------------------
734746

735747
def _concat(self, indexes: list[Index], name: Hashable) -> Index:

0 commit comments

Comments
 (0)