Skip to content

Commit 1ac2675

Browse files
authored
BUG: MultiIndex.symmetric_difference not keeping ea dtype (#48607)
* BUG: MultiIndex.symmetric_difference not keeping ea dtype
* Add tests and whatsnew
* Add gh ref
1 parent 02d988c commit 1ac2675

File tree

3 files changed

+28
-19
lines changed

3 files changed

+28
-19
lines changed

doc/source/whatsnew/v1.6.0.rst

+1
Original file line numberDiff line numberDiff line change
@@ -181,6 +181,7 @@ MultiIndex
181181
- Bug in :meth:`MultiIndex.unique` losing extension array dtype (:issue:`48335`)
182182
- Bug in :meth:`MultiIndex.union` losing extension array (:issue:`48498`, :issue:`48505`)
183183
- Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`)
184+
- Bug in :meth:`MultiIndex.symmetric_difference` losing extension array dtype (:issue:`48607`)
184185
-
185186

186187
I/O

pandas/core/indexes/base.py

+11-19
Original file line numberDiff line numberDiff line change
@@ -3738,31 +3738,23 @@ def symmetric_difference(self, other, result_name=None, sort=None):
37383738
left_indexer = np.setdiff1d(
37393739
np.arange(this.size), common_indexer, assume_unique=True
37403740
)
3741-
left_diff = this._values.take(left_indexer)
3741+
left_diff = this.take(left_indexer)
37423742

37433743
# {other} minus {this}
37443744
right_indexer = (indexer == -1).nonzero()[0]
3745-
right_diff = other._values.take(right_indexer)
3745+
right_diff = other.take(right_indexer)
37463746

3747-
res_values = concat_compat([left_diff, right_diff])
3748-
res_values = _maybe_try_sort(res_values, sort)
3749-
3750-
# pass dtype so we retain object dtype
3751-
result = Index(res_values, name=result_name, dtype=res_values.dtype)
3747+
res_values = left_diff.append(right_diff)
3748+
result = _maybe_try_sort(res_values, sort)
37523749

3753-
if self._is_multi:
3754-
self = cast("MultiIndex", self)
3750+
if not self._is_multi:
3751+
return Index(result, name=result_name, dtype=res_values.dtype)
3752+
else:
3753+
left_diff = cast("MultiIndex", left_diff)
37553754
if len(result) == 0:
3756-
# On equal symmetric_difference MultiIndexes the difference is empty.
3757-
# Therefore, an empty MultiIndex is returned GH#13490
3758-
return type(self)(
3759-
levels=[[] for _ in range(self.nlevels)],
3760-
codes=[[] for _ in range(self.nlevels)],
3761-
names=result.name,
3762-
)
3763-
return type(self).from_tuples(result, names=result.name)
3764-
3765-
return result
3755+
# result might be an Index, if other was an Index
3756+
return left_diff.remove_unused_levels().set_names(result_name)
3757+
return result.set_names(result_name)
37663758

37673759
@final
37683760
def _assert_can_do_setop(self, other) -> bool:

pandas/tests/indexes/multi/test_setops.py

+16
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,22 @@ def test_setops_disallow_true(method):
440440
getattr(idx1, method)(idx2, sort=True)
441441

442442

443+
@pytest.mark.parametrize("val", [pd.NA, 5])
444+
def test_symmetric_difference_keeping_ea_dtype(any_numeric_ea_dtype, val):
445+
# GH#48607
446+
midx = MultiIndex.from_arrays(
447+
[Series([1, 2], dtype=any_numeric_ea_dtype), [2, 1]], names=["a", None]
448+
)
449+
midx2 = MultiIndex.from_arrays(
450+
[Series([1, 2, val], dtype=any_numeric_ea_dtype), [1, 1, 3]]
451+
)
452+
result = midx.symmetric_difference(midx2)
453+
expected = MultiIndex.from_arrays(
454+
[Series([1, 1, val], dtype=any_numeric_ea_dtype), [1, 2, 3]]
455+
)
456+
tm.assert_index_equal(result, expected)
457+
458+
443459
@pytest.mark.parametrize(
444460
("tuples", "exp_tuples"),
445461
[

0 commit comments

Comments
 (0)