diff --git a/doc/source/whatsnew/v0.18.2.txt b/doc/source/whatsnew/v0.18.2.txt index 40fec4d071f16..b581e71ec5c50 100644 --- a/doc/source/whatsnew/v0.18.2.txt +++ b/doc/source/whatsnew/v0.18.2.txt @@ -482,7 +482,7 @@ Bug Fixes - Bug in ``pd.read_hdf()`` where attempting to load an HDF file with a single dataset, that had one or more categorical columns, failed unless the key argument was set to the name of the dataset. (:issue:`13231`) - Bug in ``.rolling()`` that allowed a negative integer window in contruction of the ``Rolling()`` object, but would later fail on aggregation (:issue:`13383`) - +- Bug in ``MultiIndex.symmetric_difference`` with two equal MultiIndexes (:issue:`13490`) - Bug in various index types, which did not propagate the name of passed index (:issue:`12309`) - Bug in ``DatetimeIndex``, which did not honour the ``copy=True`` (:issue:`13205`) - Bug in ``DatetimeIndex.is_normalized`` returns incorrectly for normalized date_range in case of local timezones (:issue:`13459`) @@ -526,4 +526,4 @@ Bug Fixes - Bug in ``pd.to_numeric`` when ``errors='coerce'`` and input contains non-hashable objects (:issue:`13324`) -- Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`) +- Bug in ``Categorical.remove_unused_categories()`` changes ``.codes`` dtype to platform int (:issue:`13261`) \ No newline at end of file diff --git a/pandas/indexes/multi.py b/pandas/indexes/multi.py index 05b2045a4850f..9b7b280121164 100644 --- a/pandas/indexes/multi.py +++ b/pandas/indexes/multi.py @@ -2042,9 +2042,7 @@ def intersection(self, other): other_tuples = other._values uniq_tuples = sorted(set(self_tuples) & set(other_tuples)) if len(uniq_tuples) == 0: - return MultiIndex(levels=[[]] * self.nlevels, - labels=[[]] * self.nlevels, - names=result_names, verify_integrity=False) + return self._create_as_empty(names=result_names) else: return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, names=result_names) @@ -2053,6 
+2051,10 @@ def difference(self, other): """ Compute sorted set difference of two MultiIndex objects + Parameters + ---------- + other : MultiIndex or array / Index of tuples + Returns ------- diff : MultiIndex @@ -2064,20 +2066,81 @@ def difference(self, other): return self if self.equals(other): - return MultiIndex(levels=[[]] * self.nlevels, - labels=[[]] * self.nlevels, - names=result_names, verify_integrity=False) + return self._create_as_empty(names=result_names) difference = sorted(set(self._values) - set(other._values)) if len(difference) == 0: - return MultiIndex(levels=[[]] * self.nlevels, - labels=[[]] * self.nlevels, - names=result_names, verify_integrity=False) + return self._create_as_empty(names=result_names) else: return MultiIndex.from_tuples(difference, sortorder=0, names=result_names) + def _create_as_empty(self, nlevels=None, names=None, + verify_integrity=False): + """ + Creates an empty MultiIndex + + Parameters + ---------- + nlevels : int, optional + The number of levels in the empty MultiIndex. If None defaults + to the current number of levels + names : sequence of objects, optional + Names for each of the index levels. If None defaults to the + current names. 
+ verify_integrity : boolean, default False + Check that the levels/labels are consistent and valid + + Returns + ------- + empty : MultiIndex + """ + + if nlevels is None: + nlevels = len(self.levels) + if names is None: + names = self.names + + return MultiIndex(levels=[[]] * nlevels, + labels=[[]] * nlevels, + names=names, verify_integrity=verify_integrity) + + def symmetric_difference(self, other, result_name=None): + """ + Compute sorted set symmetric difference of two MultiIndex objects + + Parameters + ---------- + other : MultiIndex or array / Index of tuples + result_name : sequence of objects, optional + Names for the levels of the result. If None, the names returned + by ``_convert_can_do_setop`` are used + + Returns + ------- + diff : MultiIndex + """ + self._assert_can_do_setop(other) + other, result_name_update = self._convert_can_do_setop(other) + + if result_name is None: + result_name = result_name_update + + if self.equals(other): + return self._create_as_empty(names=result_name) + + difference = sorted(set((self.difference(other)). + union(other.difference(self)))) + + # ``from_tuples`` cannot infer the number of levels from an empty + # list, e.g. when both indexes hold the same tuples in another order + if len(difference) == 0: + return self._create_as_empty(names=result_name) + + return MultiIndex.from_tuples(difference, sortorder=0, + names=result_name) + @Appender(_index_shared_docs['astype']) def astype(self, dtype, copy=True): if not is_object_dtype(np.dtype(dtype)): @@ -2092,9 +2155,7 @@ def _convert_can_do_setop(self, other): if not hasattr(other, 'names'): if len(other) == 0: - other = MultiIndex(levels=[[]] * self.nlevels, - labels=[[]] * self.nlevels, - verify_integrity=False) + other = self._create_as_empty() else: msg = 'other must be a MultiIndex or a list of tuples' try: diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index d535eaa238567..6146618f29fbb 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -752,13 +752,6 @@ def test_symmetric_difference(self): self.assertTrue(tm.equalContents(result, expected)) self.assertIsNone(result.name) - # multiIndex - idx1 = MultiIndex.from_tuples(self.tuples) - idx2 = MultiIndex.from_tuples([('foo', 1), ('bar', 3)]) - result = idx1.symmetric_difference(idx2) - expected = MultiIndex.from_tuples([('bar', 2), ('baz', 3), ('bar', 3)]) - 
self.assertTrue(tm.equalContents(result, expected)) - # nans: # GH #6444, sorting of nans. Make sure the number of nans is right # and the correct non-nan values are there. punt on sorting. diff --git a/pandas/tests/indexes/test_multi.py b/pandas/tests/indexes/test_multi.py index bec52f5f47b09..7216734a686cb 100644 --- a/pandas/tests/indexes/test_multi.py +++ b/pandas/tests/indexes/test_multi.py @@ -1353,6 +1353,20 @@ def test_difference(self): assertRaisesRegexp(TypeError, "other must be a MultiIndex or a list" " of tuples", first.difference, [1, 2, 3, 4, 5]) + def test_symmetric_difference(self): + idx1 = MultiIndex.from_tuples(self.index, names=('A', 'B')) + idx2 = MultiIndex.from_tuples([('foo', 'one'), ('bar', 'one'), + ('baz', 'two'), ('qux', 'two'), + ('qux', 'one')], names=('A', 'B')) + result = idx1.symmetric_difference(idx2) + expected = MultiIndex.from_tuples([('foo', 'two')], names=('A', 'B')) + tm.assert_index_equal(result, expected) + + # equal MultiIndexes: expected is built from the input, not the result + result = self.index.symmetric_difference(self.index) + expected = self.index._create_as_empty() + tm.assert_index_equal(result, expected) + def test_from_tuples(self): assertRaisesRegexp(TypeError, 'Cannot infer number of levels from' ' empty list', MultiIndex.from_tuples, [])