-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
BUG: Fix bug with symmetric difference of two equal MultiIndexes GH12490 #13504
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2042,9 +2042,7 @@ def intersection(self, other): | |
other_tuples = other._values | ||
uniq_tuples = sorted(set(self_tuples) & set(other_tuples)) | ||
if len(uniq_tuples) == 0: | ||
return MultiIndex(levels=[[]] * self.nlevels, | ||
labels=[[]] * self.nlevels, | ||
names=result_names, verify_integrity=False) | ||
return self._create_as_empty(names=result_names) | ||
else: | ||
return MultiIndex.from_arrays(lzip(*uniq_tuples), sortorder=0, | ||
names=result_names) | ||
|
@@ -2053,6 +2051,10 @@ def difference(self, other): | |
""" | ||
Compute sorted set difference of two MultiIndex objects | ||
|
||
Parameters | ||
---------- | ||
other : MultiIndex or array / Index of tuples | ||
|
||
Returns | ||
------- | ||
diff : MultiIndex | ||
|
@@ -2064,20 +2066,69 @@ def difference(self, other): | |
return self | ||
|
||
if self.equals(other): | ||
return MultiIndex(levels=[[]] * self.nlevels, | ||
labels=[[]] * self.nlevels, | ||
names=result_names, verify_integrity=False) | ||
return self._create_as_empty(names=result_names) | ||
|
||
difference = sorted(set(self._values) - set(other._values)) | ||
|
||
if len(difference) == 0: | ||
return MultiIndex(levels=[[]] * self.nlevels, | ||
labels=[[]] * self.nlevels, | ||
names=result_names, verify_integrity=False) | ||
return self._create_as_empty(names=result_names) | ||
else: | ||
return MultiIndex.from_tuples(difference, sortorder=0, | ||
names=result_names) | ||
|
||
def _create_as_empty(self, nlevels=None, names=None, | ||
verify_integrity=False): | ||
""" | ||
Creates an empty MultiIndex | ||
|
||
Parameters | ||
---------- | ||
nlevels : optional int, default None | ||
The number of levels in the empty MultiIndex. If None defaults | ||
to the current number of levels | ||
names : optional sequence of objects, default None | ||
Names for each of the index levels. If None defaults to the | ||
current names. | ||
verify_integrity : boolean, default False | ||
Check that the levels/labels are consistent and valid | ||
|
||
Returns | ||
------- | ||
empty : MultiIndex | ||
""" | ||
|
||
if nlevels is None: | ||
nlevels = len(self.levels) | ||
if names is None: | ||
names = self.names | ||
|
||
return MultiIndex(levels=[[]] * nlevels, | ||
labels=[[]] * nlevels, | ||
names=names, verify_integrity=verify_integrity) | ||
|
||
def symmetric_difference(self, other, result_name=None): | ||
""" | ||
Compute sorted set symmetric difference of two MultiIndex objects | ||
|
||
Returns | ||
------- | ||
diff : MultiIndex | ||
""" | ||
self._assert_can_do_setop(other) | ||
other, result_name_update = self._convert_can_do_setop(other) | ||
|
||
if result_name is None: | ||
result_name = result_name_update | ||
|
||
if self.equals(other): | ||
return self._create_as_empty(names=result_name) | ||
|
||
difference = sorted(set((self.difference(other)). | ||
union(other.difference(self)))) | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Just a comment: This sorting breaks with mixed-int values (same issue as in #13432). I suppose this line may be safely replaced with the code from PR #13514, slightly changed, namely:
this = self._get_unique_index()
other = other._get_unique_index()
indexer = this.get_indexer(other)
# {this} minus {other}
common_indexer = indexer.take((indexer != -1).nonzero()[0])
left_indexer = np.setdiff1d(np.arange(this.size), common_indexer,
assume_unique=True)
left_diff = this.values.take(left_indexer)
# {other} minus {this}
right_indexer = (indexer == -1).nonzero()[0]
right_diff = other.values.take(right_indexer)
the_diff = _concat._concat_compat([left_diff, right_diff]) Then if return self._shallow_copy(the_diff, names=result_name).sortlevel() (In comparison to Similar change may be applied to |
||
|
||
return MultiIndex.from_tuples(difference, sortorder=0, | ||
names=result_name) | ||
|
||
@Appender(_index_shared_docs['astype']) | ||
def astype(self, dtype, copy=True): | ||
if not is_object_dtype(np.dtype(dtype)): | ||
|
@@ -2092,9 +2143,7 @@ def _convert_can_do_setop(self, other): | |
|
||
if not hasattr(other, 'names'): | ||
if len(other) == 0: | ||
other = MultiIndex(levels=[[]] * self.nlevels, | ||
labels=[[]] * self.nlevels, | ||
verify_integrity=False) | ||
other = self._create_as_empty() | ||
else: | ||
msg = 'other must be a MultiIndex or a list of tuples' | ||
try: | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add parameters section