Skip to content

BUG: IntervalIndex, PeriodIndex, DatetimeIndex symmetric_difference with Categorical #38741

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 6 commits on Dec 28, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v1.3.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,7 @@ Datetimelike
- Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`)
- Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`)
- Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`)
- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`)

Timedelta
^^^^^^^^^
Expand Down Expand Up @@ -221,7 +222,7 @@ Strings

Interval
^^^^^^^^
- Bug in :meth:`IntervalIndex.intersection` always returning object-dtype when intersecting with :class:`CategoricalIndex` (:issue:`38653`)
- Bug in :meth:`IntervalIndex.intersection` and :meth:`IntervalIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38653`, :issue:`38741`)
-
-

Expand Down
14 changes: 12 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2606,6 +2606,7 @@ def _validate_sort_keyword(self, sort):
f"None or False; {sort} was passed."
)

@final
def union(self, other, sort=None):
"""
Form the union of two Index objects.
Expand Down Expand Up @@ -2818,6 +2819,7 @@ def _wrap_setop_result(self, other, result):
return self._shallow_copy(result, name=name)

# TODO: standardize return type of non-union setops type(self vs other)
@final
def intersection(self, other, sort=False):
"""
Form the intersection of two Index objects.
Expand Down Expand Up @@ -3035,9 +3037,17 @@ def symmetric_difference(self, other, result_name=None, sort=None):
if result_name is None:
result_name = result_name_update

if not self._should_compare(other):
return self.union(other).rename(result_name)
elif not is_dtype_equal(self.dtype, other.dtype):
dtype = find_common_type([self.dtype, other.dtype])
this = self.astype(dtype, copy=False)
that = other.astype(dtype, copy=False)
return this.symmetric_difference(that, sort=sort).rename(result_name)

this = self._get_unique_index()
other = other._get_unique_index()
indexer = this.get_indexer(other)
indexer = this.get_indexer_for(other)

# {this} minus {other}
common_indexer = indexer.take((indexer != -1).nonzero()[0])
Expand All @@ -3057,7 +3067,7 @@ def symmetric_difference(self, other, result_name=None, sort=None):
except TypeError:
pass

return Index(the_diff, dtype=self.dtype, name=result_name)
return Index(the_diff, name=result_name)

def _assert_can_do_setop(self, other):
if not is_list_like(other):
Expand Down
1 change: 0 additions & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -1016,7 +1016,6 @@ def func(self, other, sort=sort):

_union = _setop("union")
difference = _setop("difference")
symmetric_difference = _setop("symmetric_difference")

# --------------------------------------------------------------------

Expand Down
12 changes: 0 additions & 12 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,8 @@
from pandas.core.dtypes.common import (
is_bool_dtype,
is_datetime64_any_dtype,
is_dtype_equal,
is_float,
is_integer,
is_object_dtype,
is_scalar,
pandas_dtype,
)
Expand Down Expand Up @@ -635,16 +633,6 @@ def _setop(self, other, sort, opname: str):
def _intersection(self, other, sort=False):
return self._setop(other, sort, opname="intersection")

def _difference(self, other, sort):

if is_object_dtype(other):
return self.astype(object).difference(other).astype(self.dtype)

elif not is_dtype_equal(self.dtype, other.dtype):
return self

return self._setop(other, sort, opname="difference")

def _union(self, other, sort):
return self._setop(other, sort, opname="_union")

Expand Down
1 change: 1 addition & 0 deletions pandas/tests/indexes/interval/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,7 @@ def test_symmetric_difference(self, closed, sort):
index.left.astype("float64"), index.right, closed=closed
)
result = index.symmetric_difference(other, sort=sort)
expected = empty_index(dtype="float64", closed=closed)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
Expand Down
15 changes: 9 additions & 6 deletions pandas/tests/indexes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -248,13 +248,14 @@ def test_symmetric_difference(self, index):
# GH#10149
cases = [klass(second.values) for klass in [np.array, Series, list]]
for case in cases:
result = first.symmetric_difference(case)

if is_datetime64tz_dtype(first):
with pytest.raises(ValueError, match="Tz-aware"):
# `second.values` casts to tznaive
# TODO: should the symmetric_difference then be the union?
first.symmetric_difference(case)
# second.values casts to tznaive
expected = first.union(case)
tm.assert_index_equal(result, expected)
continue
result = first.symmetric_difference(case)

assert tm.equalContents(result, answer)

if isinstance(index, MultiIndex):
Expand Down Expand Up @@ -448,7 +449,9 @@ def test_intersection_difference_match_empty(self, index, sort):
tm.assert_index_equal(inter, diff, exact=True)


@pytest.mark.parametrize("method", ["intersection", "union"])
@pytest.mark.parametrize(
"method", ["intersection", "union", "difference", "symmetric_difference"]
)
def test_setop_with_categorical(index, sort, method):
if isinstance(index, MultiIndex):
# tested separately in tests.indexes.multi.test_setops
Expand Down