From 626e4093dd1f0e3fb824da59b030fd2f0c09ee2a Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 22 Dec 2020 21:32:03 -0800 Subject: [PATCH 1/5] BUG: MultiIndex, IntervalIndex intersection with Categorical --- pandas/core/indexes/period.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 8df7e6912b1b2..adcbdfc32fbe0 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -11,6 +11,7 @@ from pandas.errors import InvalidIndexError from pandas.util._decorators import cache_readonly, doc +from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import ( is_bool_dtype, is_datetime64_any_dtype, From 0ee2fc1e09b06f39e3bbb667aa509817b6e91789 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 23 Dec 2020 07:33:31 -0800 Subject: [PATCH 2/5] standardize --- pandas/core/indexes/datetimelike.py | 2 ++ pandas/core/indexes/multi.py | 3 ++- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index d673d1b43f729..c1e122ad10ab2 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -12,6 +12,7 @@ from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, cache_readonly, doc +from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import ( is_bool_dtype, is_categorical_dtype, @@ -654,6 +655,7 @@ def difference(self, other, sort=None): new_idx = super().difference(other, sort=sort)._with_freq(None) return new_idx + def _intersection(self, other: Index, sort=False) -> Index: """ intersection specialized to the case with matching dtypes. diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 06a04e5a9b9eb..450dbbb38a887 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -25,11 +25,12 @@ from pandas.errors import InvalidIndexError, PerformanceWarning, UnsortedIndexError from pandas.util._decorators import Appender, cache_readonly, doc -from pandas.core.dtypes.cast import coerce_indexer_dtype +from pandas.core.dtypes.cast import coerce_indexer_dtype, find_common_type from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, is_categorical_dtype, + is_dtype_equal, is_hashable, is_integer, is_iterator, From 4206483d84adfb5104472bcc14b961fcce9049a3 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 23 Dec 2020 08:53:46 -0800 Subject: [PATCH 3/5] Share intersection --- pandas/core/indexes/datetimelike.py | 2 -- pandas/core/indexes/multi.py | 3 +-- pandas/core/indexes/period.py | 1 - 3 files changed, 1 insertion(+), 5 deletions(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index c1e122ad10ab2..d673d1b43f729 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -12,7 +12,6 @@ from pandas.compat.numpy import function as nv from pandas.util._decorators import Appender, cache_readonly, doc -from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import ( is_bool_dtype, is_categorical_dtype, @@ -655,7 +654,6 @@ def difference(self, other, sort=None): new_idx = super().difference(other, sort=sort)._with_freq(None) return new_idx - def _intersection(self, other: Index, sort=False) -> Index: """ intersection specialized to the case with matching dtypes. diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 450dbbb38a887..06a04e5a9b9eb 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -25,12 +25,11 @@ from pandas.errors import InvalidIndexError, PerformanceWarning, UnsortedIndexError from pandas.util._decorators import Appender, cache_readonly, doc -from pandas.core.dtypes.cast import coerce_indexer_dtype, find_common_type +from pandas.core.dtypes.cast import coerce_indexer_dtype from pandas.core.dtypes.common import ( ensure_int64, ensure_platform_int, is_categorical_dtype, - is_dtype_equal, is_hashable, is_integer, is_iterator, diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index adcbdfc32fbe0..8df7e6912b1b2 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -11,7 +11,6 @@ from pandas.errors import InvalidIndexError from pandas.util._decorators import cache_readonly, doc -from pandas.core.dtypes.cast import find_common_type from pandas.core.dtypes.common import ( is_bool_dtype, is_datetime64_any_dtype, From 77753da0a6263e10452585bde3480a2615165a1c Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 27 Dec 2020 19:32:49 -0800 Subject: [PATCH 4/5] BUG: IntervalIndex, PeriodIndex symmetric_difference with CategoricalIndex --- pandas/core/indexes/base.py | 14 ++++++++++++-- pandas/core/indexes/interval.py | 1 - pandas/core/indexes/period.py | 12 ------------ pandas/tests/indexes/interval/test_setops.py | 1 + pandas/tests/indexes/test_setops.py | 15 +++++++++------ 5 files changed, 22 insertions(+), 21 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index e425ee1a78de5..275c977e9b37b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2606,6 +2606,7 @@ def _validate_sort_keyword(self, sort): f"None or False; {sort} was passed." ) + @final def union(self, other, sort=None): """ Form the union of two Index objects. @@ -2818,6 +2819,7 @@ def _wrap_setop_result(self, other, result): return self._shallow_copy(result, name=name) # TODO: standardize return type of non-union setops type(self vs other) + @final def intersection(self, other, sort=False): """ Form the intersection of two Index objects. @@ -3035,9 +3037,17 @@ def symmetric_difference(self, other, result_name=None, sort=None): if result_name is None: result_name = result_name_update + if not self._should_compare(other): + return self.union(other).rename(result_name) + elif not is_dtype_equal(self.dtype, other.dtype): + dtype = find_common_type([self.dtype, other.dtype]) + this = self.astype(dtype, copy=False) + that = other.astype(dtype, copy=False) + return this.symmetric_difference(that, sort=sort).rename(result_name) + this = self._get_unique_index() other = other._get_unique_index() - indexer = this.get_indexer(other) + indexer = this.get_indexer_for(other) # {this} minus {other} common_indexer = indexer.take((indexer != -1).nonzero()[0]) @@ -3057,7 +3067,7 @@ def symmetric_difference(self, other, result_name=None, sort=None): except TypeError: pass - return Index(the_diff, dtype=self.dtype, name=result_name) + return Index(the_diff, name=result_name) def _assert_can_do_setop(self, other): if not is_list_like(other): diff --git a/pandas/core/indexes/interval.py b/pandas/core/indexes/interval.py index 1b3c4cfcccd2b..824d78d1a8d05 100644 --- a/pandas/core/indexes/interval.py +++ b/pandas/core/indexes/interval.py @@ -1016,7 +1016,6 @@ def func(self, other, sort=sort): _union = _setop("union") difference = _setop("difference") - symmetric_difference = _setop("symmetric_difference") # -------------------------------------------------------------------- diff --git a/pandas/core/indexes/period.py b/pandas/core/indexes/period.py index 8df7e6912b1b2..7746d7e617f8b 100644 --- a/pandas/core/indexes/period.py +++ b/pandas/core/indexes/period.py @@ -14,10 +14,8 @@ from pandas.core.dtypes.common import ( is_bool_dtype, is_datetime64_any_dtype, - is_dtype_equal, is_float, is_integer, - is_object_dtype, is_scalar, pandas_dtype, ) @@ -635,16 +633,6 @@ def _setop(self, other, sort, opname: str): def _intersection(self, other, sort=False): return self._setop(other, sort, opname="intersection") - def _difference(self, other, sort): - - if is_object_dtype(other): - return self.astype(object).difference(other).astype(self.dtype) - - elif not is_dtype_equal(self.dtype, other.dtype): - return self - - return self._setop(other, sort, opname="difference") - def _union(self, other, sort): return self._setop(other, sort, opname="_union") diff --git a/pandas/tests/indexes/interval/test_setops.py b/pandas/tests/indexes/interval/test_setops.py index 7bfe81e0645cb..4b7901407d94a 100644 --- a/pandas/tests/indexes/interval/test_setops.py +++ b/pandas/tests/indexes/interval/test_setops.py @@ -158,6 +158,7 @@ def test_symmetric_difference(self, closed, sort): index.left.astype("float64"), index.right, closed=closed ) result = index.symmetric_difference(other, sort=sort) + expected = empty_index(dtype="float64", closed=closed) tm.assert_index_equal(result, expected) @pytest.mark.parametrize( diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py index 538e937703de6..1035ac1f0e60b 100644 --- a/pandas/tests/indexes/test_setops.py +++ b/pandas/tests/indexes/test_setops.py @@ -248,13 +248,14 @@ def test_symmetric_difference(self, index): # GH#10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: + result = first.symmetric_difference(case) + if is_datetime64tz_dtype(first): - with pytest.raises(ValueError, match="Tz-aware"): - # `second.values` casts to tznaive - # TODO: should the symmetric_difference then be the union? - first.symmetric_difference(case) + # second.values casts to tznaive + expected = first.union(case) + tm.assert_index_equal(result, expected) continue - result = first.symmetric_difference(case) + assert tm.equalContents(result, answer) if isinstance(index, MultiIndex): @@ -448,7 +449,9 @@ def test_intersection_difference_match_empty(self, index, sort): tm.assert_index_equal(inter, diff, exact=True) -@pytest.mark.parametrize("method", ["intersection", "union"]) +@pytest.mark.parametrize( + "method", ["intersection", "union", "difference", "symmetric_difference"] +) def test_setop_with_categorical(index, sort, method): if isinstance(index, MultiIndex): # tested separately in tests.indexes.multi.test_setops From bc423210d6e0f336223f0d228cf21edb7c522497 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 28 Dec 2020 11:57:37 -0800 Subject: [PATCH 5/5] whatsnew --- doc/source/whatsnew/v1.3.0.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 35785613fb1e2..601eb782653f9 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -190,6 +190,7 @@ Datetimelike - Bug in :class:`DataFrame` and :class:`Series` constructors sometimes dropping nanoseconds from :class:`Timestamp` (resp. :class:`Timedelta`) ``data``, with ``dtype=datetime64[ns]`` (resp. ``timedelta64[ns]``) (:issue:`38032`) - Bug in :meth:`DataFrame.first` and :meth:`Series.first` returning two months for offset one month when first day is last calendar day (:issue:`29623`) - Bug in constructing a :class:`DataFrame` or :class:`Series` with mismatched ``datetime64`` data and ``timedelta64`` dtype, or vice-versa, failing to raise ``TypeError`` (:issue:`38575`) +- Bug in :meth:`DatetimeIndex.intersection`, :meth:`DatetimeIndex.symmetric_difference`, :meth:`PeriodIndex.intersection`, :meth:`PeriodIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38741`) Timedelta ^^^^^^^^^ @@ -221,7 +222,7 @@ Strings Interval ^^^^^^^^ -- Bug in :meth:`IntervalIndex.intersection` always returning object-dtype when intersecting with :class:`CategoricalIndex` (:issue:`38653`) +- Bug in :meth:`IntervalIndex.intersection` and :meth:`IntervalIndex.symmetric_difference` always returning object-dtype when operating with :class:`CategoricalIndex` (:issue:`38653`, :issue:`38741`) - -