From 6f55671680545d02a46f1535437e0eda8ad22136 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 22 Sep 2020 19:00:35 -0700 Subject: [PATCH 1/3] ENH: return RangeIndex from difference, symmetric_difference where possible closes #12034 --- pandas/core/indexes/range.py | 59 ++++++++++++++++++++++ pandas/tests/indexes/ranges/test_setops.py | 48 ++++++++++++++++++ 2 files changed, 107 insertions(+) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 684691501de5c..5ad3da6c737c1 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -466,6 +466,9 @@ def equals(self, other: object) -> bool: return self._range == other._range return super().equals(other) + # -------------------------------------------------------------------- + # Set Operations + def intersection(self, other, sort=False): """ Form the intersection of two Index objects. @@ -632,6 +635,57 @@ def _union(self, other, sort): return type(self)(start_r, end_r + step_o, step_o) return self._int64index._union(other, sort=sort) + def difference(self, other, sort=None): + # optimized set operation if we have another RangeIndex + self._validate_sort_keyword(sort) + + if not isinstance(other, RangeIndex): + return super().difference(other, sort=sort) + + res_name = ops.get_op_result_name(self, other) + + first = self._range[::-1] if self.step < 0 else self._range + overlap = self.intersection(other) + if overlap.step < 0: + overlap = overlap[::-1] + + if len(overlap) == 0: + return self._shallow_copy(name=res_name) + if len(overlap) == len(self): + return self[:0].rename(res_name) + if not isinstance(overlap, RangeIndex): + # We wont end up with RangeIndex, so fall back + return super().difference(other, sort=sort) + + if overlap[0] == first.start: + # The difference is everything after the intersection + new_rng = range(overlap[-1] + first.step, first.stop, first.step) + elif overlap[-1] == first.stop: + # The difference is everything before the intersection + new_rng 
= range(first.start, overlap[0] - first.step, first.step) + else: + # The difference is not range-like + return super().difference(other, sort=sort) + + new_index = type(self)._simple_new(new_rng, name=res_name) + if first is not self._range: + new_index = new_index[::-1] + return new_index + + def symmetric_difference(self, other, result_name=None, sort=None): + if not isinstance(other, RangeIndex) or sort is not None: + return super().symmetric_difference(other, result_name, sort) + + left = self.difference(other) + right = other.difference(self) + result = left.union(right) + + if result_name is not None: + result = result.rename(result_name) + return result + + # -------------------------------------------------------------------- + @doc(Int64Index.join) def join(self, other, how="left", level=None, return_indexers=False, sort=False): if how == "outer" and self is not other: @@ -744,12 +798,17 @@ def __floordiv__(self, other): return self._simple_new(new_range, name=self.name) return self._int64index // other + # -------------------------------------------------------------------- + # Reductions + def all(self) -> bool: return 0 not in self._range def any(self) -> bool: return any(self._range) + # -------------------------------------------------------------------- + @classmethod def _add_numeric_methods_binary(cls): """ add in numeric methods, specialized to RangeIndex """ diff --git a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py index 5b565310cfb9c..6f04f2aac718a 100644 --- a/pandas/tests/indexes/ranges/test_setops.py +++ b/pandas/tests/indexes/ranges/test_setops.py @@ -239,3 +239,51 @@ def test_union_sorted(self, unions): res3 = idx1._int64index.union(idx2, sort=None) tm.assert_index_equal(res2, expected_sorted, exact=True) tm.assert_index_equal(res3, expected_sorted) + + def test_difference(self): + # GH#12034 Cases where we operate against another RangeIndex and may + # get back another RangeIndex + obj = 
RangeIndex.from_range(range(1, 10)) + + result = obj.difference(obj) + expected = RangeIndex.from_range(range(0)) + tm.assert_index_equal(result, expected) + + result = obj.difference(expected) + tm.assert_index_equal(result, obj) + + result = obj.difference(obj[:3]) + tm.assert_index_equal(result, obj[3:]) + + result = obj.difference(obj[-3:]) + tm.assert_index_equal(result, obj[:-3]) + + result = obj.difference(obj[2:6]) + expected = Int64Index([1, 2, 7, 8, 9]) + tm.assert_index_equal(result, expected) + + def test_symmetric_difference(self): + # GH#12034 Cases where we operate against another RangeIndex and may + # get back another RangeIndex + left = RangeIndex.from_range(range(1, 10)) + + result = left.symmetric_difference(left) + expected = RangeIndex.from_range(range(0)) + tm.assert_index_equal(result, expected) + + result = left.symmetric_difference(expected) + tm.assert_index_equal(result, left) + + result = left[:-2].symmetric_difference(left[2:]) + expected = Int64Index([1, 2, 8, 9]) + tm.assert_index_equal(result, expected) + + right = RangeIndex.from_range(range(10, 15)) + + result = left.symmetric_difference(right) + expected = RangeIndex.from_range(range(1, 15)) + tm.assert_index_equal(result, expected) + + result = left.symmetric_difference(right[1:]) + expected = Int64Index([1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 12, 13, 14]) + tm.assert_index_equal(result, expected) From 041b99eb995f8efc8adf53d4564887830f89a290 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 26 Sep 2020 10:19:09 -0700 Subject: [PATCH 2/3] test name retention --- pandas/tests/indexes/ranges/test_setops.py | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py index 6f04f2aac718a..9c9f5dbdf7e7f 100644 --- a/pandas/tests/indexes/ranges/test_setops.py +++ b/pandas/tests/indexes/ranges/test_setops.py @@ -243,14 +243,14 @@ def test_union_sorted(self, unions): def 
test_difference(self): # GH#12034 Cases where we operate against another RangeIndex and may # get back another RangeIndex - obj = RangeIndex.from_range(range(1, 10)) + obj = RangeIndex.from_range(range(1, 10), name="foo") result = obj.difference(obj) - expected = RangeIndex.from_range(range(0)) + expected = RangeIndex.from_range(range(0), name="foo") tm.assert_index_equal(result, expected) - result = obj.difference(expected) - tm.assert_index_equal(result, obj) + result = obj.difference(expected.rename("bar")) + tm.assert_index_equal(result, obj.rename(None)) result = obj.difference(obj[:3]) tm.assert_index_equal(result, obj[3:]) @@ -259,23 +259,23 @@ def test_difference(self): tm.assert_index_equal(result, obj[:-3]) result = obj.difference(obj[2:6]) - expected = Int64Index([1, 2, 7, 8, 9]) + expected = Int64Index([1, 2, 7, 8, 9], name="foo") tm.assert_index_equal(result, expected) def test_symmetric_difference(self): # GH#12034 Cases where we operate against another RangeIndex and may # get back another RangeIndex - left = RangeIndex.from_range(range(1, 10)) + left = RangeIndex.from_range(range(1, 10), name="foo") result = left.symmetric_difference(left) - expected = RangeIndex.from_range(range(0)) + expected = RangeIndex.from_range(range(0), name="foo") tm.assert_index_equal(result, expected) - result = left.symmetric_difference(expected) - tm.assert_index_equal(result, left) + result = left.symmetric_difference(expected.rename("bar")) + tm.assert_index_equal(result, left.rename(None)) result = left[:-2].symmetric_difference(left[2:]) - expected = Int64Index([1, 2, 8, 9]) + expected = Int64Index([1, 2, 8, 9], name="foo") tm.assert_index_equal(result, expected) right = RangeIndex.from_range(range(10, 15)) From 2e9cd0bf854b6845c3880d96c7d8b110793806b0 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 2 Oct 2020 16:09:41 -0700 Subject: [PATCH 3/3] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git 
a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6d1196b783f74..9881a71f7dcdf 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -169,6 +169,7 @@ Other enhancements - :meth:`Rolling.mean()` and :meth:`Rolling.sum()` use Kahan summation to calculate the mean to avoid numerical problems (:issue:`10319`, :issue:`11645`, :issue:`13254`, :issue:`32761`, :issue:`36031`) - :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with datetimelike dtypes will now try to cast string arguments (listlike and scalar) to the matching datetimelike type (:issue:`36346`) - Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`) +- Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`) .. _whatsnew_120.api_breaking.python: