diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 08edc7531bcd6..4f1f54af2dd6a 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -772,6 +772,7 @@ Other - Bug in :meth:`Index.union` behaving differently depending on whether operand is an :class:`Index` or other list-like (:issue:`36384`) - Passing an array with 2 or more dimensions to the :class:`Series` constructor now raises the more specific ``ValueError`` rather than a bare ``Exception`` (:issue:`35744`) - Bug in ``dir`` where ``dir(obj)`` wouldn't show attributes defined on the instance for pandas objects (:issue:`37173`) +- Bug in :meth:`RangeIndex.difference` returning :class:`Int64Index` in some cases where it should return :class:`RangeIndex` (:issue:`38028`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 8e12f84895361..8e5725fd72544 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -660,13 +660,17 @@ def difference(self, other, sort=None): if not isinstance(overlap, RangeIndex): # We wont end up with RangeIndex, so fall back return super().difference(other, sort=sort) + if overlap.step != first.step: + # In some cases we might be able to get a RangeIndex back, + # but not worth the effort. + return super().difference(other, sort=sort) if overlap[0] == first.start: # The difference is everything after the intersection new_rng = range(overlap[-1] + first.step, first.stop, first.step) - elif overlap[-1] == first.stop: + elif overlap[-1] == first[-1]: # The difference is everything before the intersection - new_rng = range(first.start, overlap[0] - first.step, first.step) + new_rng = range(first.start, overlap[0], first.step) else: # The difference is not range-like return super().difference(other, sort=sort) diff --git a/pandas/tests/indexes/ranges/test_setops.py b/pandas/tests/indexes/ranges/test_setops.py index 9c9f5dbdf7e7f..1fd41b017221b 100644 --- a/pandas/tests/indexes/ranges/test_setops.py +++ b/pandas/tests/indexes/ranges/test_setops.py @@ -247,21 +247,38 @@ def test_difference(self): result = obj.difference(obj) expected = RangeIndex.from_range(range(0), name="foo") - tm.assert_index_equal(result, expected) + tm.assert_index_equal(result, expected, exact=True) result = obj.difference(expected.rename("bar")) - tm.assert_index_equal(result, obj.rename(None)) + tm.assert_index_equal(result, obj.rename(None), exact=True) result = obj.difference(obj[:3]) - tm.assert_index_equal(result, obj[3:]) + tm.assert_index_equal(result, obj[3:], exact=True) result = obj.difference(obj[-3:]) - tm.assert_index_equal(result, obj[:-3]) + tm.assert_index_equal(result, obj[:-3], exact=True) + + result = obj[::-1].difference(obj[-3:]) + tm.assert_index_equal(result, obj[:-3][::-1], exact=True) + + result = obj[::-1].difference(obj[-3:][::-1]) + tm.assert_index_equal(result, obj[:-3][::-1], exact=True) result = obj.difference(obj[2:6]) expected = Int64Index([1, 2, 7, 8, 9], name="foo") tm.assert_index_equal(result, expected) + def test_difference_mismatched_step(self): + obj = RangeIndex.from_range(range(1, 10), name="foo") + + result = obj.difference(obj[::2]) + expected = obj[1::2]._int64index + tm.assert_index_equal(result, expected, exact=True) + + result = obj.difference(obj[1::2]) + expected = obj[::2]._int64index + tm.assert_index_equal(result, expected, exact=True) + def test_symmetric_difference(self): # GH#12034 Cases where we operate against another RangeIndex and may # get back another RangeIndex