From 29940d99f51340d3dd28684a033539302db03cf6 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 18 Oct 2021 09:35:10 -0700 Subject: [PATCH] ENH: preserve RangeIndex in insert, delete --- pandas/core/indexes/range.py | 45 +++++++++++- pandas/tests/indexes/ranges/test_range.py | 86 +++++++++++++++++++++-- 2 files changed, 124 insertions(+), 7 deletions(-) diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 9eb086ed97180..487a1880caff5 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -15,7 +15,10 @@ import numpy as np -from pandas._libs import index as libindex +from pandas._libs import ( + index as libindex, + lib, +) from pandas._libs.lib import no_default from pandas._typing import ( Dtype, @@ -719,9 +722,41 @@ def symmetric_difference(self, other, result_name: Hashable = None, sort=None): # -------------------------------------------------------------------- + # error: Return type "Index" of "delete" incompatible with return type + # "RangeIndex" in supertype "Index" + def delete(self, loc) -> Index: # type: ignore[override] + # In some cases we can retain RangeIndex, see also + # DatetimeTimedeltaMixin._get_delete_freq + if is_integer(loc): + if loc == 0 or loc == -len(self): + return self[1:] + if loc == -1 or loc == len(self) - 1: + return self[:-1] + + elif lib.is_list_like(loc): + slc = lib.maybe_indices_to_slice(np.asarray(loc, dtype=np.intp), len(self)) + if isinstance(slc, slice) and slc.step is not None and slc.step < 0: + rng = range(len(self))[slc][::-1] + slc = slice(rng.start, rng.stop, rng.step) + + if isinstance(slc, slice) and slc.step in [1, None]: + # Note: maybe_indices_to_slice will never return a slice + # with 'slc.start is None'; may have slc.stop None in cases + # with negative step + if slc.start == 0: + return self[slc.stop :] + elif slc.stop in [len(self), None]: + return self[: slc.start] + + # TODO: more generally, self.difference(self[slc]), + # once _difference is better about 
retaining RangeIndex + + return super().delete(loc) + def insert(self, loc: int, item) -> Index: if len(self) and (is_integer(item) or is_float(item)): - # We can retain RangeIndex is inserting at the beginning or end + # We can retain RangeIndex if inserting at the beginning or end, + # or right in the middle. rng = self._range if loc == 0 and item == self[0] - self.step: new_rng = range(rng.start - rng.step, rng.stop, rng.step) @@ -731,6 +766,12 @@ def insert(self, loc: int, item) -> Index: new_rng = range(rng.start, rng.stop + rng.step, rng.step) return type(self)._simple_new(new_rng, name=self.name) + elif len(self) == 2 and item == self[0] + self.step / 2: + # e.g. inserting 1 into [0, 2] + step = int(self.step / 2) + new_rng = range(self.start, self.stop, step) + return type(self)._simple_new(new_rng, name=self.name) + return super().insert(loc, item) def _concat(self, indexes: list[Index], name: Hashable) -> Index: diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py index 9732c0faf9efd..7591620de168a 100644 --- a/pandas/tests/indexes/ranges/test_range.py +++ b/pandas/tests/indexes/ranges/test_range.py @@ -127,18 +127,40 @@ def test_insert(self): expected = Index([0, pd.NaT, 1, 2, 3, 4], dtype=object) tm.assert_index_equal(result, expected) + def test_insert_edges_preserves_rangeindex(self): + idx = Index(range(4, 9, 2)) + + result = idx.insert(0, 2) + expected = Index(range(2, 9, 2)) + tm.assert_index_equal(result, expected, exact=True) + + result = idx.insert(3, 10) + expected = Index(range(4, 11, 2)) + tm.assert_index_equal(result, expected, exact=True) + + def test_insert_middle_preserves_rangeindex(self): + # insert in the middle + idx = Index(range(0, 3, 2)) + result = idx.insert(1, 1) + expected = Index(range(3)) + tm.assert_index_equal(result, expected, exact=True) + + idx = idx * 2 + result = idx.insert(1, 2) + expected = expected * 2 + tm.assert_index_equal(result, expected, exact=True) + def 
test_delete(self): idx = RangeIndex(5, name="Foo") - expected = idx[1:].astype(int) + expected = idx[1:] result = idx.delete(0) - # TODO: could preserve RangeIndex at the ends - tm.assert_index_equal(result, expected, exact="equiv") + tm.assert_index_equal(result, expected, exact=True) assert result.name == expected.name - expected = idx[:-1].astype(int) + expected = idx[:-1] result = idx.delete(-1) - tm.assert_index_equal(result, expected, exact="equiv") + tm.assert_index_equal(result, expected, exact=True) assert result.name == expected.name msg = "index 5 is out of bounds for axis 0 with size 5" @@ -146,6 +168,60 @@ def test_delete(self): # either depending on numpy version result = idx.delete(len(idx)) + def test_delete_preserves_rangeindex(self): + idx = Index(range(2), name="foo") + + result = idx.delete([1]) + expected = Index(range(1), name="foo") + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(1) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_preserves_rangeindex_list_at_end(self): + idx = RangeIndex(0, 6, 1) + + loc = [2, 3, 4, 5] + result = idx.delete(loc) + expected = idx[:2] + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(loc[::-1]) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_preserves_rangeindex_list_middle(self): + idx = RangeIndex(0, 6, 1) + + loc = [1, 2, 3, 4] + result = idx.delete(loc) + expected = RangeIndex(0, 6, 5) + tm.assert_index_equal(result, expected, exact="equiv") # TODO: retain! + + result = idx.delete(loc[::-1]) + tm.assert_index_equal(result, expected, exact="equiv") # TODO: retain! 
+ + def test_delete_all_preserves_rangeindex(self): + idx = RangeIndex(0, 6, 1) + + loc = [0, 1, 2, 3, 4, 5] + result = idx.delete(loc) + expected = idx[:0] + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(loc[::-1]) + tm.assert_index_equal(result, expected, exact=True) + + def test_delete_not_preserving_rangeindex(self): + idx = RangeIndex(0, 6, 1) + + loc = [0, 3, 5] + result = idx.delete(loc) + expected = Int64Index([1, 2, 4]) + tm.assert_index_equal(result, expected, exact=True) + + result = idx.delete(loc[::-1]) + tm.assert_index_equal(result, expected, exact=True) + def test_view(self): i = RangeIndex(0, name="Foo") i_view = i.view()