pandas-dev · jbrockmendel · Jul 18, 2023 · May 26, 2023 · May 30, 2023 · May 30, 2023
diff --git a/doc/source/whatsnew/v2.1.0.rst b/doc/source/whatsnew/v2.1.0.rst
@@ -409,6 +409,7 @@ Performance improvements
 - Performance improvement in various :class:`MultiIndex` set and indexing operations (:issue:`53955`)
 - Performance improvement when doing various reshaping operations on :class:`arrays.IntegerArrays` & :class:`arrays.FloatingArray` by avoiding doing unnecessary validation (:issue:`53013`)
 - Performance improvement when indexing with pyarrow timestamp and duration dtypes (:issue:`53368`)
+- Performance improvement when passing an array to :meth:`RangeIndex.take`, :meth:`DataFrame.loc`, or :meth:`DataFrame.iloc` and the DataFrame is using a RangeIndex (:issue:`53387`)
 -
 
 .. ---------------------------------------------------------------------------

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -1266,17 +1266,17 @@ def _ensure_nanosecond_dtype(dtype: DtypeObj) -> None:
 
 # TODO: other value-dependent functions to standardize here include
 #  Index._find_common_type_compat
-def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
+def find_result_type(left_dtype: DtypeObj, right: Any) -> DtypeObj:
     """
-    Find the type/dtype for a the result of an operation between these objects.
+    Find the type/dtype for the result of an operation between objects.
 
-    This is similar to find_common_type, but looks at the objects instead
-    of just their dtypes. This can be useful in particular when one of the
-    objects does not have a `dtype`.
+    This is similar to find_common_type, but looks at the right object instead
+    of just its dtype. This can be useful in particular when the right
+    object does not have a `dtype`.
 
     Parameters
     ----------
-    left : np.ndarray or ExtensionArray
+    left_dtype : np.dtype or ExtensionDtype
     right : Any
 
     Returns
@@ -1291,26 +1291,24 @@ def find_result_type(left: ArrayLike, right: Any) -> DtypeObj:
     new_dtype: DtypeObj
 
     if (
-        isinstance(left, np.ndarray)
-        and left.dtype.kind in "iuc"
+        isinstance(left_dtype, np.dtype)
+        and left_dtype.kind in "iuc"
         and (lib.is_integer(right) or lib.is_float(right))
     ):
         # e.g. with int8 dtype and right=512, we want to end up with
         # np.int16, whereas infer_dtype_from(512) gives np.int64,
         #  which will make us upcast too far.
-        if lib.is_float(right) and right.is_integer() and left.dtype.kind != "f":
+        if lib.is_float(right) and right.is_integer() and left_dtype.kind != "f":
             right = int(right)
+        new_dtype = np.result_type(left_dtype, right)
 
-        new_dtype = np.result_type(left, right)
-
-    elif is_valid_na_for_dtype(right, left.dtype):
+    elif is_valid_na_for_dtype(right, left_dtype):
         # e.g. IntervalDtype[int] and None/np.nan
-        new_dtype = ensure_dtype_can_hold_na(left.dtype)
+        new_dtype = ensure_dtype_can_hold_na(left_dtype)
 
     else:
         dtype, _ = infer_dtype_from(right)
-
-        new_dtype = find_common_type([left.dtype, dtype])
+        new_dtype = find_common_type([left_dtype, dtype])
 
     return new_dtype
 

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -1162,7 +1162,6 @@ def take(
             taken = values.take(
                 indices, allow_fill=allow_fill, fill_value=self._na_value
             )
-        # _constructor so RangeIndex-> Index with an int64 dtype
         return self._constructor._simple_new(taken, name=self.name)
 
     @final
@@ -5560,6 +5559,10 @@ def equals(self, other: Any) -> bool:
         if not isinstance(other, Index):
             return False
 
+        if len(self) != len(other):
+            # quickly return if the lengths are different
+            return False
+
         if is_object_dtype(self.dtype) and not is_object_dtype(other.dtype):
             # if other is not object, use other's logic for coercion
             return other.equals(self)
@@ -6290,7 +6293,7 @@ def _find_common_type_compat(self, target) -> DtypeObj:
             ):
                 return _dtype_obj
 
-        dtype = find_result_type(self._values, target)
+        dtype = find_result_type(self.dtype, target)
         dtype = common_dtype_categorical_compat([self, target], dtype)
         return dtype
 

diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py
@@ -50,6 +50,7 @@
 
 if TYPE_CHECKING:
     from pandas._typing import (
+        Axis,
         Dtype,
         NaPosition,
         Self,
@@ -1105,3 +1106,44 @@ def _arith_method(self, other, op):
         except (ValueError, TypeError, ZeroDivisionError):
             # test_arithmetic_explicit_conversions
             return super()._arith_method(other, op)
+
+    def take(
+        self,
+        indices,
+        axis: Axis = 0,
+        allow_fill: bool = True,
+        fill_value=None,
+        **kwargs,
+    ):
+        if kwargs:
+            nv.validate_take((), kwargs)
+        if is_scalar(indices):
+            raise TypeError("Expected indices to be array-like")
+        indices = ensure_platform_int(indices)
+
+        # raise an exception if allow_fill is True and fill_value is not None
+        self._maybe_disallow_fill(allow_fill, fill_value, indices)
+
+        if len(indices) == 0:
+            taken = np.array([], dtype=self.dtype)
+        else:
+            ind_max = indices.max()
+            if ind_max >= len(self):
+                raise IndexError(
+                    f"index {ind_max} is out of bounds for axis 0 with size {len(self)}"
+                )
+            ind_min = indices.min()
+            if ind_min < -len(self):
+                raise IndexError(
+                    f"index {ind_min} is out of bounds for axis 0 with size {len(self)}"
+                )
+            taken = indices.astype(self.dtype, casting="safe")
+            if ind_min < 0:
+                taken %= len(self)
+            if self.step != 1:
+                taken *= self.step
+            if self.start != 0:
+                taken += self.start
+
+        # _constructor so RangeIndex-> Index with an int64 dtype
+        return self._constructor._simple_new(taken, name=self.name)
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
@@ -449,7 +449,7 @@ def coerce_to_target_dtype(self, other) -> Block:
         we can also safely try to coerce to the same dtype
         and will receive the same block
         """
-        new_dtype = find_result_type(self.values, other)
+        new_dtype = find_result_type(self.values.dtype, other)
 
         return self.astype(new_dtype, copy=False)
 

diff --git a/pandas/tests/indexes/numeric/test_setops.py b/pandas/tests/indexes/numeric/test_setops.py
@@ -74,6 +74,17 @@ def test_range_float_union_dtype(self):
         result = other.union(index)
         tm.assert_index_equal(result, expected)
 
+    def test_range_uint64_union_dtype(self):
+        # https://github.com/pandas-dev/pandas/issues/26778
+        index = RangeIndex(start=0, stop=3)
+        other = Index([0, 10], dtype=np.uint64)
+        result = index.union(other)
+        expected = Index([0, 1, 2, 10], dtype=object)
+        tm.assert_index_equal(result, expected)
+
+        result = other.union(index)
+        tm.assert_index_equal(result, expected)
+
     def test_float64_index_difference(self):
         # https://github.com/pandas-dev/pandas/issues/35217
         float_index = Index([1.0, 2, 3])

diff --git a/pandas/tests/indexes/ranges/test_indexing.py b/pandas/tests/indexes/ranges/test_indexing.py
@@ -76,10 +76,52 @@ def test_take_fill_value(self):
         with pytest.raises(ValueError, match=msg):
             idx.take(np.array([1, 0, -5]), fill_value=True)
 
+    def test_take_raises_index_error(self):
+        idx = RangeIndex(1, 4, name="xxx")
+
         msg = "index -5 is out of bounds for (axis 0 with )?size 3"
         with pytest.raises(IndexError, match=msg):
             idx.take(np.array([1, -5]))
 
+        msg = "index -4 is out of bounds for (axis 0 with )?size 3"
+        with pytest.raises(IndexError, match=msg):
+            idx.take(np.array([1, -4]))
+
+        # no errors
+        result = idx.take(np.array([1, -3]))
+        expected = Index([2, 1], dtype=np.int64, name="xxx")
+        tm.assert_index_equal(result, expected)
+
+    def test_take_accepts_empty_array(self):
+        idx = RangeIndex(1, 4, name="foo")
+        result = idx.take(np.array([]))
+        expected = Index([], dtype=np.int64, name="foo")
+        tm.assert_index_equal(result, expected)
+
+        # empty index
+        idx = RangeIndex(0, name="foo")
+        result = idx.take(np.array([]))
+        expected = Index([], dtype=np.int64, name="foo")
+        tm.assert_index_equal(result, expected)
+
+    def test_take_accepts_non_int64_array(self):
+        idx = RangeIndex(1, 4, name="foo")
+        result = idx.take(np.array([2, 1], dtype=np.uint32))
+        expected = Index([3, 2], dtype=np.int64, name="foo")
+        tm.assert_index_equal(result, expected)
+
+    def test_take_when_index_has_step(self):
+        idx = RangeIndex(1, 11, 3, name="foo")  # [1, 4, 7, 10]
+        result = idx.take(np.array([1, 0, -1, -4]))
+        expected = Index([4, 1, 10, 1], dtype=np.int64, name="foo")
+        tm.assert_index_equal(result, expected)
+
+    def test_take_when_index_has_negative_step(self):
+        idx = RangeIndex(11, -4, -2, name="foo")  # [11, 9, 7, 5, 3, 1, -1, -3]
+        result = idx.take(np.array([1, 0, -1, -8]))
+        expected = Index([9, 11, -3, 11], dtype=np.int64, name="foo")
+        tm.assert_index_equal(result, expected)
+
 
 class TestWhere:
     def test_where_putmask_range_cast(self):

diff --git a/pandas/tests/indexes/ranges/test_range.py b/pandas/tests/indexes/ranges/test_range.py
@@ -210,7 +210,7 @@ def test_dtype(self, simple_index):
         assert index.dtype == np.int64
 
     def test_cache(self):
-        # GH 26565, GH26617, GH35432
+        # GH 26565, GH26617, GH35432, GH53387
         # This test checks whether _cache has been set.
         # Calling RangeIndex._cache["_data"] creates an int64 array of the same length
         # as the RangeIndex and stores it in _cache.
@@ -264,11 +264,21 @@ def test_cache(self):
         df.iloc[5:10]
         assert idx._cache == {}
 
+        # after calling take, _cache may contain other keys, but not "_data"
+        idx.take([3, 0, 1])
+        assert "_data" not in idx._cache
+
+        df.loc[[50]]
+        assert "_data" not in idx._cache
+
+        df.iloc[[5, 6, 7, 8, 9]]
+        assert "_data" not in idx._cache
+
         # idx._cache should contain a _data entry after call to idx._data
         idx._data
         assert isinstance(idx._data, np.ndarray)
         assert idx._data is idx._data  # check cached value is reused
-        assert len(idx._cache) == 1
+        assert "_data" in idx._cache
         expected = np.arange(0, 100, 10, dtype="int64")
         tm.assert_numpy_array_equal(idx._cache["_data"], expected)