Skip to content

Commit 86ae81c

Browse files
authored
PERF: Improve performance for array equal fast (#50850)
1 parent 3fa869e commit 86ae81c

File tree

6 files changed

+21
-31
lines changed

6 files changed

+21
-31
lines changed

pandas/_libs/lib.pyi

+2-2
Original file line number | Diff line number | Diff line change
@@ -240,6 +240,6 @@ def get_reverse_indexer(
240240
) -> npt.NDArray[np.intp]: ...
241241
def is_bool_list(obj: list) -> bool: ...
242242
def dtypes_all_equal(types: list[DtypeObj]) -> bool: ...
243-
def array_equal_fast(
244-
left: np.ndarray, right: np.ndarray # np.ndarray[np.int64, ndim=1]
243+
def is_range_indexer(
244+
left: np.ndarray, n: int # np.ndarray[np.int64, ndim=1]
245245
) -> bool: ...

pandas/_libs/lib.pyx

+4-6
Original file line number | Diff line number | Diff line change
@@ -650,22 +650,20 @@ ctypedef fused int6432_t:
650650

651651
@cython.wraparound(False)
652652
@cython.boundscheck(False)
653-
def array_equal_fast(
654-
ndarray[int6432_t, ndim=1] left, ndarray[int6432_t, ndim=1] right,
655-
) -> bool:
653+
def is_range_indexer(ndarray[int6432_t, ndim=1] left, int n) -> bool:
656654
"""
657655
Check whether a 1-d integer array is a range indexer, i.e. it has length ``n`` and
658656
its i-th element equals ``i`` (``0, 1, ..., n - 1``); meant for indexer comparisons
659657
"""
660658
cdef:
661-
Py_ssize_t i, n = left.size
659+
Py_ssize_t i
662660

663-
if left.size != right.size:
661+
if left.size != n:
664662
return False
665663

666664
for i in range(n):
667665

668-
if left[i] != right[i]:
666+
if left[i] != i:
669667
return False
670668

671669
return True

pandas/core/frame.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@
4848
from pandas._libs.hashtable import duplicated
4949
from pandas._libs.lib import (
5050
NoDefault,
51-
array_equal_fast,
51+
is_range_indexer,
5252
no_default,
5353
)
5454
from pandas._typing import (
@@ -6724,7 +6724,7 @@ def sort_values(
67246724
else:
67256725
return self.copy(deep=None)
67266726

6727-
if array_equal_fast(indexer, np.arange(0, len(indexer), dtype=indexer.dtype)):
6727+
if is_range_indexer(indexer, len(indexer)):
67286728
if inplace:
67296729
return self._update_inplace(self)
67306730
else:

pandas/core/generic.py

+2-5
Original file line number | Diff line number | Diff line change
@@ -36,7 +36,7 @@
3636
)
3737

3838
from pandas._libs import lib
39-
from pandas._libs.lib import array_equal_fast
39+
from pandas._libs.lib import is_range_indexer
4040
from pandas._libs.tslibs import (
4141
Period,
4242
Tick,
@@ -3921,10 +3921,7 @@ def _take(
39213921
axis == 0
39223922
and indices.ndim == 1
39233923
and using_copy_on_write()
3924-
and array_equal_fast(
3925-
indices,
3926-
np.arange(0, len(self), dtype=np.intp),
3927-
)
3924+
and is_range_indexer(indices, len(self))
39283925
):
39293926
return self.copy(deep=None)
39303927

pandas/core/series.py

+3-5
Original file line number | Diff line number | Diff line change
@@ -33,7 +33,7 @@
3333
reshape,
3434
)
3535
from pandas._libs.lib import (
36-
array_equal_fast,
36+
is_range_indexer,
3737
no_default,
3838
)
3939
from pandas._typing import (
@@ -890,7 +890,7 @@ def take(self, indices, axis: Axis = 0, **kwargs) -> Series:
890890
if (
891891
indices.ndim == 1
892892
and using_copy_on_write()
893-
and array_equal_fast(indices, np.arange(0, len(self), dtype=indices.dtype))
893+
and is_range_indexer(indices, len(self))
894894
):
895895
return self.copy(deep=None)
896896

@@ -3585,9 +3585,7 @@ def sort_values(
35853585
values_to_sort = ensure_key_mapped(self, key)._values if key else self._values
35863586
sorted_index = nargsort(values_to_sort, kind, bool(ascending), na_position)
35873587

3588-
if array_equal_fast(
3589-
sorted_index, np.arange(0, len(sorted_index), dtype=sorted_index.dtype)
3590-
):
3588+
if is_range_indexer(sorted_index, len(sorted_index)):
35913589
if inplace:
35923590
return self._update_inplace(self)
35933591
return self.copy(deep=None)

pandas/tests/libs/test_lib.py

+8-11
Original file line number | Diff line number | Diff line change
@@ -244,25 +244,22 @@ def test_get_reverse_indexer(self):
244244
tm.assert_numpy_array_equal(result, expected)
245245

246246
@pytest.mark.parametrize("dtype", ["int64", "int32"])
247-
def test_array_equal_fast(self, dtype):
247+
def test_is_range_indexer(self, dtype):
248248
# GH#50592
249-
left = np.arange(1, 100, dtype=dtype)
250-
right = np.arange(1, 100, dtype=dtype)
251-
assert lib.array_equal_fast(left, right)
249+
left = np.arange(0, 100, dtype=dtype)
250+
assert lib.is_range_indexer(left, 100)
252251

253252
@pytest.mark.parametrize("dtype", ["int64", "int32"])
254-
def test_array_equal_fast_not_equal(self, dtype):
253+
def test_is_range_indexer_not_equal(self, dtype):
255254
# GH#50592
256255
left = np.array([1, 2], dtype=dtype)
257-
right = np.array([2, 2], dtype=dtype)
258-
assert not lib.array_equal_fast(left, right)
256+
assert not lib.is_range_indexer(left, 2)
259257

260258
@pytest.mark.parametrize("dtype", ["int64", "int32"])
261-
def test_array_equal_fast_not_equal_shape(self, dtype):
259+
def test_is_range_indexer_not_equal_shape(self, dtype):
262260
# GH#50592
263-
left = np.array([1, 2, 3], dtype=dtype)
264-
right = np.array([2, 2], dtype=dtype)
265-
assert not lib.array_equal_fast(left, right)
261+
left = np.array([0, 1, 2], dtype=dtype)
262+
assert not lib.is_range_indexer(left, 2)
266263

267264

268265
def test_cache_readonly_preserve_docstrings():

0 commit comments

Comments
 (0)