Skip to content

Commit 1fc5eaa

Browse files
mroeschke authored and pmhatre1 committed
PERF: RangeIndex.value_counts/searchsorted/to_numpy (pandas-dev#58376)
* Add RangeIndex.value_counts, searchsorted, to_numpy * Undo engine stuff * Finish searchsorted, add whatsnew * Remove old to_numpy implementation * Add whatsnew for to_numpy * add whatsnew number * Fix typing
1 parent 75b7033 commit 1fc5eaa

File tree

4 files changed

+102
-3
lines changed

4 files changed

+102
-3
lines changed

doc/source/whatsnew/v3.0.0.rst

+3
Original file line numberDiff line numberDiff line change
@@ -340,6 +340,9 @@ Performance improvements
340340
- Performance improvement in :meth:`RangeIndex.argmin` and :meth:`RangeIndex.argmax` (:issue:`57823`)
341341
- Performance improvement in :meth:`RangeIndex.insert` returning a :class:`RangeIndex` instead of a :class:`Index` when the :class:`RangeIndex` is empty. (:issue:`57833`)
342342
- Performance improvement in :meth:`RangeIndex.round` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57824`)
343+
- Performance improvement in :meth:`RangeIndex.searchsorted` (:issue:`58376`)
344+
- Performance improvement in :meth:`RangeIndex.to_numpy` when specifying an ``na_value`` (:issue:`58376`)
345+
- Performance improvement in :meth:`RangeIndex.value_counts` (:issue:`58376`)
343346
- Performance improvement in :meth:`RangeIndex.join` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57651`, :issue:`57752`)
344347
- Performance improvement in :meth:`RangeIndex.reindex` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57647`, :issue:`57752`)
345348
- Performance improvement in :meth:`RangeIndex.take` returning a :class:`RangeIndex` instead of a :class:`Index` when possible. (:issue:`57445`, :issue:`57752`)

pandas/core/base.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -556,7 +556,6 @@ def array(self) -> ExtensionArray:
556556
"""
557557
raise AbstractMethodError(self)
558558

559-
@final
560559
def to_numpy(
561560
self,
562561
dtype: npt.DTypeLike | None = None,
@@ -668,7 +667,7 @@ def to_numpy(
668667
)
669668

670669
values = self._values
671-
if fillna:
670+
if fillna and self.hasnans:
672671
if not can_hold_element(values, na_value):
673672
# if we can't hold the na_value asarray either makes a copy or we
674673
# error before modifying values. The asarray later on thus won't make
@@ -943,7 +942,6 @@ def _map_values(self, mapper, na_action=None):
943942

944943
return algorithms.map_array(arr, mapper, na_action=na_action)
945944

946-
@final
947945
def value_counts(
948946
self,
949947
normalize: bool = False,

pandas/core/indexes/range.py

+65
Original file line numberDiff line numberDiff line change
@@ -57,9 +57,13 @@
5757
Dtype,
5858
JoinHow,
5959
NaPosition,
60+
NumpySorter,
6061
Self,
6162
npt,
6263
)
64+
65+
from pandas import Series
66+
6367
_empty_range = range(0)
6468
_dtype_int64 = np.dtype(np.int64)
6569

@@ -1359,3 +1363,64 @@ def take( # type: ignore[override]
13591363
taken += self.start
13601364

13611365
return self._shallow_copy(taken, name=self.name)
1366+
1367+
def value_counts(
    self,
    normalize: bool = False,
    sort: bool = True,
    ascending: bool = False,
    bins=None,
    dropna: bool = True,
) -> Series:
    """
    Return a Series with one entry per element of this index.

    When ``bins`` is None the result is built directly: an array of ones
    (divided by ``len(self)`` when ``normalize`` is True), indexed by a copy
    of this index. ``sort``, ``ascending`` and ``dropna`` are only forwarded
    to the parent implementation, which is used when ``bins`` is given.

    Parameters
    ----------
    normalize : bool, default False
        If True, return proportions and name the result "proportion";
        otherwise return integer counts named "count".
    sort : bool, default True
        Forwarded to the parent implementation when ``bins`` is given.
    ascending : bool, default False
        Forwarded to the parent implementation when ``bins`` is given.
    bins : optional
        When not None, the call is delegated to the parent implementation.
    dropna : bool, default True
        Forwarded to the parent implementation when ``bins`` is given.

    Returns
    -------
    Series
    """
    from pandas import Series

    if bins is not None:
        # Binning is not handled here; let the parent class do it.
        return super().value_counts(
            normalize=normalize,
            sort=sort,
            ascending=ascending,
            bins=bins,
            dropna=dropna,
        )

    size = len(self)
    counts: npt.NDArray[np.floating] | npt.NDArray[np.signedinteger] = np.ones(
        size, dtype=np.int64
    )
    if normalize:
        label = "proportion"
        counts = counts / size
    else:
        label = "count"
    return Series(counts, index=self.copy(), name=label)
1392+
1393+
def searchsorted( # type: ignore[override]
1394+
self,
1395+
value,
1396+
side: Literal["left", "right"] = "left",
1397+
sorter: NumpySorter | None = None,
1398+
) -> npt.NDArray[np.intp] | np.intp:
1399+
if side not in {"left", "right"} or sorter is not None:
1400+
return super().searchsorted(value=value, side=side, sorter=sorter)
1401+
1402+
was_scalar = False
1403+
if is_scalar(value):
1404+
was_scalar = True
1405+
array_value = np.array([value])
1406+
else:
1407+
array_value = np.asarray(value)
1408+
if array_value.dtype.kind not in "iu":
1409+
return super().searchsorted(value=value, side=side, sorter=sorter)
1410+
1411+
if flip := (self.step < 0):
1412+
rng = self._range[::-1]
1413+
start = rng.start
1414+
step = rng.step
1415+
shift = side == "right"
1416+
else:
1417+
start = self.start
1418+
step = self.step
1419+
shift = side == "left"
1420+
result = (array_value - start - int(shift)) // step + 1
1421+
if flip:
1422+
result = len(self) - result
1423+
result = np.maximum(np.minimum(result, len(self)), 0)
1424+
if was_scalar:
1425+
return np.intp(result.item())
1426+
return result.astype(np.intp, copy=False)

pandas/tests/indexes/ranges/test_range.py

+33
Original file line numberDiff line numberDiff line change
@@ -874,3 +874,36 @@ def test_getitem_integers_return_index():
874874
result = RangeIndex(0, 10, 2, name="foo")[[0, 1, -1]]
875875
expected = Index([0, 2, 8], dtype="int64", name="foo")
876876
tm.assert_index_equal(result, expected)
877+
878+
879+
@pytest.mark.parametrize("normalize", [True, False])
@pytest.mark.parametrize(
    "rng",
    [
        range(3),
        range(0),
        range(0, 3, 2),
        range(3, -3, -2),
    ],
)
def test_value_counts(sort, dropna, ascending, normalize, rng):
    # RangeIndex.value_counts must agree with value_counts on an Index built
    # from the same values, for every combination of options.
    options = {
        "normalize": normalize,
        "sort": sort,
        "ascending": ascending,
        "dropna": dropna,
    }
    actual = RangeIndex(rng, name="A").value_counts(**options)
    reference = Index(list(rng), name="A").value_counts(**options)
    # The index type is not compared (check_index_type=False).
    tm.assert_series_equal(actual, reference, check_index_type=False)
898+
899+
900+
@pytest.mark.parametrize("side", ["left", "right"])
@pytest.mark.parametrize("value", [0, -5, 5, -3, np.array([-5, -3, 0, 5])])
def test_searchsorted(side, value):
    # RangeIndex.searchsorted must agree with searchsorted on an Index built
    # from the same values, for scalar and array inputs.
    range_index = RangeIndex(-3, 3, 2)
    actual = range_index.searchsorted(value=value, side=side)
    reference = Index(list(range_index)).searchsorted(value=value, side=side)
    if not isinstance(value, int):
        tm.assert_numpy_array_equal(actual, reference)
        return
    assert actual == reference

0 commit comments

Comments
 (0)