Skip to content

Commit 30b3c75

Browse files
mroeschke authored and pmhatre1 committed
REF: Avoid new object creation when reverse slicing when possible (pandas-dev#57902)
* REF: Avoid new objects when reverse slicing when possible
* Adjust test
* Remove astypes
* Fix typing
1 parent 5f6d4c3 commit 30b3c75

File tree

11 files changed: +59 -53 lines changed

pandas/core/arrays/datetimelike.py

+5-4
Original file line number | Diff line number | Diff line change
@@ -2371,11 +2371,12 @@ def factorize(
23712371
):
23722372
if self.freq is not None:
23732373
# We must be unique, so can short-circuit (and retain freq)
2374-
codes = np.arange(len(self), dtype=np.intp)
2375-
uniques = self.copy() # TODO: copy or view?
23762374
if sort and self.freq.n < 0:
2377-
codes = codes[::-1]
2378-
uniques = uniques[::-1]
2375+
codes = np.arange(len(self) - 1, -1, -1, dtype=np.intp)
2376+
uniques = self[::-1]
2377+
else:
2378+
codes = np.arange(len(self), dtype=np.intp)
2379+
uniques = self.copy() # TODO: copy or view?
23792380
return codes, uniques
23802381

23812382
if sort:

pandas/core/indexes/base.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -2116,7 +2116,7 @@ def droplevel(self, level: IndexLabel = 0):
21162116
if not isinstance(level, (tuple, list)):
21172117
level = [level]
21182118

2119-
levnums = sorted(self._get_level_number(lev) for lev in level)[::-1]
2119+
levnums = sorted((self._get_level_number(lev) for lev in level), reverse=True)
21202120

21212121
return self._drop_level_numbers(levnums)
21222122

pandas/core/indexes/multi.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -3589,7 +3589,7 @@ def _reorder_indexer(
35893589
new_order = key_order_map[self.codes[i][indexer]]
35903590
elif isinstance(k, slice) and k.step is not None and k.step < 0:
35913591
# flip order for negative step
3592-
new_order = np.arange(n)[::-1][indexer]
3592+
new_order = np.arange(n - 1, -1, -1)[indexer]
35933593
elif isinstance(k, slice) and k.start is None and k.stop is None:
35943594
# slice(None) should not determine order GH#31330
35953595
new_order = np.ones((n,), dtype=np.intp)[indexer]

pandas/core/indexes/range.py

+27-27
Original file line number | Diff line number | Diff line change
@@ -64,6 +64,12 @@
6464
_dtype_int64 = np.dtype(np.int64)
6565

6666

67+
def min_fitting_element(start: int, step: int, lower_limit: int) -> int:
68+
"""Returns the smallest element greater than or equal to the limit"""
69+
no_steps = -(-(lower_limit - start) // abs(step))
70+
return start + abs(step) * no_steps
71+
72+
6773
class RangeIndex(Index):
6874
"""
6975
Immutable Index implementing a monotonic integer range.
@@ -570,25 +576,30 @@ def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]:
570576
kwargs.pop("kind", None) # e.g. "mergesort" is irrelevant
571577
nv.validate_argsort(args, kwargs)
572578

579+
start, stop, step = None, None, None
573580
if self._range.step > 0:
574-
result = np.arange(len(self), dtype=np.intp)
581+
if ascending:
582+
start = len(self)
583+
else:
584+
start, stop, step = len(self) - 1, -1, -1
585+
elif ascending:
586+
start, stop, step = len(self) - 1, -1, -1
575587
else:
576-
result = np.arange(len(self) - 1, -1, -1, dtype=np.intp)
588+
start = len(self)
577589

578-
if not ascending:
579-
result = result[::-1]
580-
return result
590+
return np.arange(start, stop, step, dtype=np.intp)
581591

582592
def factorize(
583593
self,
584594
sort: bool = False,
585595
use_na_sentinel: bool = True,
586596
) -> tuple[npt.NDArray[np.intp], RangeIndex]:
587-
codes = np.arange(len(self), dtype=np.intp)
588-
uniques = self
589597
if sort and self.step < 0:
590-
codes = codes[::-1]
591-
uniques = uniques[::-1]
598+
codes = np.arange(len(self) - 1, -1, -1, dtype=np.intp)
599+
uniques = self[::-1]
600+
else:
601+
codes = np.arange(len(self), dtype=np.intp)
602+
uniques = self
592603
return codes, uniques
593604

594605
def equals(self, other: object) -> bool:
@@ -699,26 +710,15 @@ def _intersection(self, other: Index, sort: bool = False):
699710
# intersection disregarding the lower bounds
700711
tmp_start = first.start + (second.start - first.start) * first.step // gcd * s
701712
new_step = first.step * second.step // gcd
702-
new_range = range(tmp_start, int_high, new_step)
703-
new_index = self._simple_new(new_range)
704713

705714
# adjust index to limiting interval
706-
new_start = new_index._min_fitting_element(int_low)
707-
new_range = range(new_start, new_index.stop, new_index.step)
708-
new_index = self._simple_new(new_range)
715+
new_start = min_fitting_element(tmp_start, new_step, int_low)
716+
new_range = range(new_start, int_high, new_step)
709717

710-
if (self.step < 0 and other.step < 0) is not (new_index.step < 0):
711-
new_index = new_index[::-1]
718+
if (self.step < 0 and other.step < 0) is not (new_range.step < 0):
719+
new_range = new_range[::-1]
712720

713-
if sort is None:
714-
new_index = new_index.sort_values()
715-
716-
return new_index
717-
718-
def _min_fitting_element(self, lower_limit: int) -> int:
719-
"""Returns the smallest element greater than or equal to the limit"""
720-
no_steps = -(-(lower_limit - self.start) // abs(self.step))
721-
return self.start + abs(self.step) * no_steps
721+
return self._simple_new(new_range)
722722

723723
def _extended_gcd(self, a: int, b: int) -> tuple[int, int, int]:
724724
"""
@@ -904,9 +904,9 @@ def _difference(self, other, sort=None):
904904
# e.g. range(10) and range(0, 10, 3)
905905
return super()._difference(other, sort=sort)
906906

907-
new_index = type(self)._simple_new(new_rng, name=res_name)
908907
if first is not self._range:
909-
new_index = new_index[::-1]
908+
new_rng = new_rng[::-1]
909+
new_index = type(self)._simple_new(new_rng, name=res_name)
910910

911911
return new_index
912912

pandas/core/indexing.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -1145,7 +1145,7 @@ def _contains_slice(x: object) -> bool:
11451145
# GH#41369 Loop in reverse order ensures indexing along columns before rows
11461146
# which selects only necessary blocks which avoids dtype conversion if possible
11471147
axis = len(tup) - 1
1148-
for key in tup[::-1]:
1148+
for key in reversed(tup):
11491149
if com.is_null_slice(key):
11501150
axis -= 1
11511151
continue

pandas/core/internals/managers.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -1549,9 +1549,9 @@ def _insert_update_blklocs_and_blknos(self, loc) -> None:
15491549
self._blklocs = np.append(self._blklocs, 0)
15501550
self._blknos = np.append(self._blknos, len(self.blocks))
15511551
elif loc == 0:
1552-
# np.append is a lot faster, let's use it if we can.
1553-
self._blklocs = np.append(self._blklocs[::-1], 0)[::-1]
1554-
self._blknos = np.append(self._blknos[::-1], len(self.blocks))[::-1]
1552+
# As of numpy 1.26.4, np.concatenate faster than np.append
1553+
self._blklocs = np.concatenate([[0], self._blklocs])
1554+
self._blknos = np.concatenate([[len(self.blocks)], self._blknos])
15551555
else:
15561556
new_blklocs, new_blknos = libinternals.update_blklocs_and_blknos(
15571557
self.blklocs, self.blknos, loc, len(self.blocks)

pandas/core/reshape/reshape.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -910,9 +910,10 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
910910
raise ValueError("Columns with duplicate values are not supported in stack")
911911

912912
# If we need to drop `level` from columns, it needs to be in descending order
913+
set_levels = set(level)
913914
drop_levnums = sorted(level, reverse=True)
914915
stack_cols = frame.columns._drop_level_numbers(
915-
[k for k in range(frame.columns.nlevels) if k not in level][::-1]
916+
[k for k in range(frame.columns.nlevels - 1, -1, -1) if k not in set_levels]
916917
)
917918
if len(level) > 1:
918919
# Arrange columns in the order we want to take them, e.g. level=[2, 0, 1]
@@ -936,7 +937,7 @@ def stack_v3(frame: DataFrame, level: list[int]) -> Series | DataFrame:
936937
idx = (idx,)
937938
gen = iter(idx)
938939
column_indexer = tuple(
939-
next(gen) if k in level else slice(None)
940+
next(gen) if k in set_levels else slice(None)
940941
for k in range(frame.columns.nlevels)
941942
)
942943
data = frame.loc[:, column_indexer]

pandas/core/series.py

+2-2
Original file line number | Diff line number | Diff line change
@@ -5510,9 +5510,9 @@ def case_when(
55105510
replacements = updated_replacements
55115511
default = default.astype(common_dtype)
55125512

5513-
counter = reversed(range(len(conditions)))
5513+
counter = range(len(conditions) - 1, -1, -1)
55145514
for position, condition, replacement in zip(
5515-
counter, conditions[::-1], replacements[::-1]
5515+
counter, reversed(conditions), reversed(replacements)
55165516
):
55175517
try:
55185518
default = default.mask(

pandas/core/sorting.py

+7-4
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,7 @@
22

33
from __future__ import annotations
44

5+
import itertools
56
from typing import (
67
TYPE_CHECKING,
78
Callable,
@@ -334,13 +335,15 @@ def lexsort_indexer(
334335
raise ValueError(f"invalid na_position: {na_position}")
335336

336337
if isinstance(orders, bool):
337-
orders = [orders] * len(keys)
338+
orders = itertools.repeat(orders, len(keys))
338339
elif orders is None:
339-
orders = [True] * len(keys)
340+
orders = itertools.repeat(True, len(keys))
341+
else:
342+
orders = reversed(orders)
340343

341344
labels = []
342345

343-
for k, order in zip(keys, orders):
346+
for k, order in zip(reversed(keys), orders):
344347
k = ensure_key_mapped(k, key)
345348
if codes_given:
346349
codes = cast(np.ndarray, k)
@@ -361,7 +364,7 @@ def lexsort_indexer(
361364

362365
labels.append(codes)
363366

364-
return np.lexsort(labels[::-1])
367+
return np.lexsort(labels)
365368

366369

367370
def nargsort(

pandas/tests/indexes/ranges/test_range.py

+6-5
Original file line number | Diff line number | Diff line change
@@ -9,6 +9,7 @@
99
RangeIndex,
1010
)
1111
import pandas._testing as tm
12+
from pandas.core.indexes.range import min_fitting_element
1213

1314

1415
class TestRangeIndex:
@@ -419,21 +420,21 @@ def test_extended_gcd(self, simple_index):
419420
assert 2 == result[0]
420421

421422
def test_min_fitting_element(self):
422-
result = RangeIndex(0, 20, 2)._min_fitting_element(1)
423+
result = min_fitting_element(0, 2, 1)
423424
assert 2 == result
424425

425-
result = RangeIndex(1, 6)._min_fitting_element(1)
426+
result = min_fitting_element(1, 1, 1)
426427
assert 1 == result
427428

428-
result = RangeIndex(18, -2, -2)._min_fitting_element(1)
429+
result = min_fitting_element(18, -2, 1)
429430
assert 2 == result
430431

431-
result = RangeIndex(5, 0, -1)._min_fitting_element(1)
432+
result = min_fitting_element(5, -1, 1)
432433
assert 1 == result
433434

434435
big_num = 500000000000000000000000
435436

436-
result = RangeIndex(5, big_num * 2, 1)._min_fitting_element(big_num)
437+
result = min_fitting_element(5, 1, big_num)
437438
assert big_num == result
438439

439440
def test_slice_specialised(self, simple_index):

pandas/tests/indexes/ranges/test_setops.py

+3-3
Original file line number | Diff line number | Diff line change
@@ -93,12 +93,12 @@ def test_intersection(self, sort):
9393
# GH 17296: intersect two decreasing RangeIndexes
9494
first = RangeIndex(10, -2, -2)
9595
other = RangeIndex(5, -4, -1)
96-
expected = first.astype(int).intersection(other.astype(int), sort=sort)
97-
result = first.intersection(other, sort=sort).astype(int)
96+
expected = RangeIndex(start=4, stop=-2, step=-2)
97+
result = first.intersection(other, sort=sort)
9898
tm.assert_index_equal(result, expected)
9999

100100
# reversed
101-
result = other.intersection(first, sort=sort).astype(int)
101+
result = other.intersection(first, sort=sort)
102102
tm.assert_index_equal(result, expected)
103103

104104
index = RangeIndex(5, name="foo")

0 commit comments

Comments
 (0)