Skip to content

Commit 1e03419

Browse files
authored
PERF: slicing (#52183)
* PERF: slicing
* revert __finalize__ optimizations
1 parent 5a65a73 commit 1e03419

File tree

7 files changed

+47
-32
lines changed

7 files changed

+47
-32
lines changed

pandas/_libs/internals.pyx

+1 -1
Original file line number | Diff line number | Diff line change
@@ -831,7 +831,7 @@ cdef class BlockManager:
831831
# -------------------------------------------------------------------
832832
# Indexing
833833

834-
cdef BlockManager _get_index_slice(self, slobj):
834+
cdef BlockManager _get_index_slice(self, slice slobj):
835835
cdef:
836836
SharedBlock blk, nb
837837
BlockManager mgr

pandas/_libs/lib.pyi

+1
Original file line number | Diff line number | Diff line change
@@ -47,6 +47,7 @@ def is_decimal(val: object) -> TypeGuard[Decimal]: ...
4747
def is_complex(val: object) -> TypeGuard[complex]: ...
4848
def is_bool(val: object) -> TypeGuard[bool | np.bool_]: ...
4949
def is_integer(val: object) -> TypeGuard[int | np.integer]: ...
50+
def is_int_or_none(obj) -> bool: ...
5051
def is_float(val: object) -> TypeGuard[float]: ...
5152
def is_interval_array(values: np.ndarray) -> bool: ...
5253
def is_datetime64_array(values: np.ndarray) -> bool: ...

pandas/_libs/lib.pyx

+11
Original file line number | Diff line number | Diff line change
@@ -1057,6 +1057,17 @@ def is_integer(obj: object) -> bool:
10571057
return util.is_integer_object(obj)
10581058

10591059

1060+
def is_int_or_none(obj) -> bool:
1061+
"""
1062+
Return True if given object is integer or None.
1063+
1064+
Returns
1065+
-------
1066+
bool
1067+
"""
1068+
return obj is None or util.is_integer_object(obj)
1069+
1070+
10601071
def is_bool(obj: object) -> bool:
10611072
"""
10621073
Return True if given object is boolean.

pandas/core/indexers/utils.py

+5 -7
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@
1010

1111
import numpy as np
1212

13+
from pandas._libs import lib
14+
1315
from pandas.core.dtypes.common import (
1416
is_array_like,
1517
is_bool_dtype,
@@ -50,14 +52,10 @@ def is_valid_positional_slice(slc: slice) -> bool:
5052
A valid positional slice may also be interpreted as a label-based slice
5153
depending on the index being sliced.
5254
"""
53-
54-
def is_int_or_none(val):
55-
return val is None or is_integer(val)
56-
5755
return (
58-
is_int_or_none(slc.start)
59-
and is_int_or_none(slc.stop)
60-
and is_int_or_none(slc.step)
56+
lib.is_int_or_none(slc.start)
57+
and lib.is_int_or_none(slc.stop)
58+
and lib.is_int_or_none(slc.step)
6159
)
6260

6361

pandas/core/indexes/base.py

+26 -22
Original file line number | Diff line number | Diff line change
@@ -99,7 +99,6 @@
9999
is_float_dtype,
100100
is_hashable,
101101
is_integer,
102-
is_integer_dtype,
103102
is_iterator,
104103
is_list_like,
105104
is_numeric_dtype,
@@ -161,7 +160,10 @@
161160
extract_array,
162161
sanitize_array,
163162
)
164-
from pandas.core.indexers import disallow_ndim_indexing
163+
from pandas.core.indexers import (
164+
disallow_ndim_indexing,
165+
is_valid_positional_slice,
166+
)
165167
from pandas.core.indexes.frozen import FrozenList
166168
from pandas.core.missing import clean_reindex_fill_method
167169
from pandas.core.ops import get_op_result_name
@@ -4071,7 +4073,7 @@ def _validate_positional_slice(self, key: slice) -> None:
40714073
self._validate_indexer("positional", key.stop, "iloc")
40724074
self._validate_indexer("positional", key.step, "iloc")
40734075

4074-
def _convert_slice_indexer(self, key: slice, kind: str_t):
4076+
def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]):
40754077
"""
40764078
Convert a slice indexer.
40774079
@@ -4083,7 +4085,6 @@ def _convert_slice_indexer(self, key: slice, kind: str_t):
40834085
key : label of the slice bound
40844086
kind : {'loc', 'getitem'}
40854087
"""
4086-
assert kind in ["loc", "getitem"], kind
40874088

40884089
# potentially cast the bounds to integers
40894090
start, stop, step = key.start, key.stop, key.step
@@ -4096,22 +4097,14 @@ def _convert_slice_indexer(self, key: slice, kind: str_t):
40964097
return self.slice_indexer(start, stop, step)
40974098

40984099
# figure out if this is a positional indexer
4099-
def is_int(v):
4100-
return v is None or is_integer(v)
4101-
4102-
is_index_slice = is_int(start) and is_int(stop) and is_int(step)
4103-
4104-
# special case for interval_dtype bc we do not do partial-indexing
4105-
# on integer Intervals when slicing
4106-
# TODO: write this in terms of e.g. should_partial_index?
4107-
ints_are_positional = self._should_fallback_to_positional or isinstance(
4108-
self.dtype, IntervalDtype
4109-
)
4110-
is_positional = is_index_slice and ints_are_positional
4100+
is_index_slice = is_valid_positional_slice(key)
41114101

41124102
if kind == "getitem":
41134103
# called from the getitem slicers, validate that we are in fact integers
4114-
if is_index_slice or is_integer_dtype(self.dtype):
4104+
if is_index_slice:
4105+
# In this case the _validate_indexer checks below are redundant
4106+
return key
4107+
elif self.dtype.kind in "iu":
41154108
# Note: these checks are redundant if we know is_index_slice
41164109
self._validate_indexer("slice", key.start, "getitem")
41174110
self._validate_indexer("slice", key.stop, "getitem")
@@ -4120,6 +4113,14 @@ def is_int(v):
41204113

41214114
# convert the slice to an indexer here
41224115

4116+
# special case for interval_dtype bc we do not do partial-indexing
4117+
# on integer Intervals when slicing
4118+
# TODO: write this in terms of e.g. should_partial_index?
4119+
ints_are_positional = self._should_fallback_to_positional or isinstance(
4120+
self.dtype, IntervalDtype
4121+
)
4122+
is_positional = is_index_slice and ints_are_positional
4123+
41234124
# if we are mixed and have integers
41244125
if is_positional:
41254126
try:
@@ -4151,7 +4152,7 @@ def is_int(v):
41514152
@final
41524153
def _raise_invalid_indexer(
41534154
self,
4154-
form: str_t,
4155+
form: Literal["slice", "positional"],
41554156
key,
41564157
reraise: lib.NoDefault | None | Exception = lib.no_default,
41574158
) -> None:
@@ -6384,14 +6385,17 @@ def _maybe_cast_listlike_indexer(self, target) -> Index:
63846385
return ensure_index(target)
63856386

63866387
@final
6387-
def _validate_indexer(self, form: str_t, key, kind: str_t) -> None:
6388+
def _validate_indexer(
6389+
self,
6390+
form: Literal["positional", "slice"],
6391+
key,
6392+
kind: Literal["getitem", "iloc"],
6393+
) -> None:
63886394
"""
63896395
If we are positional indexer, validate that we have appropriate
63906396
typed bounds must be an integer.
63916397
"""
6392-
assert kind in ["getitem", "iloc"]
6393-
6394-
if key is not None and not is_integer(key):
6398+
if not lib.is_int_or_none(key):
63956399
self._raise_invalid_indexer(form, key)
63966400

63976401
def _maybe_cast_slice_bound(self, label, side: str_t):

pandas/core/indexes/interval.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -785,7 +785,7 @@ def _index_as_unique(self) -> bool:
785785
"cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"
786786
)
787787

788-
def _convert_slice_indexer(self, key: slice, kind: str):
788+
def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]):
789789
if not (key.step is None or key.step == 1):
790790
# GH#31658 if label-based, we require step == 1,
791791
# if positional, we disallow float start/stop

pandas/core/indexes/range.py

+2 -1
Original file line number | Diff line number | Diff line change
@@ -54,6 +54,7 @@
5454
npt,
5555
)
5656
_empty_range = range(0)
57+
_dtype_int64 = np.dtype(np.int64)
5758

5859

5960
class RangeIndex(Index):
@@ -309,7 +310,7 @@ def memory_usage(self, deep: bool = False) -> int:
309310

310311
@property
311312
def dtype(self) -> np.dtype:
312-
return np.dtype(np.int64)
313+
return _dtype_int64
313314

314315
@property
315316
def is_unique(self) -> bool:

0 commit comments

Comments
 (0)