Skip to content

PERF: slicing #52183

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Mar 29, 2023
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pandas/_libs/internals.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -831,7 +831,7 @@ cdef class BlockManager:
# -------------------------------------------------------------------
# Indexing

cdef BlockManager _get_index_slice(self, slobj):
cdef BlockManager _get_index_slice(self, slice slobj):
cdef:
SharedBlock blk, nb
BlockManager mgr
Expand Down
1 change: 1 addition & 0 deletions pandas/_libs/lib.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ def is_decimal(val: object) -> TypeGuard[Decimal]: ...
def is_complex(val: object) -> TypeGuard[complex]: ...
def is_bool(val: object) -> TypeGuard[bool | np.bool_]: ...
def is_integer(val: object) -> TypeGuard[int | np.integer]: ...
def is_int_or_none(obj) -> bool: ...
def is_float(val: object) -> TypeGuard[float]: ...
def is_interval_array(values: np.ndarray) -> bool: ...
def is_datetime64_array(values: np.ndarray) -> bool: ...
Expand Down
11 changes: 11 additions & 0 deletions pandas/_libs/lib.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1057,6 +1057,17 @@ def is_integer(obj: object) -> bool:
return util.is_integer_object(obj)


def is_int_or_none(obj) -> bool:
"""
Return True if given object is integer or None.

Returns
-------
bool
"""
return obj is None or util.is_integer_object(obj)


def is_bool(obj: object) -> bool:
"""
Return True if given object is boolean.
Expand Down
4 changes: 2 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -6009,10 +6009,10 @@ def __finalize__(self, other, method: str | None = None, **kwargs) -> Self:
stable across pandas releases.
"""
if isinstance(other, NDFrame):
for name in other.attrs:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are these changes supposed to be in this PR?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes. __finalize__ adds up.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like .attrs and .flags just call ._attrs and ._flags respectively? Did the attribute call significantly impact the slicing benchmark?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not seeing much difference, will revert.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reverted+greenish

for name in other._attrs:
self.attrs[name] = other.attrs[name]

self.flags.allows_duplicate_labels = other.flags.allows_duplicate_labels
self._flags.allows_duplicate_labels = other._flags.allows_duplicate_labels
# For subclasses using _metadata.
for name in set(self._metadata) & set(other._metadata):
assert isinstance(name, str)
Expand Down
12 changes: 5 additions & 7 deletions pandas/core/indexers/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

import numpy as np

from pandas._libs import lib

from pandas.core.dtypes.common import (
is_array_like,
is_bool_dtype,
Expand Down Expand Up @@ -50,14 +52,10 @@ def is_valid_positional_slice(slc: slice) -> bool:
A valid positional slice may also be interpreted as a label-based slice
depending on the index being sliced.
"""

def is_int_or_none(val):
return val is None or is_integer(val)

return (
is_int_or_none(slc.start)
and is_int_or_none(slc.stop)
and is_int_or_none(slc.step)
lib.is_int_or_none(slc.start)
and lib.is_int_or_none(slc.stop)
and lib.is_int_or_none(slc.step)
)


Expand Down
48 changes: 26 additions & 22 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@
is_float_dtype,
is_hashable,
is_integer,
is_integer_dtype,
is_iterator,
is_list_like,
is_numeric_dtype,
Expand Down Expand Up @@ -162,7 +161,10 @@
extract_array,
sanitize_array,
)
from pandas.core.indexers import disallow_ndim_indexing
from pandas.core.indexers import (
disallow_ndim_indexing,
is_valid_positional_slice,
)
from pandas.core.indexes.frozen import FrozenList
from pandas.core.missing import clean_reindex_fill_method
from pandas.core.ops import get_op_result_name
Expand Down Expand Up @@ -4072,7 +4074,7 @@ def _validate_positional_slice(self, key: slice) -> None:
self._validate_indexer("positional", key.stop, "iloc")
self._validate_indexer("positional", key.step, "iloc")

def _convert_slice_indexer(self, key: slice, kind: str_t):
def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]):
"""
Convert a slice indexer.

Expand All @@ -4084,7 +4086,6 @@ def _convert_slice_indexer(self, key: slice, kind: str_t):
key : label of the slice bound
kind : {'loc', 'getitem'}
"""
assert kind in ["loc", "getitem"], kind

# potentially cast the bounds to integers
start, stop, step = key.start, key.stop, key.step
Expand All @@ -4097,22 +4098,14 @@ def _convert_slice_indexer(self, key: slice, kind: str_t):
return self.slice_indexer(start, stop, step)

# figure out if this is a positional indexer
def is_int(v):
return v is None or is_integer(v)

is_index_slice = is_int(start) and is_int(stop) and is_int(step)

# special case for interval_dtype bc we do not do partial-indexing
# on integer Intervals when slicing
# TODO: write this in terms of e.g. should_partial_index?
ints_are_positional = self._should_fallback_to_positional or isinstance(
self.dtype, IntervalDtype
)
is_positional = is_index_slice and ints_are_positional
is_index_slice = is_valid_positional_slice(key)

if kind == "getitem":
# called from the getitem slicers, validate that we are in fact integers
if is_index_slice or is_integer_dtype(self.dtype):
if is_index_slice:
# In this case the _validate_indexer checks below are redundant
return key
elif self.dtype.kind in "iu":
# Note: these checks are redundant if we know is_index_slice
self._validate_indexer("slice", key.start, "getitem")
self._validate_indexer("slice", key.stop, "getitem")
Expand All @@ -4121,6 +4114,14 @@ def is_int(v):

# convert the slice to an indexer here

# special case for interval_dtype bc we do not do partial-indexing
# on integer Intervals when slicing
# TODO: write this in terms of e.g. should_partial_index?
ints_are_positional = self._should_fallback_to_positional or isinstance(
self.dtype, IntervalDtype
)
is_positional = is_index_slice and ints_are_positional

# if we are mixed and have integers
if is_positional:
try:
Expand Down Expand Up @@ -4152,7 +4153,7 @@ def is_int(v):
@final
def _raise_invalid_indexer(
self,
form: str_t,
form: Literal["slice", "positional"],
key,
reraise: lib.NoDefault | None | Exception = lib.no_default,
) -> None:
Expand Down Expand Up @@ -6380,14 +6381,17 @@ def _maybe_cast_listlike_indexer(self, target) -> Index:
return ensure_index(target)

@final
def _validate_indexer(self, form: str_t, key, kind: str_t) -> None:
def _validate_indexer(
self,
form: Literal["positional", "slice"],
key,
kind: Literal["getitem", "iloc"],
) -> None:
"""
If we are positional indexer, validate that we have appropriate
typed bounds must be an integer.
"""
assert kind in ["getitem", "iloc"]

if key is not None and not is_integer(key):
if not lib.is_int_or_none(key):
self._raise_invalid_indexer(form, key)

def _maybe_cast_slice_bound(self, label, side: str_t):
Expand Down
2 changes: 1 addition & 1 deletion pandas/core/indexes/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -784,7 +784,7 @@ def _index_as_unique(self) -> bool:
"cannot handle overlapping indices; use IntervalIndex.get_indexer_non_unique"
)

def _convert_slice_indexer(self, key: slice, kind: str):
def _convert_slice_indexer(self, key: slice, kind: Literal["loc", "getitem"]):
if not (key.step is None or key.step == 1):
# GH#31658 if label-based, we require step == 1,
# if positional, we disallow float start/stop
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@
npt,
)
_empty_range = range(0)
_dtype_int64 = np.dtype(np.int64)


class RangeIndex(Index):
Expand Down Expand Up @@ -309,7 +310,7 @@ def memory_usage(self, deep: bool = False) -> int:

@property
def dtype(self) -> np.dtype:
return np.dtype(np.int64)
return _dtype_int64

@property
def is_unique(self) -> bool:
Expand Down