diff --git a/pandas/_libs/internals.pyx b/pandas/_libs/internals.pyx index 150b7f62b4b26..9b2db897cdd32 100644 --- a/pandas/_libs/internals.pyx +++ b/pandas/_libs/internals.pyx @@ -15,11 +15,13 @@ cimport numpy as cnp from numpy cimport ( NPY_INT64, int64_t, + ndarray, ) cnp.import_array() from pandas._libs.algos import ensure_int64 +from pandas._libs.util cimport is_integer_object @cython.final @@ -27,10 +29,10 @@ cdef class BlockPlacement: # __slots__ = '_as_slice', '_as_array', '_len' cdef: slice _as_slice - object _as_array + ndarray _as_array # Note: this still allows `None` bint _has_slice, _has_array, _is_known_slice_like - def __init__(self, val): + def __cinit__(self, val): cdef: slice slc @@ -39,7 +41,7 @@ cdef class BlockPlacement: self._has_slice = False self._has_array = False - if isinstance(val, int): + if is_integer_object(val): slc = slice(val, val + 1, 1) self._as_slice = slc self._has_slice = True @@ -160,12 +162,12 @@ cdef class BlockPlacement: np.concatenate([self.as_array] + [o.as_array for o in others]) ) - cdef iadd(self, other): + cdef BlockPlacement iadd(self, other): cdef: slice s = self._ensure_has_slice() Py_ssize_t other_int, start, stop, step, l - if isinstance(other, int) and s is not None: + if is_integer_object(other) and s is not None: other_int = other if other_int == 0: @@ -438,13 +440,13 @@ def get_blkno_placements(blknos, group: bool = True): """ Parameters ---------- - blknos : array of int64 + blknos : np.ndarray[int64] group : bool, default True Returns ------- iterator - yield (BlockPlacement, blkno) + yield (blkno, BlockPlacement) """ blknos = ensure_int64(blknos) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 7f5e7e3a32f14..09c143468bc31 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4534,7 +4534,6 @@ def __getitem__(self, key): # There's no custom logic to be implemented in __getslice__, so it's # not overloaded intentionally. 
getitem = self._data.__getitem__ - promote = self._shallow_copy if is_scalar(key): key = com.cast_scalar_indexer(key, warn_float=True) @@ -4543,7 +4542,9 @@ def __getitem__(self, key): if isinstance(key, slice): # This case is separated from the conditional above to avoid # pessimization of basic indexing. - return promote(getitem(key)) + result = getitem(key) + # Going through simple_new for performance. + return type(self)._simple_new(result, name=self.name) if com.is_bool_indexer(key): key = np.asarray(key, dtype=bool) @@ -4553,7 +4554,9 @@ def __getitem__(self, key): if np.ndim(result) > 1: deprecate_ndim_indexing(result) return result - return promote(result) + # NB: Using _constructor._simple_new would break if MultiIndex + # didn't override __getitem__ + return self._constructor._simple_new(result, name=self.name) else: return result diff --git a/pandas/core/indexes/category.py b/pandas/core/indexes/category.py index 13c53dfafed4d..7a204dcce8a88 100644 --- a/pandas/core/indexes/category.py +++ b/pandas/core/indexes/category.py @@ -231,7 +231,7 @@ def _simple_new(cls, values: Categorical, name: Optional[Hashable] = None): result = object.__new__(cls) result._data = values - result.name = name + result._name = name result._cache = {} result._reset_identity() diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 88b92c7b304ae..fd0e0ef5fa799 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2076,15 +2076,16 @@ def __getitem__(self, key): return tuple(retval) else: + # in general cannot be sure whether the result will be sorted + sortorder = None if com.is_bool_indexer(key): key = np.asarray(key, dtype=bool) sortorder = self.sortorder - else: - # cannot be sure whether the result will be sorted - sortorder = None - - if isinstance(key, Index): - key = np.asarray(key) + elif isinstance(key, slice): + if key.step is None or key.step > 0: + sortorder = self.sortorder + elif isinstance(key, Index): + key = 
np.asarray(key) new_codes = [level_codes[key] for level_codes in self.codes] diff --git a/pandas/core/indexes/range.py b/pandas/core/indexes/range.py index 5c97361aa53fe..bf5a9825f04d0 100644 --- a/pandas/core/indexes/range.py +++ b/pandas/core/indexes/range.py @@ -164,7 +164,7 @@ def _simple_new(cls, values: range, name: Hashable = None) -> RangeIndex: assert isinstance(values, range) result._range = values - result.name = name + result._name = name result._cache = {} result._reset_identity() return result diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 597023cb5b000..fc29c89612a35 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -449,7 +449,7 @@ def _split_op_result(self, result) -> List[Block]: nbs = [] for i, loc in enumerate(self.mgr_locs): vals = result[i] - block = self.make_block(values=vals, placement=[loc]) + block = self.make_block(values=vals, placement=loc) nbs.append(block) return nbs diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 744d3453c8a96..b656c9e83e1a8 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -149,6 +149,9 @@ class BlockManager(DataManager): _blknos: np.ndarray _blklocs: np.ndarray + # Non-trivially faster than a property + ndim = 2 # overridden by SingleBlockManager + def __init__( self, blocks: Sequence[Block], @@ -173,6 +176,21 @@ def __init__( self._blknos = None self._blklocs = None + @classmethod + def _simple_new(cls, blocks: Tuple[Block, ...], axes: List[Index]): + """ + Fastpath constructor; does NO validation. 
+ """ + obj = cls.__new__(cls) + obj.axes = axes + obj.blocks = blocks + + # Populate _known_consolidated, _blknos, and _blklocs lazily + obj._known_consolidated = False + obj._blknos = None + obj._blklocs = None + return obj + @classmethod def from_blocks(cls, blocks: List[Block], axes: List[Index]): """ @@ -233,10 +251,6 @@ def __nonzero__(self) -> bool: def shape(self) -> Shape: return tuple(len(ax) for ax in self.axes) - @property - def ndim(self) -> int: - return len(self.axes) - def _normalize_axis(self, axis): # switch axis to follow BlockManager logic if self.ndim == 2: @@ -800,8 +814,7 @@ def get_slice(self, slobj: slice, axis: int = 0) -> BlockManager: new_axes = list(self.axes) new_axes[axis] = new_axes[axis][slobj] - bm = type(self)(new_blocks, new_axes, verify_integrity=False) - return bm + return type(self)._simple_new(tuple(new_blocks), new_axes) @property def nblocks(self) -> int: @@ -1322,7 +1335,7 @@ def reindex_indexer( def _slice_take_blocks_ax0( self, slice_or_indexer, fill_value=lib.no_default, only_slice: bool = False - ): + ) -> List[Block]: """ Slice/take blocks along axis=0.