From 3b940655f85646c489c98344cfcfc5bb00a6231e Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Mar 2021 07:39:21 -0800 Subject: [PATCH 1/2] CLN: remove unreached boolean-mask case from _preprocess_slice_or_indexer --- pandas/core/internals/concat.py | 4 +-- pandas/core/internals/managers.py | 41 ++++++++++++------------ pandas/tests/internals/test_internals.py | 16 ++++++--- 3 files changed, 34 insertions(+), 27 deletions(-) diff --git a/pandas/core/internals/concat.py b/pandas/core/internals/concat.py index e2949eb227fbf..ee684d40cd95f 100644 --- a/pandas/core/internals/concat.py +++ b/pandas/core/internals/concat.py @@ -52,7 +52,7 @@ from pandas import Index -def concatenate_array_managers( +def _concatenate_array_managers( mgrs_indexers, axes: List[Index], concat_axis: int, copy: bool ) -> Manager: """ @@ -110,7 +110,7 @@ def concatenate_managers( """ # TODO(ArrayManager) this assumes that all managers are of the same type if isinstance(mgrs_indexers[0][0], ArrayManager): - return concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy) + return _concatenate_array_managers(mgrs_indexers, axes, concat_axis, copy) concat_plans = [ _get_mgr_concatenation_plan(mgr, indexers) for mgr, indexers in mgrs_indexers diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 6bd3e37ae101e..35175e1a8099b 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -35,6 +35,7 @@ from pandas.core.dtypes.cast import infer_dtype_from_scalar from pandas.core.dtypes.common import ( DT64NS_DTYPE, + ensure_int64, is_dtype_equal, is_extension_array_dtype, is_list_like, @@ -1291,8 +1292,8 @@ def insert( def reindex_indexer( self: T, - new_axis, - indexer, + new_axis: Index, + indexer: Optional[np.ndarray], # TODO: np.ndarray[np.int64] axis: int, fill_value=None, allow_dups: bool = False, @@ -1357,7 +1358,10 @@ def reindex_indexer( return type(self).from_blocks(new_blocks, new_axes) def _slice_take_blocks_ax0( - self, slice_or_indexer, fill_value=lib.no_default, only_slice: bool = False + self, + slice_or_indexer: Union[slice, np.ndarray], + fill_value=lib.no_default, + only_slice: bool = False, ) -> List[Block]: """ Slice/take blocks along axis=0. @@ -1366,7 +1370,7 @@ def _slice_take_blocks_ax0( Parameters ---------- - slice_or_indexer : slice, ndarray[bool], or list-like of ints + slice_or_indexer : slice or np.ndarray[int64] fill_value : scalar, default lib.no_default only_slice : bool, default False If True, we always return views on existing arrays, never copies. @@ -1385,12 +1389,11 @@ def _slice_take_blocks_ax0( if self.is_single_block: blk = self.blocks[0] - if sl_type in ("slice", "mask"): + if sl_type == "slice": # GH#32959 EABlock would fail since we can't make 0-width # TODO(EA2D): special casing unnecessary with 2D EAs if sllen == 0: return [] - # TODO: tests all have isinstance(slobj, slice), other possibilities? return [blk.getitem_block(slobj, new_mgr_locs=slice(0, sllen))] elif not allow_fill or self.ndim == 1: if allow_fill and fill_value is None: @@ -1416,7 +1419,7 @@ def _slice_take_blocks_ax0( ) ] - if sl_type in ("slice", "mask"): + if sl_type == "slice": blknos = self.blknos[slobj] blklocs = self.blklocs[slobj] else: @@ -1652,9 +1655,6 @@ def get_slice(self, slobj: slice, axis: int = 0) -> SingleBlockManager: blk = self._block array = blk._slice(slobj) - if array.ndim > blk.values.ndim: - # This will be caught by Series._get_values - raise ValueError("dimension-expanding indexing not allowed") block = blk.make_block_same_class(array, placement=slice(0, len(array))) new_index = self.index._getitem_slice(slobj) return type(self)(block, new_index) @@ -1969,10 +1969,6 @@ def _merge_blocks( if can_consolidate: - if dtype is None: - if len({b.dtype for b in blocks}) != 1: - raise AssertionError("_merge_blocks are invalid!") - # TODO: optimization potential in case all mgrs contain slices and # combination of those slices is a slice, too. new_mgr_locs = np.concatenate([b.mgr_locs.as_array for b in blocks]) @@ -1999,20 +1995,25 @@ def _fast_count_smallints(arr: np.ndarray) -> np.ndarray: return np.c_[nz, counts[nz]] -def _preprocess_slice_or_indexer(slice_or_indexer, length: int, allow_fill: bool): +def _preprocess_slice_or_indexer( + slice_or_indexer: Union[slice, np.ndarray], length: int, allow_fill: bool +): if isinstance(slice_or_indexer, slice): return ( "slice", slice_or_indexer, libinternals.slice_len(slice_or_indexer, length), ) - elif ( - isinstance(slice_or_indexer, np.ndarray) and slice_or_indexer.dtype == np.bool_ - ): - return "mask", slice_or_indexer, slice_or_indexer.sum() else: + if ( + not isinstance(slice_or_indexer, np.ndarray) + or slice_or_indexer.dtype.kind != "i" + ): + dtype = getattr(slice_or_indexer, "dtype", None) + raise TypeError(type(slice_or_indexer), dtype) + # TODO: np.intp? - indexer = np.asanyarray(slice_or_indexer, dtype=np.int64) + indexer = ensure_int64(slice_or_indexer) if not allow_fill: indexer = maybe_convert_indices(indexer, length) return "fancy", indexer, len(indexer) diff --git a/pandas/tests/internals/test_internals.py b/pandas/tests/internals/test_internals.py index 3c37d827c0778..1728c31ebf767 100644 --- a/pandas/tests/internals/test_internals.py +++ b/pandas/tests/internals/test_internals.py @@ -931,7 +931,9 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): tm.assert_index_equal(reindexed.axes[axis], new_labels) for ax in range(mgr.ndim): - assert_reindex_indexer_is_ok(mgr, ax, Index([]), [], fill_value) + assert_reindex_indexer_is_ok( + mgr, ax, Index([]), np.array([], dtype=np.intp), fill_value + ) assert_reindex_indexer_is_ok( mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax]), fill_value ) @@ -949,22 +951,26 @@ def assert_reindex_indexer_is_ok(mgr, axis, new_labels, indexer, fill_value): mgr, ax, mgr.axes[ax], np.arange(mgr.shape[ax])[::-1], fill_value ) assert_reindex_indexer_is_ok( - mgr, ax, Index(["foo", "bar", "baz"]), [0, 0, 0], fill_value + mgr, ax, Index(["foo", "bar", "baz"]), np.array([0, 0, 0]), fill_value ) assert_reindex_indexer_is_ok( - mgr, ax, Index(["foo", "bar", "baz"]), [-1, 0, -1], fill_value + mgr, ax, Index(["foo", "bar", "baz"]), np.array([-1, 0, -1]), fill_value ) assert_reindex_indexer_is_ok( mgr, ax, Index(["foo", mgr.axes[ax][0], "baz"]), - [-1, -1, -1], + np.array([-1, -1, -1]), fill_value, ) if mgr.shape[ax] >= 3: assert_reindex_indexer_is_ok( - mgr, ax, Index(["foo", "bar", "baz"]), [0, 1, 2], fill_value + mgr, + ax, + Index(["foo", "bar", "baz"]), + np.array([0, 1, 2]), + fill_value, ) From e8a98d0c1aaaeeeacb6bbcf06bb3e53354ffeffa Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 12 Mar 2021 14:54:20 -0800 Subject: [PATCH 2/2] revert annotation mypy doesnt like --- pandas/core/internals/managers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals/managers.py b/pandas/core/internals/managers.py index 8aed47845486f..7bbf341b844b8 100644 --- a/pandas/core/internals/managers.py +++ b/pandas/core/internals/managers.py @@ -1293,7 +1293,7 @@ def insert( def reindex_indexer( self: T, new_axis: Index, - indexer: Optional[np.ndarray], # TODO: np.ndarray[np.int64] + indexer, axis: int, fill_value=None, allow_dups: bool = False,