diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py
index dc343f670b725..a7eea5364a547 100644
--- a/pandas/core/groupby/generic.py
+++ b/pandas/core/groupby/generic.py
@@ -16,8 +16,10 @@
     Callable,
     FrozenSet,
     Iterable,
+    List,
     Mapping,
     Sequence,
+    Tuple,
     Type,
     Union,
     cast,
@@ -993,25 +995,25 @@ def _iterate_slices(self) -> Iterable[Series]:
     def _cython_agg_general(
         self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
     ) -> DataFrame:
-        agg_items, agg_blocks = self._cython_agg_blocks(
+        agg_blocks, agg_items = self._cython_agg_blocks(
             how, alt=alt, numeric_only=numeric_only, min_count=min_count
         )
         return self._wrap_agged_blocks(agg_blocks, items=agg_items)
 
     def _cython_agg_blocks(
         self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
-    ):
+    ) -> "Tuple[List[Block], Index]":
         # TODO: the actual managing of mgr_locs is a PITA
         # here, it should happen via BlockManager.combine
 
-        data = self._get_data_to_aggregate()
+        data: BlockManager = self._get_data_to_aggregate()
 
         if numeric_only:
             data = data.get_numeric_data(copy=False)
 
-        new_blocks = []
-        new_items = []
-        deleted_items = []
+        agg_blocks: List[Block] = []
+        new_items: List[np.ndarray] = []
+        deleted_items: List[np.ndarray] = []
         no_result = object()
         for block in data.blocks:
             # Avoid inheriting result from earlier in the loop
@@ -1077,20 +1079,20 @@ def _cython_agg_blocks(
                         # reshape to be valid for non-Extension Block
                         result = result.reshape(1, -1)
 
-            newb = block.make_block(result)
+            agg_block: Block = block.make_block(result)
 
             new_items.append(locs)
-            new_blocks.append(newb)
+            agg_blocks.append(agg_block)
 
-        if len(new_blocks) == 0:
+        if not agg_blocks:
             raise DataError("No numeric types to aggregate")
 
         # reset the locs in the blocks to correspond to our
         # current ordering
         indexer = np.concatenate(new_items)
-        new_items = data.items.take(np.sort(indexer))
+        agg_items = data.items.take(np.sort(indexer))
 
-        if len(deleted_items):
+        if deleted_items:
 
             # we need to adjust the indexer to account for the
             # items we have removed
@@ -1103,12 +1105,12 @@ def _cython_agg_blocks(
             indexer = (ai - mask.cumsum())[indexer]
 
         offset = 0
-        for b in new_blocks:
-            loc = len(b.mgr_locs)
-            b.mgr_locs = indexer[offset : (offset + loc)]
+        for blk in agg_blocks:
+            loc = len(blk.mgr_locs)
+            blk.mgr_locs = indexer[offset : (offset + loc)]
             offset += loc
 
-        return new_items, new_blocks
+        return agg_blocks, agg_items
 
     def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
         if self.grouper.nkeys != 1:
@@ -1615,7 +1617,7 @@ def _wrap_frame_output(self, result, obj) -> DataFrame:
         else:
             return DataFrame(result, index=obj.index, columns=result_index)
 
-    def _get_data_to_aggregate(self):
+    def _get_data_to_aggregate(self) -> BlockManager:
         obj = self._obj_with_exclusions
         if self.axis == 1:
             return obj.T._data
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index a99ebe77e8254..37067a1897a52 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -563,7 +563,9 @@ def _cython_operation(
 
         return result, names
 
-    def aggregate(self, values, how: str, axis: int = 0, min_count: int = -1):
+    def aggregate(
+        self, values, how: str, axis: int = 0, min_count: int = -1
+    ) -> Tuple[np.ndarray, Optional[List[str]]]:
         return self._cython_operation(
             "aggregate", values, how, axis, min_count=min_count
         )
diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py
index eb5b5181d894d..122080bbf3ce7 100644
--- a/pandas/core/internals/blocks.py
+++ b/pandas/core/internals/blocks.py
@@ -242,7 +242,7 @@ def array_dtype(self):
         """
         return self.dtype
 
-    def make_block(self, values, placement=None):
+    def make_block(self, values, placement=None) -> "Block":
        """
        Create a new block, with type inference propagate any values that are
        not specified
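
For context, a minimal usage sketch (not part of the patch) of the public code path these internal changes sit on: a numeric groupby aggregation such as DataFrame.groupby(...).mean() routes through _cython_agg_general, which now unpacks agg_blocks and agg_items, in that order, from _cython_agg_blocks. The sample data below is illustrative only.

# Illustrative sketch, not part of the patch: exercises the block-wise
# cython aggregation path refactored above. .mean() dispatches to
# _cython_agg_general("mean"), which unpacks (agg_blocks, agg_items)
# from _cython_agg_blocks and wraps them into the result DataFrame.
import pandas as pd

df = pd.DataFrame({"key": ["a", "a", "b"], "x": [1.0, 2.0, 3.0], "y": [10, 20, 30]})
result = df.groupby("key").mean()
print(result)
# group "a": x -> 1.5, y -> 15.0; group "b": x -> 3.0, y -> 30.0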