Skip to content

Commit 60eab98

Browse files
topper-123 authored and jreback committed
TYP: DataFrameGroupBy._cython_agg_blocks (#30465)
1 parent b3335d7 commit 60eab98

File tree

3 files changed

+22
-18
lines changed

3 files changed

+22
-18
lines changed

pandas/core/groupby/generic.py

+18 -16
Original file line number | Diff line number | Diff line change
@@ -16,8 +16,10 @@
1616
Callable,
1717
FrozenSet,
1818
Iterable,
19+
List,
1920
Mapping,
2021
Sequence,
22+
Tuple,
2123
Type,
2224
Union,
2325
cast,
@@ -993,25 +995,25 @@ def _iterate_slices(self) -> Iterable[Series]:
993995
def _cython_agg_general(
994996
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
995997
) -> DataFrame:
996-
agg_items, agg_blocks = self._cython_agg_blocks(
998+
agg_blocks, agg_items = self._cython_agg_blocks(
997999
how, alt=alt, numeric_only=numeric_only, min_count=min_count
9981000
)
9991001
return self._wrap_agged_blocks(agg_blocks, items=agg_items)
10001002

10011003
def _cython_agg_blocks(
10021004
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
1003-
):
1005+
) -> "Tuple[List[Block], Index]":
10041006
# TODO: the actual managing of mgr_locs is a PITA
10051007
# here, it should happen via BlockManager.combine
10061008

1007-
data = self._get_data_to_aggregate()
1009+
data: BlockManager = self._get_data_to_aggregate()
10081010

10091011
if numeric_only:
10101012
data = data.get_numeric_data(copy=False)
10111013

1012-
new_blocks = []
1013-
new_items = []
1014-
deleted_items = []
1014+
agg_blocks: List[Block] = []
1015+
new_items: List[np.ndarray] = []
1016+
deleted_items: List[np.ndarray] = []
10151017
no_result = object()
10161018
for block in data.blocks:
10171019
# Avoid inheriting result from earlier in the loop
@@ -1077,20 +1079,20 @@ def _cython_agg_blocks(
10771079
# reshape to be valid for non-Extension Block
10781080
result = result.reshape(1, -1)
10791081

1080-
newb = block.make_block(result)
1082+
agg_block: Block = block.make_block(result)
10811083

10821084
new_items.append(locs)
1083-
new_blocks.append(newb)
1085+
agg_blocks.append(agg_block)
10841086

1085-
if len(new_blocks) == 0:
1087+
if not agg_blocks:
10861088
raise DataError("No numeric types to aggregate")
10871089

10881090
# reset the locs in the blocks to correspond to our
10891091
# current ordering
10901092
indexer = np.concatenate(new_items)
1091-
new_items = data.items.take(np.sort(indexer))
1093+
agg_items = data.items.take(np.sort(indexer))
10921094

1093-
if len(deleted_items):
1095+
if deleted_items:
10941096

10951097
# we need to adjust the indexer to account for the
10961098
# items we have removed
@@ -1103,12 +1105,12 @@ def _cython_agg_blocks(
11031105
indexer = (ai - mask.cumsum())[indexer]
11041106

11051107
offset = 0
1106-
for b in new_blocks:
1107-
loc = len(b.mgr_locs)
1108-
b.mgr_locs = indexer[offset : (offset + loc)]
1108+
for blk in agg_blocks:
1109+
loc = len(blk.mgr_locs)
1110+
blk.mgr_locs = indexer[offset : (offset + loc)]
11091111
offset += loc
11101112

1111-
return new_items, new_blocks
1113+
return agg_blocks, agg_items
11121114

11131115
def _aggregate_frame(self, func, *args, **kwargs) -> DataFrame:
11141116
if self.grouper.nkeys != 1:
@@ -1615,7 +1617,7 @@ def _wrap_frame_output(self, result, obj) -> DataFrame:
16151617
else:
16161618
return DataFrame(result, index=obj.index, columns=result_index)
16171619

1618-
def _get_data_to_aggregate(self):
1620+
def _get_data_to_aggregate(self) -> BlockManager:
16191621
obj = self._obj_with_exclusions
16201622
if self.axis == 1:
16211623
return obj.T._data

pandas/core/groupby/ops.py

+3 -1
Original file line number | Diff line number | Diff line change
@@ -563,7 +563,9 @@ def _cython_operation(
563563

564564
return result, names
565565

566-
def aggregate(self, values, how: str, axis: int = 0, min_count: int = -1):
566+
def aggregate(
567+
self, values, how: str, axis: int = 0, min_count: int = -1
568+
) -> Tuple[np.ndarray, Optional[List[str]]]:
567569
return self._cython_operation(
568570
"aggregate", values, how, axis, min_count=min_count
569571
)

pandas/core/internals/blocks.py

+1 -1
Original file line number | Diff line number | Diff line change
@@ -242,7 +242,7 @@ def array_dtype(self):
242242
"""
243243
return self.dtype
244244

245-
def make_block(self, values, placement=None):
245+
def make_block(self, values, placement=None) -> "Block":
246246
"""
247247
Create a new block, with type inference propagate any values that are
248248
not specified

0 commit comments

Comments
 (0)