-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
REF: _cython_agg_blocks follow patterns similar to _apply_blockwise #35632
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 8 commits
9ba0c86
288ca6d
13e9f52
ff61504
8ca83c3
6dc6a50
e265ec9
6b33a5c
2d1ec3c
dbf5592
20a618d
ee66b00
663f93b
836c60b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1027,7 +1027,6 @@ def _cython_agg_blocks( | |
data = data.get_numeric_data(copy=False) | ||
|
||
agg_blocks: List[Block] = [] | ||
new_items: List[np.ndarray] = [] | ||
deleted_items: List[np.ndarray] = [] | ||
|
||
no_result = object() | ||
|
@@ -1059,8 +1058,7 @@ def cast_result_block(result, block: "Block", how: str) -> "Block": | |
agg_block: Block = block.make_block(result) | ||
return agg_block | ||
|
||
for block in data.blocks: | ||
# Avoid inheriting result from earlier in the loop | ||
def blk_func(block): | ||
result = no_result | ||
locs = block.mgr_locs.as_array | ||
try: | ||
|
@@ -1076,8 +1074,7 @@ def cast_result_block(result, block: "Block", how: str) -> "Block": | |
# we cannot perform the operation | ||
# in an alternate way, exclude the block | ||
assert how == "ohlc" | ||
deleted_items.append(locs) | ||
continue | ||
raise | ||
|
||
# call our grouper again with only this block | ||
obj = self.obj[data.items[locs]] | ||
|
@@ -1096,8 +1093,7 @@ def cast_result_block(result, block: "Block", how: str) -> "Block": | |
except TypeError: | ||
# we may have an exception in trying to aggregate | ||
# continue and exclude the block | ||
deleted_items.append(locs) | ||
continue | ||
raise | ||
else: | ||
result = cast(DataFrame, result) | ||
# unwrap DataFrame to get array | ||
|
@@ -1107,21 +1103,35 @@ def cast_result_block(result, block: "Block", how: str) -> "Block": | |
# is a lie. To keep the code-path for the typical non-split case | ||
# clean, we choose to clean up this mess later on. | ||
assert len(locs) == result.shape[1] | ||
new_blocks = [] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you type this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you type this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. And move this to a higher scope (as it is used in both parts). There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We only append to it in this scope; everywhere else we set it directly. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is used in 3 blocks (sure, you don't append and instead directly assign in the other scopes), but it is the same name. I would simply define it once, then append for each (or leaving the assignment for the others is OK, I guess); it is just much easier to grok the fact that we are defining the same variable. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done + green. |
||
for i, loc in enumerate(locs): | ||
new_items.append(np.array([loc], dtype=locs.dtype)) | ||
agg_block = result.iloc[:, [i]]._mgr.blocks[0] | ||
agg_blocks.append(agg_block) | ||
new_blocks.append(agg_block) | ||
else: | ||
result = result._mgr.blocks[0].values | ||
if isinstance(result, np.ndarray) and result.ndim == 1: | ||
result = result.reshape(1, -1) | ||
agg_block = cast_result_block(result, block, how) | ||
new_items.append(locs) | ||
agg_blocks.append(agg_block) | ||
new_blocks = [agg_block] | ||
else: | ||
agg_block = cast_result_block(result, block, how) | ||
new_items.append(locs) | ||
agg_blocks.append(agg_block) | ||
new_blocks = [agg_block] | ||
return new_blocks | ||
|
||
skipped = [] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you type this? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Done. |
||
new_items: List[np.ndarray] = [] | ||
for i, block in enumerate(data.blocks): | ||
try: | ||
nbs = blk_func(block) | ||
except (NotImplementedError, TypeError): | ||
# TypeError -> we may have an exception in trying to aggregate | ||
# continue and exclude the block | ||
# NotImplementedError -> "ohlc" with wrong dtype | ||
skipped.append(i) | ||
deleted_items.append(block.mgr_locs.as_array) | ||
else: | ||
agg_blocks.extend(nbs) | ||
new_items.append(block.mgr_locs.as_array) | ||
|
||
if not agg_blocks: | ||
raise DataError("No numeric types to aggregate") | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
can you type this