From bcf503644b0a17d955c73cd379a8aec7a2b0ad6b Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 Aug 2020 07:58:15 -0700 Subject: [PATCH 1/3] REF: simplify _cython_agg_blocks --- pandas/core/groupby/generic.py | 27 +++++++++++---------------- 1 file changed, 11 insertions(+), 16 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 166631e69f523..068b57d24bc85 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1101,23 +1101,18 @@ def blk_func(block: "Block") -> List["Block"]: raise else: result = cast(DataFrame, result) + + # In the case of object dtype block, it may have been split + # in the operation. We un-split here. + result = result._consolidate() + assert len(result._mgr.blocks) == 1 + # unwrap DataFrame to get array - if len(result._mgr.blocks) != 1: - # We've split an object block! Everything we've assumed - # about a single block input returning a single block output - # is a lie. To keep the code-path for the typical non-split case - # clean, we choose to clean up this mess later on. - assert len(locs) == result.shape[1] - for i, loc in enumerate(locs): - agg_block = result.iloc[:, [i]]._mgr.blocks[0] - agg_block.mgr_locs = [loc] - new_blocks.append(agg_block) - else: - result = result._mgr.blocks[0].values - if isinstance(result, np.ndarray) and result.ndim == 1: - result = result.reshape(1, -1) - agg_block = cast_result_block(result, block, how) - new_blocks = [agg_block] + result = result._mgr.blocks[0].values + if isinstance(result, np.ndarray) and result.ndim == 1: + result = result.reshape(1, -1) + agg_block = cast_result_block(result, block, how) + new_blocks = [agg_block] else: agg_block = cast_result_block(result, block, how) new_blocks = [agg_block] From 6daf33dc07090ec44416043300b806f360f2b950 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 Aug 2020 08:36:06 -0700 Subject: [PATCH 2/3] mypy fixup --- pandas/core/groupby/generic.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 068b57d24bc85..eee4271eeafd8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -24,7 +24,6 @@ Tuple, Type, Union, - cast, ) import warnings @@ -1100,11 +1099,11 @@ def blk_func(block: "Block") -> List["Block"]: # continue and exclude the block raise else: - result = cast(DataFrame, result) - + assert isinstance(result, DataFrame) # for mypy # In the case of object dtype block, it may have been split # in the operation. We un-split here. result = result._consolidate() + assert isinstance(result, DataFrame) # for mypy assert len(result._mgr.blocks) == 1 # unwrap DataFrame to get array From 0edd87bdbdc24a0c8ee3f95ef7e8e8967362da62 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 21 Aug 2020 09:17:28 -0700 Subject: [PATCH 3/3] fix assertion --- pandas/core/groupby/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index eee4271eeafd8..60e23b14eaf09 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1099,11 +1099,11 @@ def blk_func(block: "Block") -> List["Block"]: # continue and exclude the block raise else: - assert isinstance(result, DataFrame) # for mypy + assert isinstance(result, (Series, DataFrame)) # for mypy # In the case of object dtype block, it may have been split # in the operation. We un-split here. result = result._consolidate() - assert isinstance(result, DataFrame) # for mypy + assert isinstance(result, (Series, DataFrame)) # for mypy assert len(result._mgr.blocks) == 1 # unwrap DataFrame to get array