Skip to content

BUG: Fix TypeError in _cython_agg_blocks #29035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Oct 17, 2019
21 changes: 19 additions & 2 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,6 +966,11 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1):

# call our grouper again with only this block
obj = self.obj[data.items[locs]]
if obj.shape[1] == 1:
# Avoid call to self.values that can occur in DataFrame
# reductions; see GH#28949
obj = obj.iloc[:, 0]

s = groupby(obj, self.grouper)
try:
result = s.aggregate(lambda x: alt(x, axis=self.axis))
Expand All @@ -974,17 +979,29 @@ def _cython_agg_blocks(self, how, alt=None, numeric_only=True, min_count=-1):
# continue and exclude the block
deleted_items.append(locs)
continue

# unwrap DataFrame to get array
assert len(result._data.blocks) == 1
result = result._data.blocks[0].values
if result.ndim == 1 and isinstance(result, np.ndarray):
result = result.reshape(1, -1)

finally:
assert not isinstance(result, DataFrame)

if result is not no_result:
# see if we can cast the block back to the original dtype
result = maybe_downcast_numeric(result, block.dtype)

- if result.ndim == 1 and isinstance(result, np.ndarray):
+ if block.is_extension and isinstance(result, np.ndarray):
# e.g. block.values was an IntegerArray
# (1, N) case can occur if block.values was Categorical
# and result is ndarray[object]
assert result.ndim == 1 or result.shape[0] == 1
try:
# Cast back if feasible
result = type(block.values)._from_sequence(
- result, dtype=block.values.dtype
+ result.ravel(), dtype=block.values.dtype
)
except ValueError:
# reshape to be valid for non-Extension Block
Expand Down
15 changes: 8 additions & 7 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1357,13 +1357,14 @@ def f(self, **kwargs):
raise SpecificationError(str(e))
except DataError:
pass
- except Exception:
- # TODO: the remaining test cases that get here are from:
- # - AttributeError from _cython_agg_blocks bug passing
- # DataFrame to make_block; see GH#28275
- # - TypeError in _cython_operation calling ensure_float64
- # on object array containing complex numbers;
- # see test_groupby_complex, test_max_nan_bug
+ except (TypeError, NotImplementedError):
+ # TODO:
+ # - TypeError: this is reached via test_groupby_complex
+ # and can be fixed by implementing _group_add for
+ # complex dtypes
+ # - NotImplementedError: reached in test_max_nan_bug,
+ # raised in _get_cython_function and should probably
+ # be handled inside _cython_agg_blocks
pass

# apply a non-cython aggregation
Expand Down