Skip to content

REF: simpilify _cython_agg_general #41271

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
May 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 34 additions & 33 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -345,47 +345,48 @@ def _aggregate_multiple_funcs(self, arg):
def _cython_agg_general(
self, how: str, alt=None, numeric_only: bool = True, min_count: int = -1
):
output: dict[base.OutputKey, ArrayLike] = {}
# Ideally we would be able to enumerate self._iterate_slices and use
# the index from enumeration as the key of output, but ohlc in particular
# returns a (n x 4) array. Output requires 1D ndarrays as values, so we
# need to slice that up into 1D arrays
idx = 0
for obj in self._iterate_slices():
name = obj.name
is_numeric = is_numeric_dtype(obj.dtype)
if numeric_only and not is_numeric:
continue

objvals = obj._values

if isinstance(objvals, Categorical):
if self.grouper.ngroups > 0:
# without special-casing, we would raise, then in fallback
# would eventually call agg_series but without re-casting
# to Categorical
# equiv: res_values, _ = self.grouper.agg_series(obj, alt)
res_values, _ = self.grouper._aggregate_series_pure_python(obj, alt)
else:
# equiv: res_values = self._python_agg_general(alt)
res_values = self._python_apply_general(alt, self._selected_obj)
obj = self._selected_obj
objvals = obj._values

result = type(objvals)._from_sequence(res_values, dtype=objvals.dtype)
if numeric_only and not is_numeric_dtype(obj.dtype):
raise DataError("No numeric types to aggregate")

else:
# This is overkill because it is only called once, but is here to
# mirror the array_func used in DataFrameGroupBy._cython_agg_general
def array_func(values: ArrayLike) -> ArrayLike:
try:
result = self.grouper._cython_operation(
"aggregate", obj._values, how, axis=0, min_count=min_count
"aggregate", values, how, axis=0, min_count=min_count
)
except NotImplementedError:
ser = Series(values) # equiv 'obj' from outer frame
if self.grouper.ngroups > 0:
res_values, _ = self.grouper.agg_series(ser, alt)
else:
# equiv: res_values = self._python_agg_general(alt)
# error: Incompatible types in assignment (expression has
# type "Union[DataFrame, Series]", variable has type
# "Union[ExtensionArray, ndarray]")
res_values = self._python_apply_general( # type: ignore[assignment]
alt, ser
)

assert result.ndim == 1
key = base.OutputKey(label=name, position=idx)
output[key] = result
idx += 1
if isinstance(values, Categorical):
# Because we only get here with known dtype-preserving
# reductions, we cast back to Categorical.
# TODO: if we ever get "rank" working, exclude it here.
result = type(values)._from_sequence(res_values, dtype=values.dtype)
else:
result = res_values
return result

if not output:
raise DataError("No numeric types to aggregate")
result = array_func(objvals)

return self._wrap_aggregated_output(output)
ser = self.obj._constructor(
result, index=self.grouper.result_index, name=obj.name
)
return self._reindex_output(ser)

def _wrap_aggregated_output(
self,
Expand Down
9 changes: 0 additions & 9 deletions pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1281,15 +1281,6 @@ def _agg_general(
)
except DataError:
pass
except NotImplementedError as err:
if "function is not implemented for this dtype" in str(
err
) or "category dtype not supported" in str(err):
# raised in _get_cython_function, in some cases can
# be trimmed by implementing cython funcs for more dtypes
pass
else:
raise

# apply a non-cython aggregation
if result is None:
Expand Down