Skip to content

CLN: prevent libreduction TypeError #29228

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Oct 29, 2019
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -1104,6 +1104,7 @@ def _aggregate_item_by_item(self, func, *args, **kwargs):
# raised in _aggregate_named, handle at higher level
# see test_apply_with_mutated_index
raise
# otherwise we get here from an AttributeError in _make_wrapper
cannot_agg.append(item)
continue

Expand Down Expand Up @@ -1466,7 +1467,8 @@ def _transform_item_by_item(self, obj, wrapper):
output[col] = self[col].transform(wrapper)
except AssertionError:
raise
except Exception:
except TypeError:
# e.g. trying to call nanmean with string values
pass
else:
inds.append(i)
Expand Down
5 changes: 4 additions & 1 deletion pandas/core/groupby/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,12 +641,15 @@ def curried(x):
# if we don't have this method to indicate how to aggregate,
# mark this column as an error
try:
return self._aggregate_item_by_item(name, *args, **kwargs)
result = self._aggregate_item_by_item(name, *args, **kwargs)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we just replace this function altogether with _python_agg_general? Conceptually it would seem like they do the same thing, though the latter would actually be generic across Series and DataFrame.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I haven't checked specifically, but I'm hoping we can trim these before long

assert self.obj.ndim == 2
return result
except AttributeError:
# e.g. SparseArray has no flags attr
# FIXME: 'SeriesGroupBy' has no attribute '_aggregate_item_by_item'
# occurs in idxmax() case
# in tests.groupby.test_function.test_non_cython_api
assert self.obj.ndim == 1
raise ValueError

wrapper.__name__ = name
Expand Down
13 changes: 5 additions & 8 deletions pandas/core/groupby/ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -669,13 +669,16 @@ def agg_series(self, obj, func):
if is_extension_array_dtype(obj.dtype) and obj.dtype.kind != "M":
# _aggregate_series_fast would raise TypeError when
# calling libreduction.Slider
# TODO: can we get a performant workaround for EAs backed by ndarray?
# TODO: is the datetime64tz case supposed to go through here?
return self._aggregate_series_pure_python(obj, func)

elif obj.index._has_complex_internals:
# MultiIndex; Pre-empt TypeError in _aggregate_series_fast
return self._aggregate_series_pure_python(obj, func)

try:
return self._aggregate_series_fast(obj, func)
except AssertionError:
raise
except ValueError as err:
if "No result." in str(err):
# raised in libreduction
Expand All @@ -685,12 +688,6 @@ def agg_series(self, obj, func):
pass
else:
raise
except TypeError as err:
if "ndarray" in str(err):
# raised in libreduction if obj's values are not an ndarray
pass
else:
raise
return self._aggregate_series_pure_python(obj, func)

def _aggregate_series_fast(self, obj, func):
Expand Down