Skip to content

CLN: Simplify aggregation.aggregate #37033

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Oct 10, 2020
Merged
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 19 additions & 68 deletions pandas/core/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,92 +608,43 @@ def aggregate(obj, arg: AggFuncType, *args, **kwargs):

from pandas.core.reshape.concat import concat

def _agg_1dim(name, how, subset=None):
"""
aggregate a 1-dim with how
"""
colg = obj._gotitem(name, ndim=1, subset=subset)
if colg.ndim != 1:
raise SpecificationError(
"nested dictionary is ambiguous in aggregation"
)
return colg.aggregate(how)

def _agg_2dim(how):
"""
aggregate a 2-dim with how
"""
colg = obj._gotitem(obj._selection, ndim=2, subset=selected_obj)
return colg.aggregate(how)

def _agg(arg, func):
"""
run the aggregations over the arg with func
return a dict
"""
result = {}
results = {}
if selected_obj.ndim == 1:
# fname only used for output
colg = obj._gotitem(obj._selection, ndim=1)
for fname, agg_how in arg.items():
result[fname] = func(fname, agg_how)
return result
results[fname] = colg.aggregate(agg_how)
else:
# fname used for column selection and output
for fname, agg_how in arg.items():
colg = obj._gotitem(fname, ndim=1)
results[fname] = colg.aggregate(agg_how)

# set the final keys
keys = list(arg.keys())

if obj._selection is not None:

sl = set(obj._selection_list)

# we are a Series like object,
# but may have multiple aggregations
if len(sl) == 1:

result = _agg(
arg, lambda fname, agg_how: _agg_1dim(obj._selection, agg_how)
)

# we are selecting the same set as we are aggregating
elif not len(sl - set(keys)):

result = _agg(arg, _agg_1dim)

# we are a DataFrame, with possibly multiple aggregations
else:

result = _agg(arg, _agg_2dim)

# no selection
else:

try:
result = _agg(arg, _agg_1dim)
except SpecificationError:

# we are aggregating expecting all 1d-returns
# but we have 2d
result = _agg(arg, _agg_2dim)

# combine results

def is_any_series() -> bool:
# return a boolean if we have *any* nested series
return any(isinstance(r, ABCSeries) for r in result.values())
return any(isinstance(r, ABCSeries) for r in results.values())

def is_any_frame() -> bool:
# return a boolean if we have *any* nested DataFrame
return any(isinstance(r, ABCDataFrame) for r in result.values())
return any(isinstance(r, ABCDataFrame) for r in results.values())

if isinstance(result, list):
return concat(result, keys=keys, axis=1, sort=True), True
if isinstance(results, list):
return concat(results, keys=keys, axis=1, sort=True), True

elif is_any_frame():
# we have a dict of DataFrames
# return a MI DataFrame

keys_to_use = [k for k in keys if not result[k].empty]
keys_to_use = [k for k in keys if not results[k].empty]
# Check that at least one DataFrame is non-empty; if none is, keep all keys.
keys_to_use = keys_to_use if keys_to_use != [] else keys
return (
concat([result[k] for k in keys_to_use], keys=keys_to_use, axis=1),
concat([results[k] for k in keys_to_use], keys=keys_to_use, axis=1),
True,
)

Expand All @@ -702,7 +653,7 @@ def is_any_frame() -> bool:
# we have a dict of Series
# return a MI Series
try:
result = concat(result)
result = concat(results)
except TypeError as err:
# we want to give a nice error here if
# we have non-same sized objects, so
Expand All @@ -720,7 +671,7 @@ def is_any_frame() -> bool:
from pandas import DataFrame, Series

try:
result = DataFrame(result)
result = DataFrame(results)
except ValueError:
# we have a dict of scalars

Expand All @@ -731,7 +682,7 @@ def is_any_frame() -> bool:
else:
name = None

result = Series(result, name=name)
result = Series(results, name=name)

return result, True
elif is_list_like(arg):
Expand Down