From a90098fb8d72a446402480bcf38dd0012bc2f17b Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Sun, 18 Oct 2020 16:07:04 -0400 Subject: [PATCH 1/3] CLN: Simplify gathering of results in aggregate --- pandas/core/aggregation.py | 48 +++++--------------- pandas/tests/frame/apply/test_frame_apply.py | 5 +- 2 files changed, 14 insertions(+), 39 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index ba7638e269fc0..e00e64131c435 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -622,37 +622,17 @@ def aggregate(obj, arg: AggFuncType, *args, **kwargs): keys = list(arg.keys()) # combine results + from pandas.core.generic import NDFrame - def is_any_series() -> bool: - # return a boolean if we have *any* nested series - return any(isinstance(r, ABCSeries) for r in results.values()) - - def is_any_frame() -> bool: - # return a boolean if we have *any* nested series - return any(isinstance(r, ABCDataFrame) for r in results.values()) - - if isinstance(results, list): - return concat(results, keys=keys, axis=1, sort=True), True - - elif is_any_frame(): - # we have a dict of DataFrames - # return a MI DataFrame - - keys_to_use = [k for k in keys if not results[k].empty] - # Have to check, if at least one DataFrame is not empty. - keys_to_use = keys_to_use if keys_to_use != [] else keys - return ( - concat([results[k] for k in keys_to_use], keys=keys_to_use, axis=1), - True, - ) - - elif isinstance(obj, ABCSeries) and is_any_series(): - - # we have a dict of Series - # return a MI Series + if any(isinstance(r, NDFrame) for r in results.values()): try: - result = concat(results) - except TypeError as err: + keys_to_use = [k for k in keys if not results[k].empty] + # Have to check, if at least one DataFrame is not empty. + keys_to_use = keys_to_use if keys_to_use != [] else keys + axis = 0 if isinstance(obj, ABCSeries) else 1 + result = concat({k: results[k] for k in keys_to_use}, axis=axis) + # Raised if some value of results is not a NDFrame + except AttributeError as err: # we want to give a nice error here if # we have non-same sized objects, so # we don't automatically broadcast @@ -663,16 +643,10 @@ def is_any_frame() -> bool: "simultaneously" ) from err - return result, True - - # fall thru - from pandas import DataFrame, Series + else: + from pandas import Series - try: - result = DataFrame(results) - except ValueError: # we have a dict of scalars - # GH 36212 use name only if obj is a series if obj.ndim == 1: obj = cast("Series", obj) diff --git a/pandas/tests/frame/apply/test_frame_apply.py b/pandas/tests/frame/apply/test_frame_apply.py index 598da9c52731e..f3cb630d1be0c 100644 --- a/pandas/tests/frame/apply/test_frame_apply.py +++ b/pandas/tests/frame/apply/test_frame_apply.py @@ -1254,7 +1254,7 @@ def test_agg_reduce(self, axis, float_frame): # dict input with lists with multiple func = dict([(name1, ["mean", "sum"]), (name2, ["sum", "max"])]) result = float_frame.agg(func, axis=axis) - expected = DataFrame( + expected = pd.concat( dict( [ ( @@ -1278,7 +1278,8 @@ def test_agg_reduce(self, axis, float_frame): ), ), ] - ) + ), + axis=1, ) expected = expected.T if axis in {1, "columns"} else expected tm.assert_frame_equal(result, expected) From 3cf4db56417cf17c28dac9268733dbe7583a2448 Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 21 Oct 2020 20:05:16 -0400 Subject: [PATCH 2/3] Added ABCNDFrame, removed try-except. --- pandas/core/aggregation.py | 39 +++++++++++++++-------------------- pandas/core/dtypes/generic.py | 1 + 2 files changed, 18 insertions(+), 22 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index e00e64131c435..4ab0c2515f5fa 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -31,7 +31,7 @@ from pandas.core.dtypes.cast import is_nested_object from pandas.core.dtypes.common import is_dict_like, is_list_like -from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries +from pandas.core.dtypes.generic import ABCDataFrame, ABCNDFrame, ABCSeries from pandas.core.base import DataError, SpecificationError import pandas.core.common as com @@ -621,28 +621,23 @@ def aggregate(obj, arg: AggFuncType, *args, **kwargs): # set the final keys keys = list(arg.keys()) - # combine results - from pandas.core.generic import NDFrame - - if any(isinstance(r, NDFrame) for r in results.values()): - try: - keys_to_use = [k for k in keys if not results[k].empty] - # Have to check, if at least one DataFrame is not empty. - keys_to_use = keys_to_use if keys_to_use != [] else keys - axis = 0 if isinstance(obj, ABCSeries) else 1 - result = concat({k: results[k] for k in keys_to_use}, axis=axis) - # Raised if some value of results is not a NDFrame - except AttributeError as err: - # we want to give a nice error here if - # we have non-same sized objects, so - # we don't automatically broadcast - - raise ValueError( - "cannot perform both aggregation " - "and transformation operations " - "simultaneously" - ) from err + # Avoid making two isinstance calls in all and any below + is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()] + # combine results + if all(is_ndframe): + keys_to_use = [k for k in keys if not results[k].empty] + # Have to check, if at least one DataFrame is not empty. + keys_to_use = keys_to_use if keys_to_use != [] else keys + axis = 0 if isinstance(obj, ABCSeries) else 1 + result = concat({k: results[k] for k in keys_to_use}, axis=axis) + elif any(is_ndframe): + # There is a mix of NDFrames and scalars + raise ValueError( + "cannot perform both aggregation " + "and transformation operations " + "simultaneously" + ) else: from pandas import Series diff --git a/pandas/core/dtypes/generic.py b/pandas/core/dtypes/generic.py index 1f1017cfc1929..7d2549713c6bc 100644 --- a/pandas/core/dtypes/generic.py +++ b/pandas/core/dtypes/generic.py @@ -53,6 +53,7 @@ def _check(cls, inst) -> bool: }, ) +ABCNDFrame = create_pandas_abc_type("ABCNDFrame", "_typ", ("series", "dataframe")) ABCSeries = create_pandas_abc_type("ABCSeries", "_typ", ("series",)) ABCDataFrame = create_pandas_abc_type("ABCDataFrame", "_typ", ("dataframe",)) From db01e1b7ceb00a3069876aa6721a49a6f58cb45d Mon Sep 17 00:00:00 2001 From: Richard Shadrach Date: Wed, 21 Oct 2020 20:32:38 -0400 Subject: [PATCH 3/3] Added ABCNDFrame, removed try-except. --- pandas/core/frame.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 801307a8f9481..84b66d0c9519e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -7426,9 +7426,9 @@ def _gotitem( >>> df.agg({'A' : ['sum', 'min'], 'B' : ['min', 'max']}) A B - max NaN 8.0 - min 1.0 2.0 sum 12.0 NaN + min 1.0 2.0 + max NaN 8.0 Aggregate different functions over the columns and rename the index of the resulting DataFrame.