Skip to content

ENH: Implement Keyword Aggregation for DataFrame.agg and Series.agg #29116

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 148 commits into from
Jul 10, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
148 commits
Select commit Hold shift + click to select a range
7e461a1
remove \n from docstring
charlesdong1991 Dec 3, 2018
1314059
fix conflicts
charlesdong1991 Jan 19, 2019
8bcb313
Merge remote-tracking branch 'upstream/master'
charlesdong1991 Jul 30, 2019
7bc368d
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Oct 20, 2019
cf5c6c3
Implement agg for DataFrame
charlesdong1991 Oct 20, 2019
5298331
fix conflict
charlesdong1991 Oct 20, 2019
4fb74b5
remove unused import
charlesdong1991 Oct 20, 2019
97209be
remove print
charlesdong1991 Oct 20, 2019
ca273ff
fix test
charlesdong1991 Oct 20, 2019
1d2ab15
fix typo
charlesdong1991 Oct 20, 2019
3ca193c
add keyword agg for series
charlesdong1991 Oct 20, 2019
c8f80ed
fix linting
charlesdong1991 Oct 20, 2019
8c738e9
fix PY35 issue
charlesdong1991 Oct 21, 2019
d4d9ea4
try to fix py35 order issue
charlesdong1991 Oct 21, 2019
2a6de27
test if fixed
charlesdong1991 Oct 21, 2019
21e09f9
test again
charlesdong1991 Oct 21, 2019
058a8e9
simpler code
charlesdong1991 Oct 21, 2019
0da68d8
test py35
charlesdong1991 Oct 22, 2019
15e3659
fix conflict
charlesdong1991 Oct 22, 2019
438398d
test PY35
charlesdong1991 Oct 23, 2019
d47b790
try to fix py35
charlesdong1991 Oct 23, 2019
832b8d9
find py35 output
charlesdong1991 Oct 23, 2019
5a3b690
test py35
charlesdong1991 Oct 23, 2019
4fb86f0
retest py35
charlesdong1991 Oct 23, 2019
a1369bf
retest py35
charlesdong1991 Oct 23, 2019
ef981a3
try to fix py35
charlesdong1991 Oct 23, 2019
82c8960
try to fix py35
charlesdong1991 Oct 23, 2019
c610391
try one more time
charlesdong1991 Oct 23, 2019
679ba59
fix typo
charlesdong1991 Oct 23, 2019
2ee2628
py35
charlesdong1991 Oct 23, 2019
31f7033
skip PY35
charlesdong1991 Oct 23, 2019
2acb244
skip py35
charlesdong1991 Oct 23, 2019
dfbd67a
fix typo
charlesdong1991 Oct 23, 2019
ff5e60f
skip all py35
charlesdong1991 Oct 23, 2019
7c6c891
skip py35 for series
charlesdong1991 Oct 23, 2019
3e55fcb
fix test
charlesdong1991 Oct 23, 2019
6d74b29
skip series py35
charlesdong1991 Oct 23, 2019
532337e
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Oct 29, 2019
400ff3e
merge master and remove helper
charlesdong1991 Nov 8, 2019
05af2de
remove helper
charlesdong1991 Nov 8, 2019
6206fa4
remove py36
charlesdong1991 Nov 8, 2019
34199ad
put back imports
charlesdong1991 Nov 8, 2019
15d099c
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Nov 8, 2019
c56f05f
avoid circular dependency
charlesdong1991 Nov 8, 2019
d3f0620
fix linting
charlesdong1991 Nov 8, 2019
20ecfda
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Dec 19, 2019
89b8e6b
code change based on review
charlesdong1991 Dec 20, 2019
8aa1cc9
remove util
charlesdong1991 Dec 20, 2019
091ca75
Add docstring
charlesdong1991 Dec 20, 2019
c2d5104
fix circular import
charlesdong1991 Dec 20, 2019
50ebdaf
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jan 3, 2020
0484f5e
reorg and deduplicate
charlesdong1991 Jan 3, 2020
425c802
remove used imports
charlesdong1991 Jan 3, 2020
d5c2c6c
fix linting
charlesdong1991 Jan 3, 2020
8bb9714
fix wrong import
charlesdong1991 Jan 3, 2020
2607c5d
fix conflict
charlesdong1991 Jan 3, 2020
0545231
isort
charlesdong1991 Jan 3, 2020
0a27889
fix mypy
charlesdong1991 Jan 3, 2020
a66053e
Code change based on review
charlesdong1991 Jan 6, 2020
7311ef0
dropna
charlesdong1991 Jan 6, 2020
da2ff37
fix logic
charlesdong1991 Jan 7, 2020
bcc5bc3
fix logic
charlesdong1991 Jan 7, 2020
0825027
remove unused
charlesdong1991 Jan 7, 2020
d3c35f5
fix linting
charlesdong1991 Jan 7, 2020
cef2b50
simpler python
charlesdong1991 Jan 7, 2020
b96a942
fix conflicts
charlesdong1991 Jan 8, 2020
3123284
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jan 9, 2020
7bb3bd0
fix conflicts
charlesdong1991 Jan 21, 2020
3da2e2a
fix merge error
charlesdong1991 Jan 21, 2020
3ce91fc
fixup
charlesdong1991 Jan 21, 2020
1426ee2
fix annotation
charlesdong1991 Jan 21, 2020
5893a0e
fix annotation
charlesdong1991 Jan 21, 2020
cc85db4
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jan 23, 2020
0f55073
move code
charlesdong1991 Jan 25, 2020
90d52ba
move it back
charlesdong1991 Jan 25, 2020
381a697
fixup
charlesdong1991 Jan 25, 2020
238b4cc
add docstring
charlesdong1991 Jan 25, 2020
f8e1891
add func
charlesdong1991 Jan 25, 2020
66e9b38
isort
charlesdong1991 Jan 25, 2020
f4d8a4f
fix linting
charlesdong1991 Jan 26, 2020
c3e34a0
fix linting
charlesdong1991 Jan 26, 2020
0c0dbad
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jan 26, 2020
61f6201
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jan 28, 2020
88c7751
code change on JR reviews
charlesdong1991 Feb 2, 2020
e2b957a
move
charlesdong1991 Feb 2, 2020
99f75b2
linting
charlesdong1991 Feb 2, 2020
30b7296
isort
charlesdong1991 Feb 2, 2020
baea583
code change
charlesdong1991 Feb 12, 2020
04bffe6
add docstring
charlesdong1991 Feb 12, 2020
42091c3
add None back
charlesdong1991 Feb 12, 2020
fc13e19
fix annotation
charlesdong1991 Feb 12, 2020
1403426
better annotation
charlesdong1991 Feb 12, 2020
3d9655e
fix annotation
charlesdong1991 Feb 12, 2020
d78c57c
fix annotation
charlesdong1991 Feb 12, 2020
0487928
fix linting
charlesdong1991 Feb 16, 2020
7435ac5
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Feb 19, 2020
f1cd16c
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Feb 23, 2020
469691c
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Feb 24, 2020
1bb35b5
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Feb 27, 2020
7a6f496
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Mar 15, 2020
3730f7d
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Mar 30, 2020
cd8d00f
simpler python
charlesdong1991 Mar 30, 2020
6dddd55
simpler python
charlesdong1991 Mar 30, 2020
96dc3ed
fixup
charlesdong1991 Mar 30, 2020
075b85b
simplification
charlesdong1991 Mar 30, 2020
a44471c
better docs
charlesdong1991 Mar 30, 2020
0e2eae4
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Apr 10, 2020
2fb4b27
add docs
charlesdong1991 Apr 10, 2020
5e04185
focs
charlesdong1991 Apr 10, 2020
56d0f89
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Apr 10, 2020
7f4839e
fix doc
charlesdong1991 Apr 10, 2020
65d578b
fixup
charlesdong1991 Apr 10, 2020
3e6a06c
fix up
charlesdong1991 Apr 10, 2020
8651447
fix doctest
charlesdong1991 Apr 10, 2020
a7439fe
doctest
charlesdong1991 Apr 11, 2020
449d40f
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Apr 11, 2020
9fd8ec5
rebuild
charlesdong1991 Apr 11, 2020
736bea2
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Apr 15, 2020
d20be20
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 May 10, 2020
35b2b17
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 May 22, 2020
74d6169
fixup and resolve conflict and merge master
charlesdong1991 Jun 16, 2020
0546224
cleaner code
charlesdong1991 Jun 16, 2020
f5f0e68
rename
charlesdong1991 Jun 16, 2020
54ff962
linting
charlesdong1991 Jun 16, 2020
ac57023
init
charlesdong1991 Jun 16, 2020
484e42c
better doc
charlesdong1991 Jun 17, 2020
47e6598
complex case
charlesdong1991 Jun 17, 2020
f28b452
linting
charlesdong1991 Jun 17, 2020
81b4186
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jun 17, 2020
1fd4b5b
resolve conflict and merge master
charlesdong1991 Jun 19, 2020
47dc5fe
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jun 20, 2020
9190f7f
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jun 20, 2020
89de59e
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jun 23, 2020
8493383
add typing
charlesdong1991 Jun 25, 2020
9a9dd7f
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jun 25, 2020
c75c882
black
charlesdong1991 Jun 25, 2020
fa61db7
remove line
charlesdong1991 Jun 25, 2020
00a1ccf
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jun 27, 2020
26b380a
simplify annotation
charlesdong1991 Jul 5, 2020
165ea83
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jul 5, 2020
d6923f2
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jul 8, 2020
f6a5cc1
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jul 9, 2020
a747ab6
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jul 10, 2020
44405e8
deprivate relabel_result
charlesdong1991 Jul 10, 2020
faea906
cleaner annotations
charlesdong1991 Jul 10, 2020
7e30a61
Merge remote-tracking branch 'upstream/master' into nested_renaming_agg
charlesdong1991 Jul 10, 2020
3d20524
fix import sorting
charlesdong1991 Jul 10, 2020
05921af
move defined annotation inside aggregation.py
charlesdong1991 Jul 10, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
162 changes: 161 additions & 1 deletion pandas/core/aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,99 @@

from collections import defaultdict
from functools import partial
from typing import Any, Callable, DefaultDict, List, Sequence, Tuple, Union
from typing import (
Any,
Callable,
DefaultDict,
Dict,
List,
Optional,
Sequence,
Tuple,
Union,
)

from pandas._typing import Label

from pandas.core.dtypes.common import is_dict_like, is_list_like

from pandas.core.base import SpecificationError
import pandas.core.common as com
from pandas.core.indexes.api import Index
from pandas.core.series import FrameOrSeriesUnion, Series

# types of `func` kwarg for DataFrame.aggregate and Series.aggregate
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Great. We may want to think about putting these in _typing, but that is a separate issue / PR for a follow-up (you can just try it if you want).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh, okay! i deliberately moved it from _typing to here in commit 05921af since i thought this might be only used in this file.

I will do a PR for moving it to _typing.

# A single aggregation function: either a callable or the name of one as a string.
AggFuncTypeBase = Union[Callable, str]
# Every form accepted for the `func` argument of an aggregation:
#   * a single function,
#   * a list of functions,
#   * a dict mapping a label to a single function or a list of functions.
AggFuncType = Union[
AggFuncTypeBase,
List[AggFuncTypeBase],
Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]],
]


def reconstruct_func(
    func: Optional[AggFuncType], **kwargs,
) -> Tuple[
    bool, Optional[AggFuncType], Optional[List[str]], Optional[List[int]],
]:
    """
    Normalize the ``func`` argument of an aggregation call.

    Detects whether named aggregation (relabeling) is requested and, if so,
    rebuilds ``func`` from the keyword arguments together with the new column
    names and their order.

    With named aggregation, ``func`` is None and ``kwargs`` holds the column
    and aggregation function information to be parsed. Without it, ``func`` is
    a string (e.g. 'min') or a Callable, a list of them (e.g. ['min', np.max]),
    or a dictionary of column name to str/Callable/list of them
    (e.g. {'A': 'min'}, or {'A': [np.min, lambda x: x]}).

    Parameters
    ----------
    func : str, Callable, list, dict or None
        Aggregation specification; None when named aggregation is used.
    **kwargs
        Passed to ``is_multi_agg_with_relabel`` to detect relabeling and, when
        relabeling applies, to ``normalize_keyword_aggregation``.

    Returns
    -------
    relabeling : bool
        Whether named aggregation is being applied.
    func
        The (possibly reconstructed) func after ``maybe_mangle_lambdas``.
    columns
        New column names; None when there is no relabeling.
    order
        Indices giving the order of the new columns; None when there is no
        relabeling.

    Raises
    ------
    SpecificationError
        If there is no relabeling and ``func`` is a list with duplicated
        entries.
    TypeError
        If there is no relabeling and ``func`` is None.

    Examples
    --------
    >>> reconstruct_func(None, **{"foo": ("col", "min")})
    (True, defaultdict(None, {'col': ['min']}), ('foo',), array([0]))

    >>> reconstruct_func("min")
    (False, 'min', None, None)
    """
    columns: Optional[List[str]] = None
    order: Optional[List[int]] = None
    relabeling = func is None and is_multi_agg_with_relabel(**kwargs)

    if relabeling:
        func, columns, order = normalize_keyword_aggregation(kwargs)
    elif func is None:
        # nicer error message
        raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).")
    elif isinstance(func, list) and len(set(func)) < len(func):
        # GH 28426 will raise error if duplicated function names are used and
        # there is no reassigned name
        raise SpecificationError(
            "Function names must be unique if there is no new column names "
            "assigned"
        )

    return relabeling, maybe_mangle_lambdas(func), columns, order


def is_multi_agg_with_relabel(**kwargs) -> bool:
Expand Down Expand Up @@ -198,6 +285,79 @@ def maybe_mangle_lambdas(agg_spec: Any) -> Any:
return mangled_aggspec


def relabel_result(
    result: FrameOrSeriesUnion,
    func: Dict[str, List[Union[Callable, str]]],
    columns: Tuple,
    order: List[int],
) -> Dict[Label, Series]:
    """
    Reorder and relabel an aggregation result for named aggregation.

    Internal helper for dataframe.agg when relabelling is True; the reordered
    result is returned as a dict keyed by the aggregated column.

    Parameters
    ----------
    result
        Result from aggregation.
    func
        Dict of (column name, funcs).
    columns
        New columns name for relabelling.
    order
        New order for relabelling.

    Returns
    -------
    dict
        One Series per key of ``func``, indexed by the new names and
        reindexed against the full ``columns``.

    Examples
    --------
    >>> result = DataFrame({"A": [np.nan, 2, np.nan],
    ...       "C": [6, np.nan, np.nan], "B": [np.nan, 4, 2.5]})  # doctest: +SKIP
    >>> funcs = {"A": ["max"], "C": ["max"], "B": ["mean", "min"]}
    >>> columns = ("foo", "aab", "bar", "dat")
    >>> order = [0, 1, 2, 3]
    >>> relabel_result(result, funcs, columns, order)  # doctest: +SKIP
    dict(A=Series([2.0, NaN, NaN, NaN], index=["foo", "aab", "bar", "dat"]),
         C=Series([NaN, 6.0, NaN, NaN], index=["foo", "aab", "bar", "dat"]),
         B=Series([NaN, NaN, 2.5, 4.0], index=["foo", "aab", "bar", "dat"]))
    """
    # User-provided names arranged by ascending value of `order`.
    ranked_pairs = sorted(zip(columns, order), key=lambda pair: pair[1])
    new_labels = [name for name, _ in ranked_pairs]

    # Reordering is only applied to a non-Series result with several columns.
    needs_reorder = not isinstance(result, Series) and len(result.columns) > 1

    relabeled: Dict[Label, Series] = {}
    start = 0
    for col, col_funcs in func.items():
        values = result[col].dropna()

        # In the `_aggregate`, the callable names are obtained and used in
        # `result`, and these names are ordered alphabetically. e.g.
        #           C2   C1
        # <lambda>   1  NaN
        # amax     NaN  4.0
        # max      NaN  4.0
        # sum     18.0  6.0
        # Hence the functions of a column may appear shuffled in `result`; the
        # callable names are looked up (unless a name was given as a string)
        # and the aggregated values of the column are put back in spec order.
        # e.g. for df.agg(c1=("C2", sum), c2=("C2", lambda x: min(x))) the
        # correct order is [sum, <lambda>], but `result` has [<lambda>, sum].
        #
        # When only one column is aggregated the index is not sorted and is
        # kept as in `funcs`, so no reordering is needed, e.g.
        #         A
        # min   1.0
        # mean  1.5
        # mean  1.5
        if needs_reorder:
            col_funcs = [
                f if isinstance(f, str) else com.get_callable_name(f)
                for f in col_funcs
            ]
            positions = Index(values.index).get_indexer(col_funcs)
            values = values[positions]

        # Label this column's values with its slice of the user-provided
        # names, then reindex against the whole set of names.
        stop = start + len(col_funcs)
        values.index = new_labels[start:stop]
        relabeled[col] = values.reindex(columns, copy=False)
        start = stop

    return relabeled


def validate_func_kwargs(
kwargs: dict,
) -> Tuple[List[str], List[Union[str, Callable[..., Any]]]]:
Expand Down
12 changes: 11 additions & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@

from pandas.core import algorithms, common as com, nanops, ops
from pandas.core.accessor import CachedAccessor
from pandas.core.aggregation import reconstruct_func, relabel_result
from pandas.core.arrays import Categorical, ExtensionArray
from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin as DatetimeLikeArray
from pandas.core.arrays.sparse import SparseFrameAccessor
Expand Down Expand Up @@ -7301,9 +7302,11 @@ def _gotitem(
examples=_agg_examples_doc,
versionadded="\n.. versionadded:: 0.20.0\n",
)
def aggregate(self, func, axis=0, *args, **kwargs):
def aggregate(self, func=None, axis=0, *args, **kwargs):
axis = self._get_axis_number(axis)

relabeling, func, columns, order = reconstruct_func(func, **kwargs)

result = None
try:
result, how = self._aggregate(func, axis=axis, *args, **kwargs)
Expand All @@ -7315,6 +7318,13 @@ def aggregate(self, func, axis=0, *args, **kwargs):
raise exc from err
if result is None:
return self.apply(func, axis=axis, args=args, **kwargs)

if relabeling:
# This is to keep the order to columns occurrence unchanged, and also
# keep the order of new columns occurrence unchanged
result_in_dict = relabel_result(result, func, columns, order)
result = DataFrame(result_in_dict, index=columns)

return result

def _aggregate(self, arg, axis=0, *args, **kwargs):
Expand Down
22 changes: 2 additions & 20 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,8 @@
from pandas.core.dtypes.missing import isna, notna

from pandas.core.aggregation import (
is_multi_agg_with_relabel,
maybe_mangle_lambdas,
normalize_keyword_aggregation,
reconstruct_func,
validate_func_kwargs,
)
import pandas.core.algorithms as algorithms
Expand Down Expand Up @@ -937,24 +936,7 @@ def aggregate(
self, func=None, *args, engine="cython", engine_kwargs=None, **kwargs
):

relabeling = func is None and is_multi_agg_with_relabel(**kwargs)
if relabeling:
func, columns, order = normalize_keyword_aggregation(kwargs)

kwargs = {}
elif isinstance(func, list) and len(func) > len(set(func)):

# GH 28426 will raise error if duplicated function names are used and
# there is no reassigned name
raise SpecificationError(
"Function names must be unique if there is no new column "
"names assigned"
)
elif func is None:
# nicer error message
raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).")

func = maybe_mangle_lambdas(func)
relabeling, func, columns, order = reconstruct_func(func, **kwargs)

if engine == "numba":
return self._python_agg_general(
Expand Down
7 changes: 6 additions & 1 deletion pandas/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -4016,9 +4016,14 @@ def _gotitem(self, key, ndim, subset=None) -> "Series":
examples=_agg_examples_doc,
versionadded="\n.. versionadded:: 0.20.0\n",
)
def aggregate(self, func, axis=0, *args, **kwargs):
def aggregate(self, func=None, axis=0, *args, **kwargs):
# Validate the axis parameter
self._get_axis_number(axis)

# if func is None, will switch to user-provided "named aggregation" kwargs
if func is None:
func = dict(kwargs.items())

result, how = self._aggregate(func, *args, **kwargs)
if result is None:

Expand Down
Empty file.
104 changes: 104 additions & 0 deletions pandas/tests/frame/apply/test_apply_relabeling.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


class TestDataFrameNamedAggregate:
    """Named (keyword) aggregation through ``DataFrame.agg``, GH 26513."""

    def test_agg_relabel(self):
        # GH 26513
        frame = pd.DataFrame(
            {"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}
        )

        # simplest case: a single column with a single function
        actual = frame.agg(foo=("B", "sum"))
        wanted = pd.DataFrame({"B": [10]}, index=pd.Index(["foo"]))
        tm.assert_frame_equal(actual, wanted)

        # the same column aggregated with two different methods
        actual = frame.agg(foo=("B", "sum"), bar=("B", "min"))
        wanted = pd.DataFrame({"B": [10, 1]}, index=pd.Index(["foo", "bar"]))
        tm.assert_frame_equal(actual, wanted)

    def test_agg_relabel_multi_columns_multi_methods(self):
        # GH 26513, several columns, each aggregated with several methods
        frame = pd.DataFrame(
            {"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}
        )
        spec = {
            "foo": ("A", "sum"),
            "bar": ("B", "mean"),
            "cat": ("A", "min"),
            "dat": ("B", "max"),
            "f": ("A", "max"),
            "g": ("C", "min"),
        }
        actual = frame.agg(**spec)

        wanted_data = {
            "A": [6.0, np.nan, 1.0, np.nan, 2.0, np.nan],
            "B": [np.nan, 2.5, np.nan, 4.0, np.nan, np.nan],
            "C": [np.nan, np.nan, np.nan, np.nan, np.nan, 3.0],
        }
        wanted = pd.DataFrame(
            wanted_data, index=pd.Index(["foo", "bar", "cat", "dat", "f", "g"])
        )
        tm.assert_frame_equal(actual, wanted)

    def test_agg_relabel_partial_functions(self):
        # GH 26513, test on partial, functools or more complex cases
        frame = pd.DataFrame(
            {"A": [1, 2, 1, 2], "B": [1, 2, 3, 4], "C": [3, 4, 5, 6]}
        )

        actual = frame.agg(foo=("A", np.mean), bar=("A", "mean"), cat=("A", min))
        wanted = pd.DataFrame(
            {"A": [1.5, 1.5, 1.0]}, index=pd.Index(["foo", "bar", "cat"])
        )
        tm.assert_frame_equal(actual, wanted)

        spec = {
            "foo": ("A", min),
            "bar": ("A", np.min),
            "cat": ("B", max),
            "dat": ("C", "min"),
            "f": ("B", np.sum),
            "kk": ("B", lambda x: min(x)),
        }
        actual = frame.agg(**spec)

        wanted_data = {
            "A": [1.0, 1.0, np.nan, np.nan, np.nan, np.nan],
            "B": [np.nan, np.nan, 4.0, np.nan, 10.0, 1.0],
            "C": [np.nan, np.nan, np.nan, 3.0, np.nan, np.nan],
        }
        wanted = pd.DataFrame(
            wanted_data, index=pd.Index(["foo", "bar", "cat", "dat", "f", "kk"])
        )
        tm.assert_frame_equal(actual, wanted)

    def test_agg_namedtuple(self):
        # GH 26513
        frame = pd.DataFrame({"A": [0, 1], "B": [1, 2]})

        # NamedAgg built positionally, by keyword, and with a mix of both
        actual = frame.agg(
            foo=pd.NamedAgg("B", "sum"),
            bar=pd.NamedAgg("B", min),
            cat=pd.NamedAgg(column="B", aggfunc="count"),
            fft=pd.NamedAgg("B", aggfunc="max"),
        )
        wanted = pd.DataFrame(
            {"B": [3, 1, 2, 2]}, index=pd.Index(["foo", "bar", "cat", "fft"])
        )
        tm.assert_frame_equal(actual, wanted)

        actual = frame.agg(
            foo=pd.NamedAgg("A", "min"),
            bar=pd.NamedAgg(column="B", aggfunc="max"),
            cat=pd.NamedAgg(column="A", aggfunc="max"),
        )
        wanted = pd.DataFrame(
            {"A": [0.0, np.nan, 1.0], "B": [np.nan, 2.0, np.nan]},
            index=pd.Index(["foo", "bar", "cat"]),
        )
        tm.assert_frame_equal(actual, wanted)

    def test_agg_raises(self):
        # GH 26513
        frame = pd.DataFrame({"A": [0, 1], "B": [1, 2]})

        # calling agg with neither func nor named aggregations must fail
        with pytest.raises(TypeError, match="Must provide"):
            frame.agg()
Empty file.
Loading