From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 01/12] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From ac5ea7dbf19429c6f9564102d6b4f3bb6b4b04b3 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 9 Jan 2020 19:02:17 +0100 Subject: [PATCH 02/12] move file to aggregation --- pandas/core/aggregation.py | 177 +++++++++++++++++++++++++++++ pandas/core/groupby/generic.py | 201 ++------------------------------- 2 files changed, 187 insertions(+), 191 deletions(-) create mode 100644 pandas/core/aggregation.py diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py new file mode 100644 index 0000000000000..d377cf49de28f --- /dev/null +++ b/pandas/core/aggregation.py @@ -0,0 +1,177 @@ +from collections import defaultdict +from functools import partial +from typing import Any, Sequence + +from pandas.core.dtypes.common import is_dict_like, is_list_like + +import pandas.core.common as com +from pandas.core.indexes.api import Index + + +def is_multi_agg_with_relabel(**kwargs) -> bool: + """ + Check whether kwargs passed to .agg look like multi-agg with relabeling. + Parameters + ---------- + **kwargs : dict + Returns + ------- + bool + Examples + -------- + >>> is_multi_agg_with_relabel(a='max') + False + >>> is_multi_agg_with_relabel(a_max=('a', 'max'), + ... a_min=('a', 'min')) + True + >>> is_multi_agg_with_relabel() + False + """ + return all(isinstance(v, tuple) and len(v) == 2 for v in kwargs.values()) and ( + len(kwargs) > 0 + ) + + +def normalize_keyword_aggregation(kwargs): + """ + Normalize user-provided "named aggregation" kwargs. + Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs + to the old Dict[str, List[scalar]]]. + Parameters + ---------- + kwargs : dict + Returns + ------- + aggspec : dict + The transformed kwargs. + columns : List[str] + The user-provided keys. + col_idx_order : List[int] + List of columns indices. + Examples + -------- + >>> normalize_keyword_aggregation({'output': ('input', 'sum')}) + ({'input': ['sum']}, ('output',), [('input', 'sum')]) + """ + # Normalize the aggregation functions as Mapping[column, List[func]], + # process normally, then fixup the names. + # TODO: aggspec type: typing.Dict[str, List[AggScalar]] + # May be hitting https://github.com/python/mypy/issues/5958 + # saying it doesn't have an attribute __name__ + aggspec: DefaultDict = defaultdict(list) + order = [] + columns, pairs = list(zip(*kwargs.items())) + + for name, (column, aggfunc) in zip(columns, pairs): + aggspec[column].append(aggfunc) + order.append((column, com.get_callable_name(aggfunc) or aggfunc)) + + # uniquify aggfunc name if duplicated in order list + uniquified_order = _make_unique(order) + + # GH 25719, due to aggspec will change the order of assigned columns in aggregation + # uniquified_aggspec will store uniquified order list and will compare it with order + # based on index + aggspec_order = [ + (column, com.get_callable_name(aggfunc) or aggfunc) + for column, aggfuncs in aggspec.items() + for aggfunc in aggfuncs + ] + uniquified_aggspec = _make_unique(aggspec_order) + + # get the new indice of columns by comparison + col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order) + return aggspec, columns, col_idx_order + + +def _make_unique(seq): + """Uniquify aggfunc name of the pairs in the order list + Examples: + -------- + >>> _make_unique([('a', ''), ('a', ''), ('b', '')]) + [('a', '_0'), ('a', '_1'), ('b', '')] + """ + return [ + (pair[0], "_".join([pair[1], str(seq[:i].count(pair))])) + if seq.count(pair) > 1 + else pair + for i, pair in enumerate(seq) + ] + + +# TODO: Can't use, because mypy doesn't like us setting __name__ +# error: "partial[Any]" has no attribute "__name__" +# the type is: +# typing.Sequence[Callable[..., ScalarResult]] +# -> typing.Sequence[Callable[..., ScalarResult]]: + + +def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]: + """ + Possibly mangle a list of aggfuncs. + Parameters + ---------- + aggfuncs : Sequence + Returns + ------- + mangled: list-like + A new AggSpec sequence, where lambdas have been converted + to have unique names. + Notes + ----- + If just one aggfunc is passed, the name will not be mangled. + """ + if len(aggfuncs) <= 1: + # don't mangle for .agg([lambda x: .]) + return aggfuncs + i = 0 + mangled_aggfuncs = [] + for aggfunc in aggfuncs: + if com.get_callable_name(aggfunc) == "": + aggfunc = partial(aggfunc) + aggfunc.__name__ = f"" + i += 1 + mangled_aggfuncs.append(aggfunc) + + return mangled_aggfuncs + + +def maybe_mangle_lambdas(agg_spec: Any) -> Any: + """ + Make new lambdas with unique names. + Parameters + ---------- + agg_spec : Any + An argument to GroupBy.agg. + Non-dict-like `agg_spec` are pass through as is. + For dict-like `agg_spec` a new spec is returned + with name-mangled lambdas. + Returns + ------- + mangled : Any + Same type as the input. + Examples + -------- + >>> maybe_mangle_lambdas('sum') + 'sum' + >>> maybe_mangle_lambdas([lambda: 1, lambda: 2]) # doctest: +SKIP + [, + .f(*args, **kwargs)>] + """ + is_dict = is_dict_like(agg_spec) + if not (is_dict or is_list_like(agg_spec)): + return agg_spec + mangled_aggspec = type(agg_spec)() # dict or OrderdDict + + if is_dict: + for key, aggfuncs in agg_spec.items(): + if is_list_like(aggfuncs) and not is_dict_like(aggfuncs): + mangled_aggfuncs = _managle_lambda_list(aggfuncs) + else: + mangled_aggfuncs = aggfuncs + + mangled_aggspec[key] = mangled_aggfuncs + else: + mangled_aggspec = _managle_lambda_list(agg_spec) + + return mangled_aggspec diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index c49677fa27a31..98cdcd0f2b6ee 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -5,7 +5,7 @@ These are user facing as the result of the ``df.groupby(...)`` operations, which here returns a DataFrameGroupBy object. """ -from collections import abc, defaultdict, namedtuple +from collections import abc, namedtuple import copy from functools import partial from textwrap import dedent @@ -42,10 +42,8 @@ ensure_int64, ensure_platform_int, is_bool, - is_dict_like, is_integer_dtype, is_interval_dtype, - is_list_like, is_numeric_dtype, is_object_dtype, is_scalar, @@ -53,6 +51,11 @@ ) from pandas.core.dtypes.missing import _isna_ndarraylike, isna, notna +from pandas.core.aggregation import ( + is_multi_agg_with_relabel, + maybe_mangle_lambdas, + normalize_keyword_aggregation, +) import pandas.core.algorithms as algorithms from pandas.core.base import DataError, SpecificationError import pandas.core.common as com @@ -249,7 +252,7 @@ def aggregate(self, func=None, *args, **kwargs): elif isinstance(func, abc.Iterable): # Catch instances of lists / tuples # but not the class list / tuple itself. - func = _maybe_mangle_lambdas(func) + func = maybe_mangle_lambdas(func) ret = self._aggregate_multiple_funcs(func) if relabeling: ret.columns = columns @@ -918,9 +921,9 @@ class DataFrameGroupBy(GroupBy): @Appender(_shared_docs["aggregate"]) def aggregate(self, func=None, *args, **kwargs): - relabeling = func is None and _is_multi_agg_with_relabel(**kwargs) + relabeling = func is None and is_multi_agg_with_relabel(**kwargs) if relabeling: - func, columns, order = _normalize_keyword_aggregation(kwargs) + func, columns, order = normalize_keyword_aggregation(kwargs) kwargs = {} elif isinstance(func, list) and len(func) > len(set(func)): @@ -935,7 +938,7 @@ def aggregate(self, func=None, *args, **kwargs): # nicer error message raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).") - func = _maybe_mangle_lambdas(func) + func = maybe_mangle_lambdas(func) result, how = self._aggregate(func, *args, **kwargs) if how is None: @@ -1860,190 +1863,6 @@ def groupby_series(obj, col=None): boxplot = boxplot_frame_groupby -def _is_multi_agg_with_relabel(**kwargs) -> bool: - """ - Check whether kwargs passed to .agg look like multi-agg with relabeling. - - Parameters - ---------- - **kwargs : dict - - Returns - ------- - bool - - Examples - -------- - >>> _is_multi_agg_with_relabel(a='max') - False - >>> _is_multi_agg_with_relabel(a_max=('a', 'max'), - ... a_min=('a', 'min')) - True - >>> _is_multi_agg_with_relabel() - False - """ - return all(isinstance(v, tuple) and len(v) == 2 for v in kwargs.values()) and ( - len(kwargs) > 0 - ) - - -def _normalize_keyword_aggregation(kwargs): - """ - Normalize user-provided "named aggregation" kwargs. - - Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs - to the old Dict[str, List[scalar]]]. - - Parameters - ---------- - kwargs : dict - - Returns - ------- - aggspec : dict - The transformed kwargs. - columns : List[str] - The user-provided keys. - col_idx_order : List[int] - List of columns indices. - - Examples - -------- - >>> _normalize_keyword_aggregation({'output': ('input', 'sum')}) - ({'input': ['sum']}, ('output',), [('input', 'sum')]) - """ - # Normalize the aggregation functions as Mapping[column, List[func]], - # process normally, then fixup the names. - # TODO: aggspec type: typing.Dict[str, List[AggScalar]] - # May be hitting https://github.com/python/mypy/issues/5958 - # saying it doesn't have an attribute __name__ - aggspec = defaultdict(list) - order = [] - columns, pairs = list(zip(*kwargs.items())) - - for name, (column, aggfunc) in zip(columns, pairs): - aggspec[column].append(aggfunc) - order.append((column, com.get_callable_name(aggfunc) or aggfunc)) - - # uniquify aggfunc name if duplicated in order list - uniquified_order = _make_unique(order) - - # GH 25719, due to aggspec will change the order of assigned columns in aggregation - # uniquified_aggspec will store uniquified order list and will compare it with order - # based on index - aggspec_order = [ - (column, com.get_callable_name(aggfunc) or aggfunc) - for column, aggfuncs in aggspec.items() - for aggfunc in aggfuncs - ] - uniquified_aggspec = _make_unique(aggspec_order) - - # get the new indice of columns by comparison - col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order) - return aggspec, columns, col_idx_order - - -def _make_unique(seq): - """Uniquify aggfunc name of the pairs in the order list - - Examples: - -------- - >>> _make_unique([('a', ''), ('a', ''), ('b', '')]) - [('a', '_0'), ('a', '_1'), ('b', '')] - """ - return [ - (pair[0], "_".join([pair[1], str(seq[:i].count(pair))])) - if seq.count(pair) > 1 - else pair - for i, pair in enumerate(seq) - ] - - -# TODO: Can't use, because mypy doesn't like us setting __name__ -# error: "partial[Any]" has no attribute "__name__" -# the type is: -# typing.Sequence[Callable[..., ScalarResult]] -# -> typing.Sequence[Callable[..., ScalarResult]]: - - -def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]: - """ - Possibly mangle a list of aggfuncs. - - Parameters - ---------- - aggfuncs : Sequence - - Returns - ------- - mangled: list-like - A new AggSpec sequence, where lambdas have been converted - to have unique names. - - Notes - ----- - If just one aggfunc is passed, the name will not be mangled. - """ - if len(aggfuncs) <= 1: - # don't mangle for .agg([lambda x: .]) - return aggfuncs - i = 0 - mangled_aggfuncs = [] - for aggfunc in aggfuncs: - if com.get_callable_name(aggfunc) == "": - aggfunc = partial(aggfunc) - aggfunc.__name__ = f"" - i += 1 - mangled_aggfuncs.append(aggfunc) - - return mangled_aggfuncs - - -def _maybe_mangle_lambdas(agg_spec: Any) -> Any: - """ - Make new lambdas with unique names. - - Parameters - ---------- - agg_spec : Any - An argument to GroupBy.agg. - Non-dict-like `agg_spec` are pass through as is. - For dict-like `agg_spec` a new spec is returned - with name-mangled lambdas. - - Returns - ------- - mangled : Any - Same type as the input. - - Examples - -------- - >>> _maybe_mangle_lambdas('sum') - 'sum' - - >>> _maybe_mangle_lambdas([lambda: 1, lambda: 2]) # doctest: +SKIP - [, - .f(*args, **kwargs)>] - """ - is_dict = is_dict_like(agg_spec) - if not (is_dict or is_list_like(agg_spec)): - return agg_spec - mangled_aggspec = type(agg_spec)() # dict or OrderdDict - - if is_dict: - for key, aggfuncs in agg_spec.items(): - if is_list_like(aggfuncs) and not is_dict_like(aggfuncs): - mangled_aggfuncs = _managle_lambda_list(aggfuncs) - else: - mangled_aggfuncs = aggfuncs - - mangled_aggspec[key] = mangled_aggfuncs - else: - mangled_aggspec = _managle_lambda_list(agg_spec) - - return mangled_aggspec - - def _recast_datetimelike_result(result: DataFrame) -> DataFrame: """ If we have date/time like in the original, then coerce dates From 3dd5dca5a187c1b06015e28a2ee058d65cadb062 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 9 Jan 2020 19:31:00 +0100 Subject: [PATCH 03/12] fix import --- pandas/tests/groupby/aggregate/test_aggregate.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 0b72a61ed84de..fb31760a7f7cd 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -9,8 +9,8 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, concat import pandas._testing as tm +from pandas.core.aggregation import _make_unique, maybe_mangle_lambdas from pandas.core.base import SpecificationError -from pandas.core.groupby.generic import _make_unique, _maybe_mangle_lambdas from pandas.core.groupby.grouper import Grouping @@ -633,14 +633,14 @@ def test_lambda_named_agg(func): class TestLambdaMangling: def test_maybe_mangle_lambdas_passthrough(self): - assert _maybe_mangle_lambdas("mean") == "mean" - assert _maybe_mangle_lambdas(lambda x: x).__name__ == "" + assert maybe_mangle_lambdas("mean") == "mean" + assert maybe_mangle_lambdas(lambda x: x).__name__ == "" # don't mangel single lambda. - assert _maybe_mangle_lambdas([lambda x: x])[0].__name__ == "" + assert maybe_mangle_lambdas([lambda x: x])[0].__name__ == "" def test_maybe_mangle_lambdas_listlike(self): aggfuncs = [lambda x: 1, lambda x: 2] - result = _maybe_mangle_lambdas(aggfuncs) + result = maybe_mangle_lambdas(aggfuncs) assert result[0].__name__ == "" assert result[1].__name__ == "" assert aggfuncs[0](None) == result[0](None) @@ -648,13 +648,13 @@ def test_maybe_mangle_lambdas_listlike(self): def test_maybe_mangle_lambdas(self): func = {"A": [lambda x: 0, lambda x: 1]} - result = _maybe_mangle_lambdas(func) + result = maybe_mangle_lambdas(func) assert result["A"][0].__name__ == "" assert result["A"][1].__name__ == "" def test_maybe_mangle_lambdas_args(self): func = {"A": [lambda x, a, b=1: (0, a, b), lambda x: 1]} - result = _maybe_mangle_lambdas(func) + result = maybe_mangle_lambdas(func) assert result["A"][0].__name__ == "" assert result["A"][1].__name__ == "" @@ -664,7 +664,7 @@ def test_maybe_mangle_lambdas_args(self): def test_maybe_mangle_lambdas_named(self): func = {"C": np.mean, "D": {"foo": np.mean, "bar": np.mean}} - result = _maybe_mangle_lambdas(func) + result = maybe_mangle_lambdas(func) assert result == func def test_basic(self): From 52947a49ad28aca4c4517c1cedcd03a151744597 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 9 Jan 2020 19:52:34 +0100 Subject: [PATCH 04/12] add defaultdict --- pandas/core/aggregation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index d377cf49de28f..62f8ec0638b8f 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -1,6 +1,6 @@ from collections import defaultdict from functools import partial -from typing import Any, Sequence +from typing import Any, DefaultDict, Sequence from pandas.core.dtypes.common import is_dict_like, is_list_like From 73855f3a6cadb107d7a37b7844e2e82657ed6ae5 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 20 Jan 2020 21:41:14 +0100 Subject: [PATCH 05/12] code change based on JR review --- pandas/core/aggregation.py | 25 +++++- .../tests/groupby/aggregate/test_aggregate.py | 81 ----------------- pandas/tests/test_aggregation.py | 90 +++++++++++++++++++ 3 files changed, 112 insertions(+), 84 deletions(-) create mode 100644 pandas/tests/test_aggregation.py diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 62f8ec0638b8f..8fe3fac411cf7 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -1,6 +1,6 @@ from collections import defaultdict from functools import partial -from typing import Any, DefaultDict, Sequence +from typing import Any, DefaultDict, List, Sequence, Tuple from pandas.core.dtypes.common import is_dict_like, is_list_like @@ -8,15 +8,24 @@ from pandas.core.indexes.api import Index +""" +aggregation.py contains utility functions to handle multiple named and lambda +kwarg aggregations in groupby and DataFrame/Series aggregation +""" + + def is_multi_agg_with_relabel(**kwargs) -> bool: """ Check whether kwargs passed to .agg look like multi-agg with relabeling. + Parameters ---------- **kwargs : dict + Returns ------- bool + Examples -------- >>> is_multi_agg_with_relabel(a='max') @@ -32,14 +41,16 @@ def is_multi_agg_with_relabel(**kwargs) -> bool: ) -def normalize_keyword_aggregation(kwargs): +def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[int]]: """ Normalize user-provided "named aggregation" kwargs. Transforms from the new ``Mapping[str, NamedAgg]`` style kwargs to the old Dict[str, List[scalar]]]. + Parameters ---------- kwargs : dict + Returns ------- aggspec : dict @@ -48,6 +59,7 @@ def normalize_keyword_aggregation(kwargs): The user-provided keys. col_idx_order : List[int] List of columns indices. + Examples -------- >>> normalize_keyword_aggregation({'output': ('input', 'sum')}) @@ -84,8 +96,9 @@ def normalize_keyword_aggregation(kwargs): return aggspec, columns, col_idx_order -def _make_unique(seq): +def _make_unique_kwarg_list(seq: List[tuple]) -> List[tuple]: """Uniquify aggfunc name of the pairs in the order list + Examples: -------- >>> _make_unique([('a', ''), ('a', ''), ('b', '')]) @@ -109,14 +122,17 @@ def _make_unique(seq): def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]: """ Possibly mangle a list of aggfuncs. + Parameters ---------- aggfuncs : Sequence + Returns ------- mangled: list-like A new AggSpec sequence, where lambdas have been converted to have unique names. + Notes ----- If just one aggfunc is passed, the name will not be mangled. @@ -139,6 +155,7 @@ def _managle_lambda_list(aggfuncs: Sequence[Any]) -> Sequence[Any]: def maybe_mangle_lambdas(agg_spec: Any) -> Any: """ Make new lambdas with unique names. + Parameters ---------- agg_spec : Any @@ -146,10 +163,12 @@ def maybe_mangle_lambdas(agg_spec: Any) -> Any: Non-dict-like `agg_spec` are pass through as is. For dict-like `agg_spec` a new spec is returned with name-mangled lambdas. + Returns ------- mangled : Any Same type as the input. + Examples -------- >>> maybe_mangle_lambdas('sum') diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index fb31760a7f7cd..3d842aca210ed 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -9,7 +9,6 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, concat import pandas._testing as tm -from pandas.core.aggregation import _make_unique, maybe_mangle_lambdas from pandas.core.base import SpecificationError from pandas.core.groupby.grouper import Grouping @@ -632,41 +631,6 @@ def test_lambda_named_agg(func): class TestLambdaMangling: - def test_maybe_mangle_lambdas_passthrough(self): - assert maybe_mangle_lambdas("mean") == "mean" - assert maybe_mangle_lambdas(lambda x: x).__name__ == "" - # don't mangel single lambda. - assert maybe_mangle_lambdas([lambda x: x])[0].__name__ == "" - - def test_maybe_mangle_lambdas_listlike(self): - aggfuncs = [lambda x: 1, lambda x: 2] - result = maybe_mangle_lambdas(aggfuncs) - assert result[0].__name__ == "" - assert result[1].__name__ == "" - assert aggfuncs[0](None) == result[0](None) - assert aggfuncs[1](None) == result[1](None) - - def test_maybe_mangle_lambdas(self): - func = {"A": [lambda x: 0, lambda x: 1]} - result = maybe_mangle_lambdas(func) - assert result["A"][0].__name__ == "" - assert result["A"][1].__name__ == "" - - def test_maybe_mangle_lambdas_args(self): - func = {"A": [lambda x, a, b=1: (0, a, b), lambda x: 1]} - result = maybe_mangle_lambdas(func) - assert result["A"][0].__name__ == "" - assert result["A"][1].__name__ == "" - - assert func["A"][0](0, 1) == (0, 1, 1) - assert func["A"][0](0, 1, 2) == (0, 1, 2) - assert func["A"][0](0, 2, b=3) == (0, 2, 3) - - def test_maybe_mangle_lambdas_named(self): - func = {"C": np.mean, "D": {"foo": np.mean, "bar": np.mean}} - result = maybe_mangle_lambdas(func) - assert result == func - def test_basic(self): df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]}) result = df.groupby("A").agg({"B": [lambda x: 0, lambda x: 1]}) @@ -784,48 +748,3 @@ def test_agg_multiple_lambda(self): weight_min=pd.NamedAgg(column="weight", aggfunc=lambda x: np.min(x)), ) tm.assert_frame_equal(result2, expected) - - @pytest.mark.parametrize( - "order, expected_reorder", - [ - ( - [ - ("height", ""), - ("height", "max"), - ("weight", "max"), - ("height", ""), - ("weight", ""), - ], - [ - ("height", "_0"), - ("height", "max"), - ("weight", "max"), - ("height", "_1"), - ("weight", ""), - ], - ), - ( - [ - ("col2", "min"), - ("col1", ""), - ("col1", ""), - ("col1", ""), - ], - [ - ("col2", "min"), - ("col1", "_0"), - ("col1", "_1"), - ("col1", "_2"), - ], - ), - ( - [("col", ""), ("col", ""), ("col", "")], - [("col", "_0"), ("col", "_1"), ("col", "_2")], - ), - ], - ) - def test_make_unique(self, order, expected_reorder): - # GH 27519, test if make_unique function reorders correctly - result = _make_unique(order) - - assert result == expected_reorder diff --git a/pandas/tests/test_aggregation.py b/pandas/tests/test_aggregation.py new file mode 100644 index 0000000000000..74ccebc8e2275 --- /dev/null +++ b/pandas/tests/test_aggregation.py @@ -0,0 +1,90 @@ +import numpy as np +import pytest + +from pandas.core.aggregation import _make_unique_kwarg_list, maybe_mangle_lambdas + + +def test_maybe_mangle_lambdas_passthrough(): + assert maybe_mangle_lambdas("mean") == "mean" + assert maybe_mangle_lambdas(lambda x: x).__name__ == "" + # don't mangel single lambda. + assert maybe_mangle_lambdas([lambda x: x])[0].__name__ == "" + + +def test_maybe_mangle_lambdas_listlike(): + aggfuncs = [lambda x: 1, lambda x: 2] + result = maybe_mangle_lambdas(aggfuncs) + assert result[0].__name__ == "" + assert result[1].__name__ == "" + assert aggfuncs[0](None) == result[0](None) + assert aggfuncs[1](None) == result[1](None) + + +def test_maybe_mangle_lambdas(): + func = {"A": [lambda x: 0, lambda x: 1]} + result = maybe_mangle_lambdas(func) + assert result["A"][0].__name__ == "" + assert result["A"][1].__name__ == "" + + +def test_maybe_mangle_lambdas_args(): + func = {"A": [lambda x, a, b=1: (0, a, b), lambda x: 1]} + result = maybe_mangle_lambdas(func) + assert result["A"][0].__name__ == "" + assert result["A"][1].__name__ == "" + + assert func["A"][0](0, 1) == (0, 1, 1) + assert func["A"][0](0, 1, 2) == (0, 1, 2) + assert func["A"][0](0, 2, b=3) == (0, 2, 3) + + +def test_maybe_mangle_lambdas_named(): + func = {"C": np.mean, "D": {"foo": np.mean, "bar": np.mean}} + result = maybe_mangle_lambdas(func) + assert result == func + + +@pytest.mark.parametrize( + "order, expected_reorder", + [ + ( + [ + ("height", ""), + ("height", "max"), + ("weight", "max"), + ("height", ""), + ("weight", ""), + ], + [ + ("height", "_0"), + ("height", "max"), + ("weight", "max"), + ("height", "_1"), + ("weight", ""), + ], + ), + ( + [ + ("col2", "min"), + ("col1", ""), + ("col1", ""), + ("col1", ""), + ], + [ + ("col2", "min"), + ("col1", "_0"), + ("col1", "_1"), + ("col1", "_2"), + ], + ), + ( + [("col", ""), ("col", ""), ("col", "")], + [("col", "_0"), ("col", "_1"), ("col", "_2")], + ), + ], +) +def test_make_unique(order, expected_reorder): + # GH 27519, test if make_unique function reorders correctly + result = _make_unique_kwarg_list(order) + + assert result == expected_reorder From 3a97efedcba7155ce16bd4417e228141e9391373 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 20 Jan 2020 21:42:42 +0100 Subject: [PATCH 06/12] move --- pandas/core/aggregation.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 8fe3fac411cf7..1f07cef7f93f2 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -1,3 +1,8 @@ +""" +aggregation.py contains utility functions to handle multiple named and lambda +kwarg aggregations in groupby and DataFrame/Series aggregation +""" + from collections import defaultdict from functools import partial from typing import Any, DefaultDict, List, Sequence, Tuple @@ -8,12 +13,6 @@ from pandas.core.indexes.api import Index -""" -aggregation.py contains utility functions to handle multiple named and lambda -kwarg aggregations in groupby and DataFrame/Series aggregation -""" - - def is_multi_agg_with_relabel(**kwargs) -> bool: """ Check whether kwargs passed to .agg look like multi-agg with relabeling. From 91a9a7da67f43e03e1e1a2c329393483d0395914 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 20 Jan 2020 22:02:43 +0100 Subject: [PATCH 07/12] fixup --- pandas/core/aggregation.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 1f07cef7f93f2..e544c9abbd721 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -78,7 +78,7 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i order.append((column, com.get_callable_name(aggfunc) or aggfunc)) # uniquify aggfunc name if duplicated in order list - uniquified_order = _make_unique(order) + uniquified_order = _make_unique_kwarg_list(order) # GH 25719, due to aggspec will change the order of assigned columns in aggregation # uniquified_aggspec will store uniquified order list and will compare it with order @@ -88,19 +88,19 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i for column, aggfuncs in aggspec.items() for aggfunc in aggfuncs ] - uniquified_aggspec = _make_unique(aggspec_order) + uniquified_aggspec = _make_unique_kwarg_list(aggspec_order) # get the new indice of columns by comparison col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order) return aggspec, columns, col_idx_order -def _make_unique_kwarg_list(seq: List[tuple]) -> List[tuple]: +def _make_unique_kwarg_list(seq: List[tuple]) -> List[object]: """Uniquify aggfunc name of the pairs in the order list Examples: -------- - >>> _make_unique([('a', ''), ('a', ''), ('b', '')]) + >>> _make_unique_kwarg_list([('a', ''), ('a', ''), ('b', '')]) [('a', '_0'), ('a', '_1'), ('b', '')] """ return [ From 7e9687935f533cfe08408c0b15507b9b6d79665d Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 20 Jan 2020 22:03:22 +0100 Subject: [PATCH 08/12] fixup --- pandas/core/aggregation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index e544c9abbd721..6d14888ecbdcf 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -95,7 +95,7 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i return aggspec, columns, col_idx_order -def _make_unique_kwarg_list(seq: List[tuple]) -> List[object]: +def _make_unique_kwarg_list(seq: List[object]) -> List[object]: """Uniquify aggfunc name of the pairs in the order list Examples: From a63278f3a4eef932c4209d54866183f3f618e6f3 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 20 Jan 2020 22:06:28 +0100 Subject: [PATCH 09/12] fix up --- pandas/core/aggregation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 6d14888ecbdcf..11be022c36636 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -100,7 +100,8 @@ def _make_unique_kwarg_list(seq: List[object]) -> List[object]: Examples: -------- - >>> _make_unique_kwarg_list([('a', ''), ('a', ''), ('b', '')]) + >>> kwarg_list = [('a', ''), ('a', ''), ('b', '')] + >>> _make_unique_kwarg_list(kwarg_list) [('a', '_0'), ('a', '_1'), ('b', '')] """ return [ From d2d2429169c44d118a6543e0316e43c15c82fd80 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 20 Jan 2020 22:26:49 +0100 Subject: [PATCH 10/12] fix typing --- pandas/core/aggregation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index 11be022c36636..c9ca0199584da 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -95,7 +95,7 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i return aggspec, columns, col_idx_order -def _make_unique_kwarg_list(seq: List[object]) -> List[object]: +def _make_unique_kwarg_list(seq: List[Tuple[Any, Any]]) -> List[Tuple[Any, Any]]: """Uniquify aggfunc name of the pairs in the order list Examples: From 633140840bc473fd5c4762ebb5e2cbc7cc1db65b Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 20 Jan 2020 22:29:26 +0100 Subject: [PATCH 11/12] better annotation --- pandas/core/aggregation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index c9ca0199584da..d8a34b9c60ddf 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -95,7 +95,7 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i return aggspec, columns, col_idx_order -def _make_unique_kwarg_list(seq: List[Tuple[Any, Any]]) -> List[Tuple[Any, Any]]: +def _make_unique_kwarg_list(seq: Sequence[Tuple[Any, Any]]) -> Sequence[Tuple[Any, Any]]: """Uniquify aggfunc name of the pairs in the order list Examples: From 64a999181424cf6b9f354505db5b8d509075f89b Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 20 Jan 2020 23:05:43 +0100 Subject: [PATCH 12/12] fix black --- pandas/core/aggregation.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/aggregation.py b/pandas/core/aggregation.py index d8a34b9c60ddf..79b87f146b9a7 100644 --- a/pandas/core/aggregation.py +++ b/pandas/core/aggregation.py @@ -95,7 +95,9 @@ def normalize_keyword_aggregation(kwargs: dict) -> Tuple[dict, List[str], List[i return aggspec, columns, col_idx_order -def _make_unique_kwarg_list(seq: Sequence[Tuple[Any, Any]]) -> Sequence[Tuple[Any, Any]]: +def _make_unique_kwarg_list( + seq: Sequence[Tuple[Any, Any]] +) -> Sequence[Tuple[Any, Any]]: """Uniquify aggfunc name of the pairs in the order list Examples: