From 7e461a18d9f6928132afec6f48ce968b3e989ba6 Mon Sep 17 00:00:00 2001 From: Kaiqi Dong Date: Mon, 3 Dec 2018 17:43:52 +0100 Subject: [PATCH 01/21] remove \n from docstring --- pandas/core/arrays/datetimes.py | 26 +++++++++++++------------- pandas/core/arrays/timedeltas.py | 16 ++++++++-------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index cfe3afcf3730a..b3df505d56d78 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -82,7 +82,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -1072,19 +1072,19 @@ def date(self): return tslib.ints_to_pydatetime(timestamps, box="date") - year = _field_accessor('year', 'Y', "\n The year of the datetime\n") + year = _field_accessor('year', 'Y', "The year of the datetime") month = _field_accessor('month', 'M', - "\n The month as January=1, December=12 \n") - day = _field_accessor('day', 'D', "\nThe days of the datetime\n") - hour = _field_accessor('hour', 'h', "\nThe hours of the datetime\n") - minute = _field_accessor('minute', 'm', "\nThe minutes of the datetime\n") - second = _field_accessor('second', 's', "\nThe seconds of the datetime\n") + "The month as January=1, December=12") + day = _field_accessor('day', 'D', "The days of the datetime") + hour = _field_accessor('hour', 'h', "The hours of the datetime") + minute = _field_accessor('minute', 'm', "The minutes of the datetime") + second = _field_accessor('second', 's', "The seconds of the datetime") microsecond = _field_accessor('microsecond', 'us', - "\nThe microseconds of the datetime\n") + "The microseconds of the datetime") nanosecond = _field_accessor('nanosecond', 'ns', - "\nThe nanoseconds of the datetime\n") + "The nanoseconds of the datetime") weekofyear = _field_accessor('weekofyear', 'woy', - "\nThe week ordinal of the year\n") + "The week ordinal of the year") week = weekofyear _dayofweek_doc = """ The day of the week with Monday=0, Sunday=6. @@ -1129,12 +1129,12 @@ def date(self): "The name of day in a week (ex: Friday)\n\n.. deprecated:: 0.23.0") dayofyear = _field_accessor('dayofyear', 'doy', - "\nThe ordinal day of the year\n") - quarter = _field_accessor('quarter', 'q', "\nThe quarter of the date\n") + "The ordinal day of the year") + quarter = _field_accessor('quarter', 'q', "The quarter of the date") days_in_month = _field_accessor( 'days_in_month', 'dim', - "\nThe number of days in the month\n") + "The number of days in the month") daysinmonth = days_in_month _is_month_doc = """ Indicates whether the date is the {first_or_last} day of the month. diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 830283d31a929..4afc9f5483c2a 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -59,7 +59,7 @@ def f(self): return result f.__name__ = name - f.__doc__ = docstring + f.__doc__ = "\n{}\n".format(docstring) return property(f) @@ -684,16 +684,16 @@ def to_pytimedelta(self): return tslibs.ints_to_pytimedelta(self.asi8) days = _field_accessor("days", "days", - "\nNumber of days for each element.\n") + "Number of days for each element.") seconds = _field_accessor("seconds", "seconds", - "\nNumber of seconds (>= 0 and less than 1 day) " - "for each element.\n") + "Number of seconds (>= 0 and less than 1 day) " + "for each element.") microseconds = _field_accessor("microseconds", "microseconds", - "\nNumber of microseconds (>= 0 and less " - "than 1 second) for each element.\n") + "Number of microseconds (>= 0 and less " + "than 1 second) for each element.") nanoseconds = _field_accessor("nanoseconds", "nanoseconds", - "\nNumber of nanoseconds (>= 0 and less " - "than 1 microsecond) for each element.\n") + "Number of nanoseconds (>= 0 and less " + "than 1 microsecond) for each element.") @property def components(self): From e3130838f4bd771b431781c21a18549c05a5f39a Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Wed, 14 Aug 2019 23:15:10 +0200 Subject: [PATCH 02/21] fix issue 27519 --- pandas/core/groupby/generic.py | 17 ++++++++++---- .../tests/groupby/aggregate/test_aggregate.py | 22 +++++++++++++++++++ 2 files changed, 35 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index da3db1c18e534..b777f324706da 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -266,7 +266,9 @@ def aggregate(self, func, *args, **kwargs): result.index = np.arange(len(result)) if relabeling: - result = result[order] + + # used reordered index of columns + result = result.iloc[:, order] result.columns = columns return result._convert(datetime=True) @@ -1729,8 +1731,8 @@ def _normalize_keyword_aggregation(kwargs): The transformed kwargs. columns : List[str] The user-provided keys. - order : List[Tuple[str, str]] - Pairs of the input and output column names. + order : List[int] + List of reordered index of columns. Examples -------- @@ -1750,6 +1752,7 @@ def _normalize_keyword_aggregation(kwargs): aggspec = OrderedDict() order = [] columns, pairs = list(zip(*kwargs.items())) + reordered_pairs = [] for name, (column, aggfunc) in zip(columns, pairs): if column in aggspec: @@ -1757,7 +1760,13 @@ def _normalize_keyword_aggregation(kwargs): else: aggspec[column] = [aggfunc] order.append((column, com.get_callable_name(aggfunc) or aggfunc)) - return aggspec, columns, order + + for column, aggfuncs in aggspec.items(): + for aggfunc in aggfuncs: + reordered_pairs.append((column, com.get_callable_name(aggfunc) or aggfunc)) + + col_idx_order = [reordered_pairs.index(o) for o in order] + return aggspec, columns, col_idx_order # TODO: Can't use, because mypy doesn't like us setting __name__ diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 52d4fa76bf879..807132afcd68c 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -560,3 +560,25 @@ def test_with_kwargs(self): result = pd.Series([1, 2]).groupby([0, 0]).agg([f1, f2], 0, b=10) expected = pd.DataFrame({"": [13], "": [30]}) tm.assert_frame_equal(result, expected) + + def test_agg_lambda(self): + df = pd.DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + result1 = df.groupby(by="kind").agg( + height_sqr_min=pd.NamedAgg( + column="height", aggfunc=lambda x: np.min(x ** 2) + ), + height_max=pd.NamedAgg(column="height", aggfunc="max"), + weight_max=pd.NamedAgg(column="weight", aggfunc="max"), + ) + result2 = df.groupby(by="kind").agg( + height_sqr_min=("height", lambda x: np.min(x ** 2)), + height_max=("height", "max"), + weight_max=("weight", "max"), + ) + tm.assert_frame_equal(result1, result2) From 197c879ec16de241aadb662cee04e6c937c9fb18 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Wed, 14 Aug 2019 23:25:28 +0200 Subject: [PATCH 03/21] Correct tests and add comments --- pandas/core/groupby/generic.py | 3 +++ .../tests/groupby/aggregate/test_aggregate.py | 18 +++++++++++++++++- 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index b777f324706da..c97e2996004b2 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1761,6 +1761,9 @@ def _normalize_keyword_aggregation(kwargs): aggspec[column] = [aggfunc] order.append((column, com.get_callable_name(aggfunc) or aggfunc)) + # GH 25719, due to aggspec will change the order of assigned columns in aggregation + # reordered_pairs will store this reorder and will compare it with order + # based on index, it will obtain new order in index for column, aggfuncs in aggspec.items(): for aggfunc in aggfuncs: reordered_pairs.append((column, com.get_callable_name(aggfunc) or aggfunc)) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 807132afcd68c..accf93e871613 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -562,6 +562,7 @@ def test_with_kwargs(self): tm.assert_frame_equal(result, expected) def test_agg_lambda(self): + # GH 25719 df = pd.DataFrame( { "kind": ["cat", "dog", "cat", "dog"], @@ -569,6 +570,18 @@ def test_agg_lambda(self): "weight": [7.9, 7.5, 9.9, 198.0], } ) + + expected = pd.DataFrame( + { + "height_sqr_min": [82.81, 36.00], + "height_max": [9.5, 34.0], + "weight_max": [9.9, 198.0], + }, + index=pd.Index(["cat", "dog"], name="kind"), + columns=["height_sqr_min", "height_max", "weight_max"], + ) + + # check pd.NameAgg case result1 = df.groupby(by="kind").agg( height_sqr_min=pd.NamedAgg( column="height", aggfunc=lambda x: np.min(x ** 2) @@ -576,9 +589,12 @@ def test_agg_lambda(self): height_max=pd.NamedAgg(column="height", aggfunc="max"), weight_max=pd.NamedAgg(column="weight", aggfunc="max"), ) + tm.assert_frame_equal(result1, expected) + + # check agg('A' = tuple) case result2 = df.groupby(by="kind").agg( height_sqr_min=("height", lambda x: np.min(x ** 2)), height_max=("height", "max"), weight_max=("weight", "max"), ) - tm.assert_frame_equal(result1, result2) + tm.assert_frame_equal(result2, expected) From c817df2133c8b6533ea65061d22d502adc9cb6b4 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Wed, 14 Aug 2019 23:27:49 +0200 Subject: [PATCH 04/21] Add whatsnew note --- doc/source/whatsnew/v1.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index b35f230100f8d..c01c3034c71b3 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -173,6 +173,7 @@ Groupby/resample/rolling - - - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) +- Bug in :meth:`DataFrameGroupby.agg` not able to use lambdas (:issue:`27519`) Reshaping ^^^^^^^^^ From 74d46843e141937ffb5c015851cffea371e0ca6c Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 15 Aug 2019 09:02:50 +0200 Subject: [PATCH 05/21] fix test failure --- pandas/tests/groupby/aggregate/test_aggregate.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index accf93e871613..2d815b7f425fc 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -571,6 +571,10 @@ def test_agg_lambda(self): } ) + # sort for 35 and earlier + columns = ["height_sqr_min", "height_max", "weight_max"] + if compat.PY35: + columns = ["height_max", "height_sqr_min", "weight_max"] expected = pd.DataFrame( { "height_sqr_min": [82.81, 36.00], @@ -578,7 +582,7 @@ def test_agg_lambda(self): "weight_max": [9.9, 198.0], }, index=pd.Index(["cat", "dog"], name="kind"), - columns=["height_sqr_min", "height_max", "weight_max"], + columns=columns, ) # check pd.NameAgg case From 7df87cb46f10e357f74401661ef90800d786eb1b Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 15 Aug 2019 15:47:42 +0200 Subject: [PATCH 06/21] Code change based on review --- doc/source/whatsnew/v1.0.0.rst | 2 +- pandas/core/groupby/generic.py | 11 +++++---- .../tests/groupby/aggregate/test_aggregate.py | 24 ++++++++++++++++--- 3 files changed, 28 insertions(+), 9 deletions(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index c01c3034c71b3..3029c725a03a9 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -173,7 +173,7 @@ Groupby/resample/rolling - - - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) -- Bug in :meth:`DataFrameGroupby.agg` not able to use lambdas (:issue:`27519`) +- Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function for ``aggfunc`` argument (:issue:`27519`) Reshaping ^^^^^^^^^ diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 18dc6773b3698..a611f1af380ce 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1734,7 +1734,7 @@ def _normalize_keyword_aggregation(kwargs): columns : List[str] The user-provided keys. order : List[int] - List of reordered index of columns. + List of columns indices. Examples -------- @@ -1754,7 +1754,6 @@ def _normalize_keyword_aggregation(kwargs): aggspec = OrderedDict() order = [] columns, pairs = list(zip(*kwargs.items())) - reordered_pairs = [] for name, (column, aggfunc) in zip(columns, pairs): if column in aggspec: @@ -1766,9 +1765,11 @@ def _normalize_keyword_aggregation(kwargs): # GH 25719, due to aggspec will change the order of assigned columns in aggregation # reordered_pairs will store this reorder and will compare it with order # based on index, it will obtain new order in index - for column, aggfuncs in aggspec.items(): - for aggfunc in aggfuncs: - reordered_pairs.append((column, com.get_callable_name(aggfunc) or aggfunc)) + reordered_pairs = [ + (column, com.get_callable_name(aggfunc) or aggfunc) + for column, aggfuncs in aggspec.items() + for aggfunc in aggfuncs + ] col_idx_order = [reordered_pairs.index(o) for o in order] return aggspec, columns, col_idx_order diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 2d815b7f425fc..51d4ab3427119 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -561,8 +561,8 @@ def test_with_kwargs(self): expected = pd.DataFrame({"": [13], "": [30]}) tm.assert_frame_equal(result, expected) - def test_agg_lambda(self): - # GH 25719 + def test_agg_one_lambda(self): + # GH 25719, write tests for DataFrameGroupby.agg with only one lambda df = pd.DataFrame( { "kind": ["cat", "dog", "cat", "dog"], @@ -595,10 +595,28 @@ def test_agg_lambda(self): ) tm.assert_frame_equal(result1, expected) - # check agg('A' = tuple) case + # check agg(key=(col, aggfunc)) case result2 = df.groupby(by="kind").agg( height_sqr_min=("height", lambda x: np.min(x ** 2)), height_max=("height", "max"), weight_max=("weight", "max"), ) tm.assert_frame_equal(result2, expected) + + def test_agg_multiple_lambda(self): + # GH25719, write test for DataFrameGroupby.agg with multiple lambdas + df = pd.DataFrame({"A": [1, 2]}) + expected = pd.DataFrame({"foo": [2], "bar": [2]}, index=pd.Index([1])) + + # check agg(key=(col, aggfunc)) case + result1 = df.groupby([1, 1]).agg( + foo=("A", lambda x: x.max()), bar=("A", lambda x: x.min()) + ) + tm.assert_frame_equal(result1, expected) + + # check pd.NamedAgg case + result2 = df.groupby([1, 1]).agg( + foo=pd.NamedAgg(column="A", aggfunc=lambda x: x.max()), + bar=pd.NamedAgg(column="A", aggfunc=lambda x: x.min()), + ) + tm.assert_frame_equal(result2, expected) From d5e52cbaabd3a13d7a949668ab889f810857a1da Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 15 Aug 2019 16:33:57 +0200 Subject: [PATCH 07/21] Fix case for py35 --- pandas/tests/groupby/aggregate/test_aggregate.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 51d4ab3427119..c5c94e468eae9 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -606,7 +606,10 @@ def test_agg_one_lambda(self): def test_agg_multiple_lambda(self): # GH25719, write test for DataFrameGroupby.agg with multiple lambdas df = pd.DataFrame({"A": [1, 2]}) - expected = pd.DataFrame({"foo": [2], "bar": [2]}, index=pd.Index([1])) + expected_dict = {"foo": [2], "bar": [2]} + if compat.PY35: + expected_dict = {"foo": [1], "bar": [2]} + expected = pd.DataFrame(expected_dict, index=pd.Index([1])) # check agg(key=(col, aggfunc)) case result1 = df.groupby([1, 1]).agg( From 5be9c543830c20ff47636bb0a921a29ef586d585 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 15 Aug 2019 19:10:41 +0200 Subject: [PATCH 08/21] More robust solution --- pandas/core/groupby/generic.py | 50 ++++++++++++++++--- .../tests/groupby/aggregate/test_aggregate.py | 4 +- 2 files changed, 46 insertions(+), 8 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index a611f1af380ce..dd8e081f35724 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1755,26 +1755,64 @@ def _normalize_keyword_aggregation(kwargs): order = [] columns, pairs = list(zip(*kwargs.items())) + repeat_num = 0 for name, (column, aggfunc) in zip(columns, pairs): if column in aggspec: aggspec[column].append(aggfunc) else: aggspec[column] = [aggfunc] - order.append((column, com.get_callable_name(aggfunc) or aggfunc)) + + # In case for same column, it uses multiple lambda functions, + # assign them different names to distinguish + if (column, _get_aggfunc_name(aggfunc)) in order: + repeat_num += 1 + order.append((column, _get_aggfunc_name(aggfunc, repeat_num))) + else: + order.append((column, _get_aggfunc_name(aggfunc))) # GH 25719, due to aggspec will change the order of assigned columns in aggregation # reordered_pairs will store this reorder and will compare it with order # based on index, it will obtain new order in index - reordered_pairs = [ - (column, com.get_callable_name(aggfunc) or aggfunc) - for column, aggfuncs in aggspec.items() - for aggfunc in aggfuncs - ] + reordered_pairs = [] + repeat_num = 0 + for column, aggfuncs in aggspec.items(): + for aggfunc in aggfuncs: + if (column, _get_aggfunc_name(aggfunc)) not in reordered_pairs: + reordered_pairs.append((column, _get_aggfunc_name(aggfunc))) + else: + repeat_num += 1 + reordered_pairs.append((column, _get_aggfunc_name(aggfunc, repeat_num))) col_idx_order = [reordered_pairs.index(o) for o in order] return aggspec, columns, col_idx_order +def _get_aggfunc_name(aggfunc, repeat_num=0): + """ + Return aggfunc name given repeat_num. If aggfunc appears before, then repeat_num + will be given different value, and output aggfunc name will be different + + Parameters: + ---------- + aggfunc: aggfunc + repeat_num: int + How many time the aggfunc used to the same column, + default is 0 + + Returns: + ------- + aggfunc name in string + + """ + if repeat_num == 0: + return com.get_callable_name(aggfunc) or aggfunc + else: + suffix = "_{}".format(repeat_num) + if com.get_callable_name(aggfunc): + return com.get_callable_name(aggfunc) + suffix + return aggfunc + suffix + + # TODO: Can't use, because mypy doesn't like us setting __name__ # error: "partial[Any]" has no attribute "__name__" # the type is: diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index c5c94e468eae9..c70c7ac5d07fc 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -606,9 +606,9 @@ def test_agg_one_lambda(self): def test_agg_multiple_lambda(self): # GH25719, write test for DataFrameGroupby.agg with multiple lambdas df = pd.DataFrame({"A": [1, 2]}) - expected_dict = {"foo": [2], "bar": [2]} + expected_dict = {"foo": [2], "bar": [1]} if compat.PY35: - expected_dict = {"foo": [1], "bar": [2]} + expected_dict = {"bar": [1], "foo": [2]} expected = pd.DataFrame(expected_dict, index=pd.Index([1])) # check agg(key=(col, aggfunc)) case From 29d83488e8c79cac7556214ed297081c32121cd9 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 15 Aug 2019 20:05:03 +0200 Subject: [PATCH 09/21] Simplify the code --- pandas/core/groupby/generic.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index dd8e081f35724..3dab9f81fda5e 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1755,6 +1755,18 @@ def _normalize_keyword_aggregation(kwargs): order = [] columns, pairs = list(zip(*kwargs.items())) + def _append_order_list(order, aggfunc, column, repeat_num): + """ + Append the order list given the pair of (column, _get_aggfunc_name) + is in the list or not + """ + if (column, _get_aggfunc_name(aggfunc)) in order: + repeat_num += 1 + order.append((column, _get_aggfunc_name(aggfunc, repeat_num))) + else: + order.append((column, _get_aggfunc_name(aggfunc))) + return order + repeat_num = 0 for name, (column, aggfunc) in zip(columns, pairs): if column in aggspec: @@ -1764,11 +1776,7 @@ def _normalize_keyword_aggregation(kwargs): # In case for same column, it uses multiple lambda functions, # assign them different names to distinguish - if (column, _get_aggfunc_name(aggfunc)) in order: - repeat_num += 1 - order.append((column, _get_aggfunc_name(aggfunc, repeat_num))) - else: - order.append((column, _get_aggfunc_name(aggfunc))) + order = _append_order_list(order, aggfunc, column, repeat_num) # GH 25719, due to aggspec will change the order of assigned columns in aggregation # reordered_pairs will store this reorder and will compare it with order @@ -1777,11 +1785,9 @@ def _normalize_keyword_aggregation(kwargs): repeat_num = 0 for column, aggfuncs in aggspec.items(): for aggfunc in aggfuncs: - if (column, _get_aggfunc_name(aggfunc)) not in reordered_pairs: - reordered_pairs.append((column, _get_aggfunc_name(aggfunc))) - else: - repeat_num += 1 - reordered_pairs.append((column, _get_aggfunc_name(aggfunc, repeat_num))) + reordered_pairs = _append_order_list( + reordered_pairs, aggfunc, column, repeat_num + ) col_idx_order = [reordered_pairs.index(o) for o in order] return aggspec, columns, col_idx_order From 275a039ec1756991108dce2b532724ddc57660b9 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Thu, 15 Aug 2019 22:06:15 +0200 Subject: [PATCH 10/21] Optimize the code --- pandas/core/groupby/generic.py | 37 +++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3dab9f81fda5e..fad0208584e8a 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1754,41 +1754,50 @@ def _normalize_keyword_aggregation(kwargs): aggspec = OrderedDict() order = [] columns, pairs = list(zip(*kwargs.items())) + reordered_pairs = [] - def _append_order_list(order, aggfunc, column, repeat_num): + def _append_order_list(order, aggfunc, column, column_dict): """ Append the order list given the pair of (column, _get_aggfunc_name) is in the list or not """ - if (column, _get_aggfunc_name(aggfunc)) in order: - repeat_num += 1 - order.append((column, _get_aggfunc_name(aggfunc, repeat_num))) + col_aggfunc_pair = (column, _get_aggfunc_name(aggfunc)) + # check if the pair not in the order list, if yes, append to order list + # and mark it to 0 + if col_aggfunc_pair not in order: + order.append(col_aggfunc_pair) + column_dict[col_aggfunc_pair] = 0 else: - order.append((column, _get_aggfunc_name(aggfunc))) - return order - repeat_num = 0 + # if pair already in order list, then add the marker by 1, and append + # the aggfunc name by the marker number + column_dict[col_aggfunc_pair] += 1 + order.append( + (column, _get_aggfunc_name(aggfunc, column_dict[col_aggfunc_pair])) + ) + + return order, column_dict + + column_dict = {} for name, (column, aggfunc) in zip(columns, pairs): if column in aggspec: aggspec[column].append(aggfunc) else: aggspec[column] = [aggfunc] - # In case for same column, it uses multiple lambda functions, - # assign them different names to distinguish - order = _append_order_list(order, aggfunc, column, repeat_num) + order, column_dict = _append_order_list(order, aggfunc, column, column_dict) # GH 25719, due to aggspec will change the order of assigned columns in aggregation # reordered_pairs will store this reorder and will compare it with order # based on index, it will obtain new order in index - reordered_pairs = [] - repeat_num = 0 + column_dict = {} for column, aggfuncs in aggspec.items(): for aggfunc in aggfuncs: - reordered_pairs = _append_order_list( - reordered_pairs, aggfunc, column, repeat_num + reordered_pairs, column_dict = _append_order_list( + reordered_pairs, aggfunc, column, column_dict ) + # get the new indice of columns by comparison col_idx_order = [reordered_pairs.index(o) for o in order] return aggspec, columns, col_idx_order From 5dd61dab98f5c96f4946f07064ff91c109fb5e2c Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 16 Aug 2019 21:31:20 +0200 Subject: [PATCH 11/21] Simplify the code --- pandas/core/groupby/generic.py | 75 +++++++++++----------------------- 1 file changed, 23 insertions(+), 52 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index fad0208584e8a..920fb5ea77707 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1754,78 +1754,49 @@ def _normalize_keyword_aggregation(kwargs): aggspec = OrderedDict() order = [] columns, pairs = list(zip(*kwargs.items())) - reordered_pairs = [] - def _append_order_list(order, aggfunc, column, column_dict): - """ - Append the order list given the pair of (column, _get_aggfunc_name) - is in the list or not - """ - col_aggfunc_pair = (column, _get_aggfunc_name(aggfunc)) - # check if the pair not in the order list, if yes, append to order list - # and mark it to 0 - if col_aggfunc_pair not in order: - order.append(col_aggfunc_pair) - column_dict[col_aggfunc_pair] = 0 - else: - - # if pair already in order list, then add the marker by 1, and append - # the aggfunc name by the marker number - column_dict[col_aggfunc_pair] += 1 - order.append( - (column, _get_aggfunc_name(aggfunc, column_dict[col_aggfunc_pair])) - ) - - return order, column_dict - - column_dict = {} for name, (column, aggfunc) in zip(columns, pairs): if column in aggspec: aggspec[column].append(aggfunc) else: aggspec[column] = [aggfunc] + order.append(_get_aggfunc_column_pair(column, aggfunc)) - order, column_dict = _append_order_list(order, aggfunc, column, column_dict) + # uniquify aggfunc name if duplicated in order list + mangled_order = _uniquify_aggfunc(order) # GH 25719, due to aggspec will change the order of assigned columns in aggregation # reordered_pairs will store this reorder and will compare it with order # based on index, it will obtain new order in index - column_dict = {} - for column, aggfuncs in aggspec.items(): - for aggfunc in aggfuncs: - reordered_pairs, column_dict = _append_order_list( - reordered_pairs, aggfunc, column, column_dict - ) + aggspec_order = [ + _get_aggfunc_column_pair(column, aggfunc) + for column, aggfuncs in aggspec.items() + for aggfunc in aggfuncs + ] + reordered = _uniquify_aggfunc(aggspec_order) # get the new indice of columns by comparison - col_idx_order = [reordered_pairs.index(o) for o in order] + col_idx_order = [reordered.index(o) for o in mangled_order] return aggspec, columns, col_idx_order -def _get_aggfunc_name(aggfunc, repeat_num=0): - """ - Return aggfunc name given repeat_num. If aggfunc appears before, then repeat_num - will be given different value, and output aggfunc name will be different +def _get_aggfunc_column_pair(column, aggfunc): + """Return (column, aggfunc name) pair""" + return column, com.get_callable_name(aggfunc) or aggfunc - Parameters: - ---------- - aggfunc: aggfunc - repeat_num: int - How many time the aggfunc used to the same column, - default is 0 - Returns: - ------- - aggfunc name in string +def _uniquify_aggfunc(seq): + """Uniquify aggfunc name in the order list + Examples: + -------- + >>> _uniquify_aggfunc([('a', ''), ('a', ''), ('b', '')]) + [('a', '_0'), ('a', '_1'), ('b', '')] """ - if repeat_num == 0: - return com.get_callable_name(aggfunc) or aggfunc - else: - suffix = "_{}".format(repeat_num) - if com.get_callable_name(aggfunc): - return com.get_callable_name(aggfunc) + suffix - return aggfunc + suffix + return [ + (v[0], "_".join([v[1], str(seq[:i].count(v))])) if seq.count(v) > 1 else v + for i, v in enumerate(seq) + ] # TODO: Can't use, because mypy doesn't like us setting __name__ From b5b44e9f35feaca8cf85d6f9dd325b84c209606d Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 16 Aug 2019 21:34:56 +0200 Subject: [PATCH 12/21] Simplify code --- pandas/core/groupby/generic.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 920fb5ea77707..32fccf9c17a35 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1760,7 +1760,7 @@ def _normalize_keyword_aggregation(kwargs): aggspec[column].append(aggfunc) else: aggspec[column] = [aggfunc] - order.append(_get_aggfunc_column_pair(column, aggfunc)) + order.append((column, com.get_callable_name(aggfunc) or aggfunc)) # uniquify aggfunc name if duplicated in order list mangled_order = _uniquify_aggfunc(order) @@ -1769,7 +1769,7 @@ def _normalize_keyword_aggregation(kwargs): # reordered_pairs will store this reorder and will compare it with order # based on index, it will obtain new order in index aggspec_order = [ - _get_aggfunc_column_pair(column, aggfunc) + (column, com.get_callable_name(aggfunc) or aggfunc) for column, aggfuncs in aggspec.items() for aggfunc in aggfuncs ] @@ -1780,11 +1780,6 @@ def _normalize_keyword_aggregation(kwargs): return aggspec, columns, col_idx_order -def _get_aggfunc_column_pair(column, aggfunc): - """Return (column, aggfunc name) pair""" - return column, com.get_callable_name(aggfunc) or aggfunc - - def _uniquify_aggfunc(seq): """Uniquify aggfunc name in the order list From 473800f701d8a3f04e7f5206321618bd9dab45d7 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 16 Aug 2019 21:45:49 +0200 Subject: [PATCH 13/21] Add more complicated case to test result --- .../tests/groupby/aggregate/test_aggregate.py | 64 +++++++++++++++---- 1 file changed, 51 insertions(+), 13 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index c70c7ac5d07fc..9282fe12bbc4c 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -561,7 +561,7 @@ def test_with_kwargs(self): expected = pd.DataFrame({"": [13], "": [30]}) tm.assert_frame_equal(result, expected) - def test_agg_one_lambda(self): + def test_agg_with_one_lambda(self): # GH 25719, write tests for DataFrameGroupby.agg with only one lambda df = pd.DataFrame( { @@ -604,22 +604,60 @@ def test_agg_one_lambda(self): tm.assert_frame_equal(result2, expected) def test_agg_multiple_lambda(self): - # GH25719, write test for DataFrameGroupby.agg with multiple lambdas - df = pd.DataFrame({"A": [1, 2]}) - expected_dict = {"foo": [2], "bar": [1]} + # GH25719, write test for DataFrameGroupby.agg with multiple lambdas with mixed aggfunc + df = pd.DataFrame( + { + "kind": ["cat", "dog", "cat", "dog"], + "height": [9.1, 6.0, 9.5, 34.0], + "weight": [7.9, 7.5, 9.9, 198.0], + } + ) + # sort for 35 and earlier + columns = [ + "height_sqr_min", + "height_max", + "weight_max", + "height_max_2", + "weight_min", + ] if compat.PY35: - expected_dict = {"bar": [1], "foo": [2]} - expected = pd.DataFrame(expected_dict, index=pd.Index([1])) + columns = [ + "height_max", + "height_max_2", + "height_sqr_min", + "weight_max", + "weight_min", + ] + expected = pd.DataFrame( + { + "height_sqr_min": [82.81, 36.00], + "height_max": [9.5, 34.0], + "weight_max": [9.9, 198.0], + "height_max_2": [9.5, 34.0], + "weight_min": [7.9, 7.5], + }, + index=pd.Index(["cat", "dog"], name="kind"), + columns=columns, + ) - # check agg(key=(col, aggfunc)) case - result1 = df.groupby([1, 1]).agg( - foo=("A", lambda x: x.max()), bar=("A", lambda x: x.min()) + # check pd.NamedAgg case + result1 = df.groupby(by="kind").agg( + height_sqr_min=("height", lambda x: np.min(x ** 2)), + height_max=("height", "max"), + weight_max=("weight", "max"), + height_max_2=("height", lambda x: np.max(x)), + weight_min=("weight", lambda x: np.min(x)), ) tm.assert_frame_equal(result1, expected) - # check pd.NamedAgg case - result2 = df.groupby([1, 1]).agg( - foo=pd.NamedAgg(column="A", aggfunc=lambda x: x.max()), - bar=pd.NamedAgg(column="A", aggfunc=lambda x: x.min()), + # check agg(key=(col, aggfunc)) case + result2 = df.groupby(by="kind").agg( + height_sqr_min=pd.NamedAgg( + column="height", aggfunc=lambda x: np.min(x ** 2) + ), + height_max=pd.NamedAgg(column="height", aggfunc="max"), + weight_max=pd.NamedAgg(column="weight", aggfunc="max"), + height_max_2=pd.NamedAgg(column="height", aggfunc=lambda x: np.max(x)), + weight_min=pd.NamedAgg(column="weight", aggfunc=lambda x: np.min(x)), ) tm.assert_frame_equal(result2, expected) From bad1d729727b5ebca05f8c1f678f35123f580e6b Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 16 Aug 2019 21:47:54 +0200 Subject: [PATCH 14/21] Rename to make variable name meaningful --- pandas/core/groupby/generic.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 32fccf9c17a35..3c2cd6ce53ddd 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1763,7 +1763,7 @@ def _normalize_keyword_aggregation(kwargs): order.append((column, com.get_callable_name(aggfunc) or aggfunc)) # uniquify aggfunc name if duplicated in order list - mangled_order = _uniquify_aggfunc(order) + uniquified_order = _uniquify_aggfunc(order) # GH 25719, due to aggspec will change the order of assigned columns in aggregation # reordered_pairs will store this reorder and will compare it with order @@ -1773,10 +1773,10 @@ def _normalize_keyword_aggregation(kwargs): for column, aggfuncs in aggspec.items() for aggfunc in aggfuncs ] - reordered = _uniquify_aggfunc(aggspec_order) + uniquified_aggspec = _uniquify_aggfunc(aggspec_order) # get the new indice of columns by comparison - col_idx_order = [reordered.index(o) for o in mangled_order] + col_idx_order = [uniquified_aggspec.index(o) for o in uniquified_order] return aggspec, columns, col_idx_order From 60e426a6d1d266d0d734a0758c6a1a80a0658651 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 16 Aug 2019 21:50:01 +0200 Subject: [PATCH 15/21] Fix linting --- pandas/tests/groupby/aggregate/test_aggregate.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 9282fe12bbc4c..65bd85a380488 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -604,7 +604,8 @@ def test_agg_with_one_lambda(self): tm.assert_frame_equal(result2, expected) def test_agg_multiple_lambda(self): - # GH25719, write test for DataFrameGroupby.agg with multiple lambdas with mixed aggfunc + # GH25719, test for DataFrameGroupby.agg with multiple lambdas + # with mixed aggfunc df = pd.DataFrame( { "kind": ["cat", "dog", "cat", "dog"], From 943437adbcfaebf457849f72639c595644335f61 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 16 Aug 2019 21:57:40 +0200 Subject: [PATCH 16/21] Self review on code quality --- pandas/core/groupby/generic.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 3c2cd6ce53ddd..ff9c7bff9cc10 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1733,7 +1733,7 @@ def _normalize_keyword_aggregation(kwargs): The transformed kwargs. columns : List[str] The user-provided keys. - order : List[int] + col_idx_order : List[int] List of columns indices. Examples @@ -1781,7 +1781,7 @@ def _normalize_keyword_aggregation(kwargs): def _uniquify_aggfunc(seq): - """Uniquify aggfunc name in the order list + """Uniquify aggfunc name of the pairs in the order list Examples: -------- @@ -1789,8 +1789,10 @@ def _uniquify_aggfunc(seq): [('a', '_0'), ('a', '_1'), ('b', '')] """ return [ - (v[0], "_".join([v[1], str(seq[:i].count(v))])) if seq.count(v) > 1 else v - for i, v in enumerate(seq) + (pair[0], "_".join([pair[1], str(seq[:i].count(pair))])) + if seq.count(pair) > 1 + else pair + for i, pair in enumerate(seq) ] From a3ba0614b31a1aeece2d5907b100ddd4680bafb4 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Fri, 16 Aug 2019 22:00:09 +0200 Subject: [PATCH 17/21] Modify commet --- pandas/core/groupby/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ff9c7bff9cc10..efa8554b0c4c8 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1766,8 +1766,8 @@ def _normalize_keyword_aggregation(kwargs): uniquified_order = _uniquify_aggfunc(order) # GH 25719, due to aggspec will change the order of assigned columns in aggregation - # reordered_pairs will store this reorder and will compare it with order - # based on index, it will obtain new order in index + # uniquified_aggspec will store uniquified order list and will compare it with order + # based on index aggspec_order = [ (column, com.get_callable_name(aggfunc) or aggfunc) for column, aggfuncs in aggspec.items() From aabfcd2838ca3f2b1805a42150fd0eca6577c793 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 19 Aug 2019 16:19:25 +0200 Subject: [PATCH 18/21] Update doc --- doc/source/whatsnew/v1.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.0.0.rst b/doc/source/whatsnew/v1.0.0.rst index 9935ef0899081..efc2527636fc4 100644 --- a/doc/source/whatsnew/v1.0.0.rst +++ b/doc/source/whatsnew/v1.0.0.rst @@ -175,7 +175,7 @@ Groupby/resample/rolling - - - Bug in :meth:`DataFrame.groupby` not offering selection by column name when ``axis=1`` (:issue:`27614`) -- Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function for ``aggfunc`` argument (:issue:`27519`) +- Bug in :meth:`DataFrameGroupby.agg` not able to use lambda function with named aggregation (:issue:`27519`) Reshaping ^^^^^^^^^ From 0950bc45c1c202528d7ebb6519286174e500f5e6 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 19 Aug 2019 16:26:31 +0200 Subject: [PATCH 19/21] Better python --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index efa8554b0c4c8..511d90031494f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1776,7 +1776,7 @@ def _normalize_keyword_aggregation(kwargs): uniquified_aggspec = _uniquify_aggfunc(aggspec_order) # get the new indice of columns by comparison - col_idx_order = [uniquified_aggspec.index(o) for o in uniquified_order] + col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order) return aggspec, columns, col_idx_order From fe33469b664434b5a7bc8a4486cc4561cea8a3ee Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 26 Aug 2019 18:54:56 +0200 Subject: [PATCH 20/21] Add test for make_unique --- pandas/core/groupby/generic.py | 8 +-- .../tests/groupby/aggregate/test_aggregate.py | 51 +++++++++++++++++-- 2 files changed, 52 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 511d90031494f..e3bc90f49a08f 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1763,7 +1763,7 @@ def _normalize_keyword_aggregation(kwargs): order.append((column, com.get_callable_name(aggfunc) or aggfunc)) # uniquify aggfunc name if duplicated in order list - uniquified_order = _uniquify_aggfunc(order) + uniquified_order = _make_unique(order) # GH 25719, due to aggspec will change the order of assigned columns in aggregation # uniquified_aggspec will store uniquified order list and will compare it with order @@ -1773,19 +1773,19 @@ def _normalize_keyword_aggregation(kwargs): for column, aggfuncs in aggspec.items() for aggfunc in aggfuncs ] - uniquified_aggspec = _uniquify_aggfunc(aggspec_order) + uniquified_aggspec = _make_unique(aggspec_order) # get the new indice of columns by comparison col_idx_order = Index(uniquified_aggspec).get_indexer(uniquified_order) return aggspec, columns, col_idx_order -def _uniquify_aggfunc(seq): +def _make_unique(seq): """Uniquify aggfunc name of the pairs in the order list Examples: -------- - >>> _uniquify_aggfunc([('a', ''), ('a', ''), ('b', '')]) + >>> _make_unique([('a', ''), ('a', ''), ('b', '')]) [('a', '_0'), ('a', '_1'), ('b', '')] """ return [ diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 65bd85a380488..3b0a67d732009 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -10,7 +10,7 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, compat, concat from pandas.core.base import SpecificationError -from pandas.core.groupby.generic import _maybe_mangle_lambdas +from pandas.core.groupby.generic import _maybe_mangle_lambdas, _make_unique from pandas.core.groupby.grouper import Grouping import pandas.util.testing as tm @@ -641,7 +641,7 @@ def test_agg_multiple_lambda(self): columns=columns, ) - # check pd.NamedAgg case + # check agg(key=(col, aggfunc)) case result1 = df.groupby(by="kind").agg( height_sqr_min=("height", lambda x: np.min(x ** 2)), height_max=("height", "max"), @@ -651,7 +651,7 @@ def test_agg_multiple_lambda(self): ) tm.assert_frame_equal(result1, expected) - # check agg(key=(col, aggfunc)) case + # check pd.NamedAgg case result2 = df.groupby(by="kind").agg( height_sqr_min=pd.NamedAgg( column="height", aggfunc=lambda x: np.min(x ** 2) @@ -662,3 +662,48 @@ def test_agg_multiple_lambda(self): weight_min=pd.NamedAgg(column="weight", aggfunc=lambda x: np.min(x)), ) tm.assert_frame_equal(result2, expected) + + @pytest.mark.parametrize( + "order, expected_reorder", + [ + ( + [ + ("height", ""), + ("height", "max"), + ("weight", "max"), + ("height", ""), + ("weight", ""), + ], + [ + ("height", "_0"), + ("height", "max"), + ("weight", "max"), + ("height", "_1"), + ("weight", ""), + ], + ), + ( + [ + ("col2", "min"), + ("col1", ""), + ("col1", ""), + ("col1", ""), + ], + [ + ("col2", "min"), + ("col1", "_0"), + ("col1", "_1"), + ("col1", "_2"), + ], + ), + ( + [("col", ""), ("col", ""), ("col", "")], + [("col", "_0"), ("col", "_1"), ("col", "_2")], + ), + ], + ) + def test_make_unique(self, order, expected_reorder): + # GH 27519, test if make_unique function reorders correctly + result = _make_unique(order) + + assert result == expected_reorder From ace9035e58c7b7470c039f5ef80051fe9a298a21 Mon Sep 17 00:00:00 2001 From: Kaiqi Date: Mon, 26 Aug 2019 20:03:01 +0200 Subject: [PATCH 21/21] fix linting --- pandas/tests/groupby/aggregate/test_aggregate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 3b0a67d732009..aa80c461a00e7 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -10,7 +10,7 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series, compat, concat from pandas.core.base import SpecificationError -from pandas.core.groupby.generic import _maybe_mangle_lambdas, _make_unique +from pandas.core.groupby.generic import _make_unique, _maybe_mangle_lambdas from pandas.core.groupby.grouper import Grouping import pandas.util.testing as tm