Skip to content

BUG: Remove error raise to allow same input function on the same column in named aggregation #28428

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 36 commits into from
Dec 8, 2019
Merged
Show file tree
Hide file tree
Changes from 26 commits
Commits
Show all changes
36 commits
Select commit Hold shift + click to select a range
7e461a1
remove \n from docstring
charlesdong1991 Dec 3, 2018
1314059
fix conflicts
charlesdong1991 Jan 19, 2019
8bcb313
Merge remote-tracking branch 'upstream/master'
charlesdong1991 Jul 30, 2019
07c6003
Merge remote-tracking branch 'upstream/master' into fix_issue_28426
charlesdong1991 Sep 13, 2019
495f4ca
Remove error raising
charlesdong1991 Sep 13, 2019
65f3a34
better test case
charlesdong1991 Sep 13, 2019
b985055
Fix test for series
charlesdong1991 Sep 13, 2019
fb43351
Add whatsnew
charlesdong1991 Sep 13, 2019
2bc05c3
better english
charlesdong1991 Sep 13, 2019
39d3aee
more accurate explanation
charlesdong1991 Sep 13, 2019
df51bf9
Remove unused imports
charlesdong1991 Sep 13, 2019
d162b3e
code change based on review
charlesdong1991 Sep 14, 2019
31e8079
fix test
charlesdong1991 Sep 14, 2019
7e87435
code change and reformat
charlesdong1991 Sep 15, 2019
1361236
move whatsnew to 1.0.0
charlesdong1991 Sep 16, 2019
3815507
add test
charlesdong1991 Sep 17, 2019
8a9c41b
bold change
charlesdong1991 Sep 17, 2019
6eeb978
remove unused comment
charlesdong1991 Sep 17, 2019
2ae946e
remove print
charlesdong1991 Sep 18, 2019
2e5fd6e
revert blank line change
charlesdong1991 Sep 18, 2019
b10b4b8
revert previous change
charlesdong1991 Sep 22, 2019
314eb28
revert unnecessary white space removal
charlesdong1991 Sep 22, 2019
3fc2d71
fix conflicts and merge master
charlesdong1991 Oct 19, 2019
fd485d7
fix tests
charlesdong1991 Oct 19, 2019
5817f5e
fix test
charlesdong1991 Oct 19, 2019
84f2734
fix conflict and repush
charlesdong1991 Nov 8, 2019
0e249fc
move to groupby section
charlesdong1991 Nov 8, 2019
6331a26
Merge remote-tracking branch 'upstream/master' into fix_issue_28426
charlesdong1991 Nov 8, 2019
5e1e4e0
merge master
charlesdong1991 Nov 20, 2019
f7a219c
fix test for multiindex
charlesdong1991 Nov 20, 2019
cef6d3a
reformatting
charlesdong1991 Nov 20, 2019
2c11c36
solve conflicts
charlesdong1991 Nov 22, 2019
5172f46
solve conflict
charlesdong1991 Nov 22, 2019
6c7ee71
merge master and fix conflict
charlesdong1991 Dec 2, 2019
90ba1b4
remove redundant and expand whatsnew note
charlesdong1991 Dec 2, 2019
8eaecee
Merge remote-tracking branch 'upstream/master' into fix_issue_28426
charlesdong1991 Dec 3, 2019
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -456,6 +456,7 @@ Other
- :meth:`DataFrame.to_csv` and :meth:`Series.to_csv` now support dicts as ``compression`` argument with key ``'method'`` being the compression method and others as additional compression options when the compression method is ``'zip'``. (:issue:`26023`)
- Bug in :meth:`Series.diff` where a boolean series would incorrectly raise a ``TypeError`` (:issue:`17294`)
- :meth:`Series.append` will no longer raise a ``TypeError`` when passed a tuple of ``Series`` (:issue:`28410`)
- Remove error raised due to duplicated input functions in named aggregation in :meth:`DataFrame.groupby` and :meth:`Series.groupby` (:issue:`28426`)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you move this to groupby (and the groupby one right below).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks for your comment! moved! @jreback

- :meth:`SeriesGroupBy.value_counts` will be able to handle the case even when the :class:`Grouper` makes empty groups (:issue: 28479)
- Fix corrupted error message when calling ``pandas.libs._json.encode()`` on a 0d array (:issue:`18878`)
- Fix :class:`AbstractHolidayCalendar` to return correct results for
Expand Down
13 changes: 8 additions & 5 deletions pandas/core/groupby/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -321,11 +321,6 @@ def _aggregate_multiple_funcs(self, arg, _level):
results = OrderedDict()
for name, func in arg:
obj = self
if name in results:
raise SpecificationError(
"Function names must be unique, found multiple named "
"{name}".format(name=name)
)

# reset the cache so that we
# only include the named selection
Expand Down Expand Up @@ -871,6 +866,14 @@ def aggregate(self, func=None, *args, **kwargs):
func, columns, order = _normalize_keyword_aggregation(kwargs)

kwargs = {}
elif isinstance(func, list) and len(func) > len(set(func)):

# GH 28426 will raise error if duplicated function names are used and
# there is no reassigned name
raise SpecificationError(
"Function names must be unique if there is no new column "
"names assigned"
)
elif func is None:
# nicer error message
raise TypeError("Must provide 'func' or tuples of '(column, aggfunc).")
Expand Down
50 changes: 40 additions & 10 deletions pandas/tests/groupby/aggregate/test_aggregate.py
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,14 @@ def test_uint64_type_handling(dtype, how):
tm.assert_frame_equal(result, expected, check_exact=True)


def test_func_duplicates_raises():
# GH28426
msg = "Function names"
df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})
with pytest.raises(SpecificationError, match=msg):
df.groupby("A").agg(["min", "min"])


class TestNamedAggregationSeries:
def test_series_named_agg(self):
df = pd.Series([1, 2, 3, 4])
Expand All @@ -376,12 +384,12 @@ def test_no_args_raises(self):
expected = pd.DataFrame()
tm.assert_frame_equal(result, expected)

def test_series_named_agg_duplicates_raises(self):
# This is a limitation of the named agg implementation reusing
# aggregate_multiple_funcs. It could maybe be lifted in the future.
def test_series_named_agg_duplicates_no_raises(self):
# GH28426
gr = pd.Series([1, 2, 3]).groupby([0, 0, 1])
with pytest.raises(SpecificationError):
gr.agg(a="sum", b="sum")
grouped = gr.agg(a="sum", b="sum")
expected = pd.DataFrame({"a": [3, 3], "b": [3, 3]})
tm.assert_frame_equal(expected, grouped)

def test_mangled(self):
gr = pd.Series([1, 2, 3]).groupby([0, 0, 1])
Expand Down Expand Up @@ -440,12 +448,34 @@ def test_agg_relabel_non_identifier(self):
)
tm.assert_frame_equal(result, expected)

def test_duplicate_raises(self):
# TODO: we currently raise on multiple lambdas. We could *maybe*
# update com.get_callable_name to append `_i` to each lambda.
def test_duplicate_no_raises(self):
# GH 28426, if use same input function on same column,
# no error should raise
df = pd.DataFrame({"A": [0, 0, 1, 1], "B": [1, 2, 3, 4]})
with pytest.raises(SpecificationError, match="Function names"):
df.groupby("A").agg(a=("A", "min"), b=("A", "min"))

grouped = df.groupby("A").agg(a=("B", "min"), b=("B", "min"))
expected = pd.DataFrame(
{"a": [1, 3], "b": [1, 3]}, index=pd.Index([0, 1], name="A")
)
tm.assert_frame_equal(grouped, expected)

quant50 = functools.partial(np.percentile, q=50)
quant70 = functools.partial(np.percentile, q=70)
quant50.__name__ = "quant50"
quant70.__name__ = "quant70"

test = pd.DataFrame(
{"col1": ["a", "a", "b", "b", "b"], "col2": [1, 2, 3, 4, 5]}
)

grouped = test.groupby("col1").agg(
quantile_50=("col2", quant50), quantile_70=("col2", quant70)
)
expected = pd.DataFrame(
{"quantile_50": [1.5, 4.0], "quantile_70": [1.7, 4.4]},
index=pd.Index(["a", "b"], name="col1"),
)
tm.assert_frame_equal(grouped, expected)

def test_agg_relabel_with_level(self):
df = pd.DataFrame(
Expand Down