From 760f8fdb245d9f95fb23c31b315d24985baab904 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Jun 2021 10:23:00 -0700 Subject: [PATCH 1/4] BUG: passing str to GroupBy.apply --- pandas/core/groupby/groupby.py | 10 +++ pandas/tests/groupby/test_groupby.py | 121 ++++++++++++--------------- 2 files changed, 64 insertions(+), 67 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index f694dcce809ea..62b38a7f20f54 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1245,6 +1245,16 @@ def f(g): "func must be a callable if args or kwargs are supplied" ) else: + if isinstance(func, str): + if hasattr(self, func): + res = getattr(self, func) + if inspect.isfunction(res) or inspect.ismethod(res): + return res() + return res + + else: + raise TypeError(f"apply func should be callable, not '{func}'") + f = func # ignore SettingWithCopy here in case the user mutates diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 382a940d2a92c..9ab02a0584aa8 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1768,13 +1768,9 @@ def test_empty_groupby(columns, keys, values, method, op, request): isinstance(values, Categorical) and not isinstance(columns, list) and op in ["sum", "prod"] - and method != "apply" ): # handled below GH#41291 pass - elif isinstance(values, Categorical) and len(keys) == 1 and method == "apply": - mark = pytest.mark.xfail(raises=TypeError, match="'str' object is not callable") - request.node.add_marker(mark) elif ( isinstance(values, Categorical) and len(keys) == 1 @@ -1806,21 +1802,16 @@ def test_empty_groupby(columns, keys, values, method, op, request): isinstance(values, Categorical) and len(keys) == 2 and op in ["min", "max", "sum"] - and method != "apply" ): mark = pytest.mark.xfail( raises=AssertionError, match="(DataFrame|Series) are different" ) request.node.add_marker(mark) - elif ( - isinstance(values, pd.core.arrays.BooleanArray) - and op in ["sum", "prod"] - and method != "apply" - ): + elif isinstance(values, pd.core.arrays.BooleanArray) and op in ["sum", "prod"]: # We expect to get Int64 back for these override_dtype = "Int64" - if isinstance(values[0], bool) and op in ("prod", "sum") and method != "apply": + if isinstance(values[0], bool) and op in ("prod", "sum"): # sum/product of bools is an integer override_dtype = "int64" @@ -1844,66 +1835,62 @@ def get_result(): # i.e. SeriesGroupBy if op in ["prod", "sum"]: # ops that require more than just ordered-ness - if method != "apply": - # FIXME: apply goes through different code path - if df.dtypes[0].kind == "M": - # GH#41291 - # datetime64 -> prod and sum are invalid - msg = "datetime64 type does not support" - with pytest.raises(TypeError, match=msg): - get_result() - - return - elif isinstance(values, Categorical): - # GH#41291 - msg = "category type does not support" - with pytest.raises(TypeError, match=msg): - get_result() - - return + if df.dtypes[0].kind == "M": + # GH#41291 + # datetime64 -> prod and sum are invalid + msg = "datetime64 type does not support" + with pytest.raises(TypeError, match=msg): + get_result() + + return + elif isinstance(values, Categorical): + # GH#41291 + msg = "category type does not support" + with pytest.raises(TypeError, match=msg): + get_result() + + return else: # ie. DataFrameGroupBy if op in ["prod", "sum"]: # ops that require more than just ordered-ness - if method != "apply": - # FIXME: apply goes through different code path - if df.dtypes[0].kind == "M": - # GH#41291 - # datetime64 -> prod and sum are invalid - result = get_result() - - # with numeric_only=True, these are dropped, and we get - # an empty DataFrame back - expected = df.set_index(keys)[[]] - tm.assert_equal(result, expected) - return - - elif isinstance(values, Categorical): - # GH#41291 - # Categorical doesn't implement sum or prod - result = get_result() - - # with numeric_only=True, these are dropped, and we get - # an empty DataFrame back - expected = df.set_index(keys)[[]] - if len(keys) != 1 and op == "prod": - # TODO: why just prod and not sum? - # Categorical is special without 'observed=True' - lev = Categorical([0], dtype=values.dtype) - mi = MultiIndex.from_product([lev, lev], names=["A", "B"]) - expected = DataFrame([], columns=[], index=mi) - - tm.assert_equal(result, expected) - return - - elif df.dtypes[0] == object: - # FIXME: the test is actually wrong here, xref #41341 - result = get_result() - # In this case we have list-of-list, will raise TypeError, - # and subsequently be dropped as nuisance columns - expected = df.set_index(keys)[[]] - tm.assert_equal(result, expected) - return + if df.dtypes[0].kind == "M": + # GH#41291 + # datetime64 -> prod and sum are invalid + result = get_result() + + # with numeric_only=True, these are dropped, and we get + # an empty DataFrame back + expected = df.set_index(keys)[[]] + tm.assert_equal(result, expected) + return + + elif isinstance(values, Categorical): + # GH#41291 + # Categorical doesn't implement sum or prod + result = get_result() + + # with numeric_only=True, these are dropped, and we get + # an empty DataFrame back + expected = df.set_index(keys)[[]] + if len(keys) != 1 and op == "prod": + # TODO: why just prod and not sum? + # Categorical is special without 'observed=True' + lev = Categorical([0], dtype=values.dtype) + mi = MultiIndex.from_product([lev, lev], names=["A", "B"]) + expected = DataFrame([], columns=[], index=mi) + + tm.assert_equal(result, expected) + return + + elif df.dtypes[0] == object: + # FIXME: the test is actually wrong here, xref #41341 + result = get_result() + # In this case we have list-of-list, will raise TypeError, + # and subsequently be dropped as nuisance columns + expected = df.set_index(keys)[[]] + tm.assert_equal(result, expected) + return result = get_result() expected = df.set_index(keys)[columns] From 10bfb8c7e4f36cb406687fc658cfced2e4e53f0c Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Jun 2021 10:24:39 -0700 Subject: [PATCH 2/4] whatsnew --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 166ea2f0d4164..cd2f8936ca047 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -187,7 +187,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- +- Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`??`) - Reshaping From 04bcb2811c28cc125b4933ed7dd7e8cdd83d15b2 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Jun 2021 10:26:06 -0700 Subject: [PATCH 3/4] GH ref --- doc/source/whatsnew/v1.4.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index cd2f8936ca047..b39dc2a3e3f0c 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -187,7 +187,7 @@ Plotting Groupby/resample/rolling ^^^^^^^^^^^^^^^^^^^^^^^^ -- Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`??`) +- Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`) - Reshaping From 6cedf1d517ee2b9c2f055ececebde4cf16fade97 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Jun 2021 19:51:21 -0700 Subject: [PATCH 4/4] if->elif --- pandas/core/groupby/groupby.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 62b38a7f20f54..f3f60880a1245 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1244,16 +1244,16 @@ def f(g): raise ValueError( "func must be a callable if args or kwargs are supplied" ) - else: - if isinstance(func, str): - if hasattr(self, func): - res = getattr(self, func) - if inspect.isfunction(res) or inspect.ismethod(res): - return res() - return res + elif isinstance(func, str): + if hasattr(self, func): + res = getattr(self, func) + if callable(res): + return res() + return res - else: - raise TypeError(f"apply func should be callable, not '{func}'") + else: + raise TypeError(f"apply func should be callable, not '{func}'") + else: f = func