Skip to content

Commit 16a8f0f

Browse files
authored
Merge pull request #225 from pandas-dev/master
BUG: passing str to GroupBy.apply (pandas-dev#42021)
2 parents 7562f93 + 1cbf344 commit 16a8f0f

File tree

3 files changed, with 65 additions and 67 deletions.

doc/source/whatsnew/v1.4.0.rst

+1
Original file line number | Diff line number | Diff line change
@@ -256,6 +256,7 @@ Plotting
256256

257257
Groupby/resample/rolling
258258
^^^^^^^^^^^^^^^^^^^^^^^^
259+
- Fixed bug in :meth:`SeriesGroupBy.apply` where passing an unrecognized string argument failed to raise ``TypeError`` when the underlying ``Series`` is empty (:issue:`42021`)
259260
- Bug in :meth:`Series.rolling.apply`, :meth:`DataFrame.rolling.apply`, :meth:`Series.expanding.apply` and :meth:`DataFrame.expanding.apply` with ``engine="numba"`` where ``*args`` were being cached with the user passed function (:issue:`42287`)
260261
-
261262

pandas/core/groupby/groupby.py

+10
Original file line number | Diff line number | Diff line change
@@ -1242,7 +1242,17 @@ def f(g):
12421242
raise ValueError(
12431243
"func must be a callable if args or kwargs are supplied"
12441244
)
1245+
elif isinstance(func, str):
1246+
if hasattr(self, func):
1247+
res = getattr(self, func)
1248+
if callable(res):
1249+
return res()
1250+
return res
1251+
1252+
else:
1253+
raise TypeError(f"apply func should be callable, not '{func}'")
12451254
else:
1255+
12461256
f = func
12471257

12481258
# ignore SettingWithCopy here in case the user mutates

pandas/tests/groupby/test_groupby.py

+54 -67
Original file line number | Diff line number | Diff line change
@@ -1770,13 +1770,9 @@ def test_empty_groupby(columns, keys, values, method, op, request):
17701770
isinstance(values, Categorical)
17711771
and not isinstance(columns, list)
17721772
and op in ["sum", "prod"]
1773-
and method != "apply"
17741773
):
17751774
# handled below GH#41291
17761775
pass
1777-
elif isinstance(values, Categorical) and len(keys) == 1 and method == "apply":
1778-
mark = pytest.mark.xfail(raises=TypeError, match="'str' object is not callable")
1779-
request.node.add_marker(mark)
17801776
elif (
17811777
isinstance(values, Categorical)
17821778
and len(keys) == 1
@@ -1808,21 +1804,16 @@ def test_empty_groupby(columns, keys, values, method, op, request):
18081804
isinstance(values, Categorical)
18091805
and len(keys) == 2
18101806
and op in ["min", "max", "sum"]
1811-
and method != "apply"
18121807
):
18131808
mark = pytest.mark.xfail(
18141809
raises=AssertionError, match="(DataFrame|Series) are different"
18151810
)
18161811
request.node.add_marker(mark)
1817-
elif (
1818-
isinstance(values, pd.core.arrays.BooleanArray)
1819-
and op in ["sum", "prod"]
1820-
and method != "apply"
1821-
):
1812+
elif isinstance(values, pd.core.arrays.BooleanArray) and op in ["sum", "prod"]:
18221813
# We expect to get Int64 back for these
18231814
override_dtype = "Int64"
18241815

1825-
if isinstance(values[0], bool) and op in ("prod", "sum") and method != "apply":
1816+
if isinstance(values[0], bool) and op in ("prod", "sum"):
18261817
# sum/product of bools is an integer
18271818
override_dtype = "int64"
18281819

@@ -1846,66 +1837,62 @@ def get_result():
18461837
# i.e. SeriesGroupBy
18471838
if op in ["prod", "sum"]:
18481839
# ops that require more than just ordered-ness
1849-
if method != "apply":
1850-
# FIXME: apply goes through different code path
1851-
if df.dtypes[0].kind == "M":
1852-
# GH#41291
1853-
# datetime64 -> prod and sum are invalid
1854-
msg = "datetime64 type does not support"
1855-
with pytest.raises(TypeError, match=msg):
1856-
get_result()
1857-
1858-
return
1859-
elif isinstance(values, Categorical):
1860-
# GH#41291
1861-
msg = "category type does not support"
1862-
with pytest.raises(TypeError, match=msg):
1863-
get_result()
1864-
1865-
return
1840+
if df.dtypes[0].kind == "M":
1841+
# GH#41291
1842+
# datetime64 -> prod and sum are invalid
1843+
msg = "datetime64 type does not support"
1844+
with pytest.raises(TypeError, match=msg):
1845+
get_result()
1846+
1847+
return
1848+
elif isinstance(values, Categorical):
1849+
# GH#41291
1850+
msg = "category type does not support"
1851+
with pytest.raises(TypeError, match=msg):
1852+
get_result()
1853+
1854+
return
18661855
else:
18671856
# ie. DataFrameGroupBy
18681857
if op in ["prod", "sum"]:
18691858
# ops that require more than just ordered-ness
1870-
if method != "apply":
1871-
# FIXME: apply goes through different code path
1872-
if df.dtypes[0].kind == "M":
1873-
# GH#41291
1874-
# datetime64 -> prod and sum are invalid
1875-
result = get_result()
1876-
1877-
# with numeric_only=True, these are dropped, and we get
1878-
# an empty DataFrame back
1879-
expected = df.set_index(keys)[[]]
1880-
tm.assert_equal(result, expected)
1881-
return
1882-
1883-
elif isinstance(values, Categorical):
1884-
# GH#41291
1885-
# Categorical doesn't implement sum or prod
1886-
result = get_result()
1887-
1888-
# with numeric_only=True, these are dropped, and we get
1889-
# an empty DataFrame back
1890-
expected = df.set_index(keys)[[]]
1891-
if len(keys) != 1 and op == "prod":
1892-
# TODO: why just prod and not sum?
1893-
# Categorical is special without 'observed=True'
1894-
lev = Categorical([0], dtype=values.dtype)
1895-
mi = MultiIndex.from_product([lev, lev], names=["A", "B"])
1896-
expected = DataFrame([], columns=[], index=mi)
1897-
1898-
tm.assert_equal(result, expected)
1899-
return
1900-
1901-
elif df.dtypes[0] == object:
1902-
# FIXME: the test is actually wrong here, xref #41341
1903-
result = get_result()
1904-
# In this case we have list-of-list, will raise TypeError,
1905-
# and subsequently be dropped as nuisance columns
1906-
expected = df.set_index(keys)[[]]
1907-
tm.assert_equal(result, expected)
1908-
return
1859+
if df.dtypes[0].kind == "M":
1860+
# GH#41291
1861+
# datetime64 -> prod and sum are invalid
1862+
result = get_result()
1863+
1864+
# with numeric_only=True, these are dropped, and we get
1865+
# an empty DataFrame back
1866+
expected = df.set_index(keys)[[]]
1867+
tm.assert_equal(result, expected)
1868+
return
1869+
1870+
elif isinstance(values, Categorical):
1871+
# GH#41291
1872+
# Categorical doesn't implement sum or prod
1873+
result = get_result()
1874+
1875+
# with numeric_only=True, these are dropped, and we get
1876+
# an empty DataFrame back
1877+
expected = df.set_index(keys)[[]]
1878+
if len(keys) != 1 and op == "prod":
1879+
# TODO: why just prod and not sum?
1880+
# Categorical is special without 'observed=True'
1881+
lev = Categorical([0], dtype=values.dtype)
1882+
mi = MultiIndex.from_product([lev, lev], names=["A", "B"])
1883+
expected = DataFrame([], columns=[], index=mi)
1884+
1885+
tm.assert_equal(result, expected)
1886+
return
1887+
1888+
elif df.dtypes[0] == object:
1889+
# FIXME: the test is actually wrong here, xref #41341
1890+
result = get_result()
1891+
# In this case we have list-of-list, will raise TypeError,
1892+
# and subsequently be dropped as nuisance columns
1893+
expected = df.set_index(keys)[[]]
1894+
tm.assert_equal(result, expected)
1895+
return
19091896

19101897
result = get_result()
19111898
expected = df.set_index(keys)[columns]

0 commit comments

Comments (0)