Skip to content

Commit 884d00f

Browse files
authored
TST: fix groupby-empty xfails (#44092)
1 parent 38dd4ad commit 884d00f

File tree

3 files changed

+95
-13
lines changed

3 files changed

+95
-13
lines changed

pandas/core/groupby/generic.py

+2
Original file line numberDiff line numberDiff line change
@@ -1536,6 +1536,7 @@ def func(df):
15361536
result = [index[i] if i >= 0 else np.nan for i in indices]
15371537
return df._constructor_sliced(result, index=res.index)
15381538

1539+
func.__name__ = "idxmax"
15391540
return self._python_apply_general(func, self._obj_with_exclusions)
15401541

15411542
@Appender(DataFrame.idxmin.__doc__)
@@ -1557,6 +1558,7 @@ def func(df):
15571558
result = [index[i] if i >= 0 else np.nan for i in indices]
15581559
return df._constructor_sliced(result, index=res.index)
15591560

1561+
func.__name__ = "idxmin"
15601562
return self._python_apply_general(func, self._obj_with_exclusions)
15611563

15621564
boxplot = boxplot_frame_groupby

pandas/core/groupby/ops.py

+12
Original file line numberDiff line numberDiff line change
@@ -753,6 +753,18 @@ def apply(
753753
mutated = True
754754
result_values.append(res)
755755

756+
# functools.partial objects have no __name__, so look it up via getattr with a default
757+
if len(group_keys) == 0 and getattr(f, "__name__", None) not in [
758+
"idxmin",
759+
"idxmax",
760+
"nanargmin",
761+
"nanargmax",
762+
]:
763+
# If group_keys is empty, f has not been called at all, so an invalid
764+
# dtype has not had a chance to raise. Make one dummy call on an empty
765+
# slice of the data so that the appropriate TypeError is still raised.
766+
f(data.iloc[:0])
767+
756768
return result_values, mutated
757769

758770
@cache_readonly

pandas/tests/groupby/test_groupby.py

+81-13
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,11 @@
2424
to_datetime,
2525
)
2626
import pandas._testing as tm
27+
from pandas.core.arrays import (
28+
BooleanArray,
29+
FloatingArray,
30+
IntegerArray,
31+
)
2732
from pandas.core.base import SpecificationError
2833
import pandas.core.common as com
2934

@@ -1822,17 +1827,23 @@ def test_pivot_table_values_key_error():
18221827
)
18231828
@pytest.mark.filterwarnings("ignore:Dropping invalid columns:FutureWarning")
18241829
@pytest.mark.filterwarnings("ignore:.*Select only valid:FutureWarning")
1825-
def test_empty_groupby(columns, keys, values, method, op, request):
1830+
def test_empty_groupby(columns, keys, values, method, op, request, using_array_manager):
18261831
# GH8093 & GH26411
18271832
override_dtype = None
18281833

18291834
if (
18301835
isinstance(values, Categorical)
18311836
and not isinstance(columns, list)
1832-
and op in ["sum", "prod"]
1837+
and op in ["sum", "prod", "skew", "mad"]
18331838
):
18341839
# handled below GH#41291
1835-
pass
1840+
1841+
if using_array_manager and op == "mad":
1842+
right_msg = "Cannot interpret 'CategoricalDtype.* as a data type"
1843+
msg = "Regex pattern \"'Categorical' does not implement.*" + right_msg
1844+
mark = pytest.mark.xfail(raises=AssertionError, match=msg)
1845+
request.node.add_marker(mark)
1846+
18361847
elif (
18371848
isinstance(values, Categorical)
18381849
and len(keys) == 1
@@ -1851,11 +1862,7 @@ def test_empty_groupby(columns, keys, values, method, op, request):
18511862
raises=TypeError, match="'Categorical' does not implement"
18521863
)
18531864
request.node.add_marker(mark)
1854-
elif (
1855-
isinstance(values, Categorical)
1856-
and len(keys) == 1
1857-
and op in ["mad", "min", "max", "sum", "prod", "skew"]
1858-
):
1865+
elif isinstance(values, Categorical) and len(keys) == 1 and op in ["sum", "prod"]:
18591866
mark = pytest.mark.xfail(
18601867
raises=AssertionError, match="(DataFrame|Series) are different"
18611868
)
@@ -1869,7 +1876,30 @@ def test_empty_groupby(columns, keys, values, method, op, request):
18691876
raises=AssertionError, match="(DataFrame|Series) are different"
18701877
)
18711878
request.node.add_marker(mark)
1872-
elif isinstance(values, pd.core.arrays.BooleanArray) and op in ["sum", "prod"]:
1879+
elif (
1880+
isinstance(values, (IntegerArray, FloatingArray))
1881+
and op == "mad"
1882+
and isinstance(columns, list)
1883+
):
1884+
mark = pytest.mark.xfail(
1885+
raises=TypeError, match="can only perform ops with numeric values"
1886+
)
1887+
request.node.add_marker(mark)
1888+
1889+
elif (
1890+
op == "mad"
1891+
and not isinstance(columns, list)
1892+
and isinstance(values, pd.DatetimeIndex)
1893+
and values.tz is not None
1894+
and using_array_manager
1895+
):
1896+
mark = pytest.mark.xfail(
1897+
raises=TypeError,
1898+
match=r"Cannot interpret 'datetime64\[ns, US/Eastern\]' as a data type",
1899+
)
1900+
request.node.add_marker(mark)
1901+
1902+
elif isinstance(values, BooleanArray) and op in ["sum", "prod"]:
18731903
# We expect to get Int64 back for these
18741904
override_dtype = "Int64"
18751905

@@ -1895,19 +1925,29 @@ def get_result():
18951925

18961926
if columns == "C":
18971927
# i.e. SeriesGroupBy
1898-
if op in ["prod", "sum"]:
1928+
if op in ["prod", "sum", "skew"]:
18991929
# ops that require more than just ordered-ness
19001930
if df.dtypes[0].kind == "M":
19011931
# GH#41291
19021932
# datetime64 -> prod, sum and skew are invalid
1903-
msg = "datetime64 type does not support"
1933+
if op == "skew":
1934+
msg = "'DatetimeArray' does not implement reduction 'skew'"
1935+
else:
1936+
msg = "datetime64 type does not support"
19041937
with pytest.raises(TypeError, match=msg):
19051938
get_result()
19061939

19071940
return
1908-
elif isinstance(values, Categorical):
1941+
if op in ["prod", "sum", "skew", "mad"]:
1942+
if isinstance(values, Categorical):
19091943
# GH#41291
1910-
msg = "category type does not support"
1944+
if op == "mad":
1945+
# mad calls mean, which Categorical doesn't implement
1946+
msg = "'Categorical' does not implement reduction 'mean'"
1947+
elif op == "skew":
1948+
msg = f"'Categorical' does not implement reduction '{op}'"
1949+
else:
1950+
msg = "category type does not support"
19111951
with pytest.raises(TypeError, match=msg):
19121952
get_result()
19131953

@@ -1954,6 +1994,34 @@ def get_result():
19541994
tm.assert_equal(result, expected)
19551995
return
19561996

1997+
if (
1998+
op in ["mad", "min", "max", "skew"]
1999+
and isinstance(values, Categorical)
2000+
and len(keys) == 1
2001+
):
2002+
# Categorical doesn't implement, so with numeric_only=True
2003+
# these are dropped and we get an empty DataFrame back
2004+
result = get_result()
2005+
expected = df.set_index(keys)[[]]
2006+
2007+
# with numeric_only=True, these are dropped, and we get
2008+
# an empty DataFrame back
2009+
if len(keys) != 1:
2010+
# Without observed=True, a Categorical grouper keeps every category in the result index
2011+
lev = Categorical([0], dtype=values.dtype)
2012+
mi = MultiIndex.from_product([lev, lev], names=keys)
2013+
expected = DataFrame([], columns=[], index=mi)
2014+
else:
2015+
# all columns are dropped, but we end up with one row
2016+
# (without observed=True, a Categorical grouper keeps every category in the result index)
2017+
lev = Categorical([0], dtype=values.dtype)
2018+
ci = Index(lev, name=keys[0])
2019+
expected = DataFrame([], columns=[], index=ci)
2020+
# expected = df.set_index(keys)[columns]
2021+
2022+
tm.assert_equal(result, expected)
2023+
return
2024+
19572025
result = get_result()
19582026
expected = df.set_index(keys)[columns]
19592027
if override_dtype is not None:

0 commit comments

Comments
 (0)