Skip to content

TST: Consolidate tests that raise in groupby #50749

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
5 changes: 0 additions & 5 deletions pandas/tests/groupby/aggregate/test_cython.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,13 +95,8 @@ def test_cython_agg_nothing_to_agg():
with pytest.raises(TypeError, match="Cannot use numeric_only=True"):
frame.groupby("a")["b"].mean(numeric_only=True)

with pytest.raises(TypeError, match="Could not convert (foo|bar)*"):
frame.groupby("a")["b"].mean()

frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25})

with pytest.raises(TypeError, match="Could not convert"):
frame[["b"]].groupby(frame["a"]).mean()
result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True)
expected = DataFrame(
[], index=frame["a"].sort_values().drop_duplicates(), columns=[]
Expand Down
3 changes: 0 additions & 3 deletions pandas/tests/groupby/test_categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,6 @@ def test_basic(): # TODO: split this test
gb = df.groupby("A", observed=False)
exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True)
expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)})
msg = "category type does not support sum operations"
with pytest.raises(TypeError, match=msg):
gb.sum()
result = gb.sum(numeric_only=True)
tm.assert_frame_equal(result, expected)

Expand Down
28 changes: 0 additions & 28 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -465,8 +465,6 @@ def test_multi_func(df):
col2 = df["B"]

grouped = df.groupby([col1.get, col2.get])
with pytest.raises(TypeError, match="Could not convert"):
grouped.mean()
agged = grouped.mean(numeric_only=True)
expected = df.groupby(["A", "B"]).mean()

Expand Down Expand Up @@ -663,17 +661,11 @@ def test_groupby_as_index_agg(df):

# single-key

with pytest.raises(TypeError, match="Could not convert"):
grouped.agg(np.mean)
result = grouped[["C", "D"]].agg(np.mean)
with pytest.raises(TypeError, match="Could not convert"):
grouped.mean()
expected = grouped.mean(numeric_only=True)
tm.assert_frame_equal(result, expected)

result2 = grouped.agg({"C": np.mean, "D": np.sum})
with pytest.raises(TypeError, match="Could not convert"):
grouped.mean()
expected2 = grouped.mean(numeric_only=True)
expected2["D"] = grouped.sum()["D"]
tm.assert_frame_equal(result2, expected2)
Expand Down Expand Up @@ -791,11 +783,7 @@ def test_groupby_as_index_cython(df):

# single-key
grouped = data.groupby("A", as_index=False)
with pytest.raises(TypeError, match="Could not convert"):
grouped.mean()
result = grouped.mean(numeric_only=True)
with pytest.raises(TypeError, match="Could not convert"):
data.groupby(["A"]).mean()
expected = data.groupby(["A"]).mean(numeric_only=True)
expected.insert(0, "A", expected.index)
expected.index = RangeIndex(len(expected))
Expand Down Expand Up @@ -958,11 +946,7 @@ def test_empty_groups_corner(mframe):
)

grouped = df.groupby(["k1", "k2"])
with pytest.raises(TypeError, match="Could not convert"):
grouped.agg(np.mean)
result = grouped[["v1", "v2"]].agg(np.mean)
with pytest.raises(TypeError, match="Could not convert"):
grouped.mean()
expected = grouped.mean(numeric_only=True)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -1146,8 +1130,6 @@ def test_groupby_with_hier_columns():
# add a nuisance column
sorted_columns, _ = columns.sortlevel(0)
df["A", "foo"] = "bar"
with pytest.raises(TypeError, match="Could not convert"):
df.groupby(level=0).mean()
result = df.groupby(level=0).mean(numeric_only=True)
tm.assert_index_equal(result.columns, df.columns[:-1])

Expand Down Expand Up @@ -1181,11 +1163,7 @@ def test_groupby_wrong_multi_labels():


def test_groupby_series_with_name(df):
with pytest.raises(TypeError, match="Could not convert"):
df.groupby(df["A"]).mean()
result = df.groupby(df["A"]).mean(numeric_only=True)
with pytest.raises(TypeError, match="Could not convert"):
df.groupby(df["A"], as_index=False).mean()
result2 = df.groupby(df["A"], as_index=False).mean(numeric_only=True)
assert result.index.name == "A"
assert "A" in result2
Expand Down Expand Up @@ -1335,11 +1313,7 @@ def test_groupby_unit64_float_conversion():


def test_groupby_list_infer_array_like(df):
with pytest.raises(TypeError, match="Could not convert"):
df.groupby(list(df["A"])).mean()
result = df.groupby(list(df["A"])).mean(numeric_only=True)
with pytest.raises(TypeError, match="Could not convert"):
df.groupby(df["A"]).mean()
expected = df.groupby(df["A"]).mean(numeric_only=True)
tm.assert_frame_equal(result, expected, check_names=False)

Expand Down Expand Up @@ -1453,8 +1427,6 @@ def test_groupby_2d_malformed():
d["zeros"] = [0, 0]
d["ones"] = [1, 1]
d["label"] = ["l1", "l2"]
with pytest.raises(TypeError, match="Could not convert"):
d.groupby(["group"]).mean()
tmp = d.groupby(["group"]).mean(numeric_only=True)
res_values = np.array([[0.0, 1.0], [0.0, 1.0]])
tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"]))
Expand Down
5 changes: 0 additions & 5 deletions pandas/tests/groupby/test_index_as_string.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,7 @@ def series():
)
def test_grouper_index_level_as_string(frame, key_strs, groupers):
if "B" not in key_strs or "outer" in frame.columns:
with pytest.raises(TypeError, match="Could not convert"):
frame.groupby(key_strs).mean()
result = frame.groupby(key_strs).mean(numeric_only=True)

with pytest.raises(TypeError, match="Could not convert"):
frame.groupby(groupers).mean()
expected = frame.groupby(groupers).mean(numeric_only=True)
else:
result = frame.groupby(key_strs).mean()
Expand Down
122 changes: 121 additions & 1 deletion pandas/tests/groupby/test_raises.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@

import pytest

from pandas import DataFrame
from pandas import (
Categorical,
DataFrame,
)
from pandas.tests.groupby import get_groupby_method_args


Expand Down Expand Up @@ -176,3 +179,120 @@ def func(x):

with pytest.raises(TypeError, match="Test error message"):
getattr(gb, how)(func)


@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_category(how, groupby_func, as_index, sort):
    """
    Check each groupby op against a frame holding an ordered Categorical column.

    The frame is grouped by the integer column "a". The op named by
    ``groupby_func`` is invoked directly as a method, through ``agg``, or
    through ``transform`` depending on ``how``. Ops listed as raising must
    raise the given exception with a matching message; all others only need
    to run without raising.

    ``groupby_func``, ``as_index`` and ``sort`` are not defined in this
    module -- presumably pytest fixtures from a shared conftest.
    """
    cat_values = Categorical(
        ["a", "a", "b", "c", "c"], categories=["a", "b", "c", "d"], ordered=True
    )
    frame = DataFrame({"a": [1, 1, 1, 2, 2], "b": range(5), "c": cat_values})
    args = get_groupby_method_args(groupby_func, frame)
    grouped = frame.groupby("a", as_index=as_index, sort=sort)

    # Ops expected to run without raising on the categorical column.
    passing = dict.fromkeys(
        [
            "all",
            "any",
            "bfill",
            "count",
            "cumcount",
            "ffill",
            "first",
            "idxmax",
            "idxmin",
            "last",
            "max",
            "min",
            "ngroup",
            "nunique",
            "rank",
            "shift",
            "size",
        ],
        (None, ""),
    )
    # Ops expected to raise: op name -> (exception class, regex for the message).
    raising = {
        "corrwith": (
            TypeError,
            r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'",
        ),
        "cummax": (NotImplementedError, "category dtype not supported"),
        "cummin": (NotImplementedError, "category dtype not supported"),
        "cumprod": (TypeError, "category type does not support cumprod operations"),
        "cumsum": (TypeError, "category type does not support cumsum operations"),
        "diff": (
            TypeError,
            r"unsupported operand type\(s\) for -: 'Categorical' and 'Categorical'",
        ),
        "fillna": (
            TypeError,
            ", ".join(
                [
                    r"Cannot setitem on a Categorical with a new category \(0\)",
                    "set the categories first",
                ]
            ),
        ),
        "mean": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'mean'",
        ),
        "median": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'median'",
        ),
        "pct_change": (
            TypeError,
            r"unsupported operand type\(s\) for /: 'Categorical' and 'Categorical'",
        ),
        "prod": (TypeError, "category type does not support prod operations"),
        "quantile": (TypeError, "No matching signature found"),
        "sem": (ValueError, "Cannot cast object dtype to float64"),
        "skew": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'skew'",
        ),
        "std": (ValueError, "Cannot cast object dtype to float64"),
        "sum": (TypeError, "category type does not support sum operations"),
        "var": (
            TypeError,
            "'Categorical' with dtype category does not support reduction 'var'",
        ),
    }
    # A groupby_func missing from both tables fails here with KeyError.
    exc_type, pattern = {**passing, **raising}[groupby_func]

    def invoke():
        # Route the op through the entry point selected by ``how``.
        if how == "method":
            return getattr(grouped, groupby_func)(*args)
        if how == "agg":
            return grouped.agg(groupby_func, *args)
        return grouped.transform(groupby_func, *args)

    if exc_type is None:
        invoke()
        return

    with pytest.raises(exc_type, match=pattern):
        invoke()


@pytest.mark.parametrize("how", ["agg", "transform"])
def test_groupby_raises_category_udf(how):
    """
    Check that a TypeError raised inside a user-defined function surfaces.

    A frame with an ordered Categorical column "c" is grouped by "a", and a
    function that always raises is passed to ``agg`` or ``transform``
    (selected by ``how``). The call must raise TypeError with the UDF's own
    message.
    """
    cat_values = Categorical(
        ["a", "a", "b", "c", "c"], categories=["a", "b", "c", "d"], ordered=True
    )
    frame = DataFrame({"a": [1, 1, 1, 2, 2], "b": range(5), "c": cat_values})
    grouped = frame.groupby("a")

    def func(x):
        raise TypeError("Test error message")

    apply_udf = getattr(grouped, how)
    with pytest.raises(TypeError, match="Test error message"):
        apply_udf(func)