Skip to content

TST: Consolidate tests that raise in groupby #50404

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Dec 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 0 additions & 17 deletions pandas/tests/groupby/test_function.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,8 +166,6 @@ def test_averages(self, df, method):
],
)

with pytest.raises(TypeError, match="[Cc]ould not convert"):
getattr(gb, method)()
result = getattr(gb, method)(numeric_only=True)
tm.assert_frame_equal(result.reindex_like(expected), expected)

Expand Down Expand Up @@ -317,21 +315,6 @@ def gni(self, df):
gni = df.groupby("A", as_index=False)
return gni

# TODO: non-unique columns, as_index=False
def test_idxmax_nuisance_raises(self, gb):
    """Check that calling ``idxmax`` on the ``gb`` fixture raises ``TypeError``.

    The ``gb`` fixture is defined outside this view; presumably it groups a
    frame that still contains a column whose dtype cannot be reduced with
    ``idxmax`` -- confirm against the fixture.

    Only the raised exception is asserted, so no expected frame is built.
    """
    # GH#5610, GH#41480
    with pytest.raises(TypeError, match="not allowed for this dtype"):
        gb.idxmax()

def test_idxmin_nuisance_raises(self, gb):
    """Check that calling ``idxmin`` on the ``gb`` fixture raises ``TypeError``.

    The ``gb`` fixture is defined outside this view; presumably it groups a
    frame that still contains a column whose dtype cannot be reduced with
    ``idxmin`` -- confirm against the fixture.

    Only the raised exception is asserted, so no expected frame is built.
    """
    # GH#5610, GH#41480
    with pytest.raises(TypeError, match="not allowed for this dtype"):
        gb.idxmin()

def test_describe(self, df, gb, gni):
# describe
expected_index = Index([1, 3], name="A")
Expand Down
7 changes: 0 additions & 7 deletions pandas/tests/groupby/test_groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,19 +433,12 @@ def test_frame_groupby_columns(tsframe):
def test_frame_set_name_single(df):
grouped = df.groupby("A")

msg = "The default value of numeric_only"
with pytest.raises(TypeError, match="Could not convert"):
grouped.mean()
result = grouped.mean(numeric_only=True)
assert result.index.name == "A"

with pytest.raises(TypeError, match="Could not convert"):
df.groupby("A", as_index=False).mean()
result = df.groupby("A", as_index=False).mean(numeric_only=True)
assert result.index.name != "A"

with pytest.raises(TypeError, match="Could not convert"):
grouped.agg(np.mean)
result = grouped[["C", "D"]].agg(np.mean)
assert result.index.name == "A"

Expand Down
20 changes: 0 additions & 20 deletions pandas/tests/groupby/test_grouping.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,7 @@ def test_column_select_via_attr(self, df):
tm.assert_series_equal(result, expected)

df["mean"] = 1.5
with pytest.raises(TypeError, match="Could not convert"):
df.groupby("A").mean()
result = df.groupby("A").mean(numeric_only=True)
with pytest.raises(TypeError, match="Could not convert"):
df.groupby("A").agg(np.mean)
expected = df.groupby("A")[["C", "D", "mean"]].agg(np.mean)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -289,8 +285,6 @@ def test_grouper_column_and_index(self):
result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean(
numeric_only=True
)
with pytest.raises(TypeError, match="Could not convert"):
df_multi.reset_index().groupby(["B", "inner"]).mean()
expected = (
df_multi.reset_index().groupby(["B", "inner"]).mean(numeric_only=True)
)
Expand All @@ -300,8 +294,6 @@ def test_grouper_column_and_index(self):
result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean(
numeric_only=True
)
with pytest.raises(TypeError, match="Could not convert"):
df_multi.reset_index().groupby(["inner", "B"]).mean()
expected = (
df_multi.reset_index().groupby(["inner", "B"]).mean(numeric_only=True)
)
Expand All @@ -310,26 +302,18 @@ def test_grouper_column_and_index(self):
# Grouping a single-index frame by a column and the index should
# be equivalent to resetting the index and grouping by two columns
df_single = df_multi.reset_index("outer")
with pytest.raises(TypeError, match="Could not convert"):
df_single.groupby(["B", pd.Grouper(level="inner")]).mean()
result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean(
numeric_only=True
)
with pytest.raises(TypeError, match="Could not convert"):
df_single.reset_index().groupby(["B", "inner"]).mean()
expected = (
df_single.reset_index().groupby(["B", "inner"]).mean(numeric_only=True)
)
tm.assert_frame_equal(result, expected)

# Test the reverse grouping order
with pytest.raises(TypeError, match="Could not convert"):
df_single.groupby([pd.Grouper(level="inner"), "B"]).mean()
result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean(
numeric_only=True
)
with pytest.raises(TypeError, match="Could not convert"):
df_single.reset_index().groupby(["inner", "B"]).mean()
expected = (
df_single.reset_index().groupby(["inner", "B"]).mean(numeric_only=True)
)
Expand Down Expand Up @@ -406,11 +390,7 @@ def test_empty_groups(self, df):
def test_groupby_grouper(self, df):
grouped = df.groupby("A")

with pytest.raises(TypeError, match="Could not convert"):
df.groupby(grouped.grouper).mean()
result = df.groupby(grouped.grouper).mean(numeric_only=True)
with pytest.raises(TypeError, match="Could not convert"):
grouped.mean()
expected = grouped.mean(numeric_only=True)
tm.assert_frame_equal(result, expected)

Expand Down
4 changes: 0 additions & 4 deletions pandas/tests/groupby/test_min_max.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,12 @@ def test_max_min_object_multiple_columns(using_array_manager):

gb = df.groupby("A")

with pytest.raises(TypeError, match="not supported between instances"):
gb.max(numeric_only=False)
result = gb[["C"]].max()
# "max" is valid for column "C" but not for "B"
ei = Index([1, 2, 3], name="A")
expected = DataFrame({"C": ["b", "d", "e"]}, index=ei)
tm.assert_frame_equal(result, expected)

with pytest.raises(TypeError, match="not supported between instances"):
gb.max(numeric_only=False)
result = gb[["C"]].min()
# "min" is valid for column "C" but not for "B"
ei = Index([1, 2, 3], name="A")
Expand Down
178 changes: 178 additions & 0 deletions pandas/tests/groupby/test_raises.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,178 @@
# Only tests that raise an error and have no better location should go here.
# Tests for specific groupby methods should go in their respective
# test file.

import datetime

import pytest

from pandas import DataFrame
from pandas.tests.groupby import get_groupby_method_args


@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_string(how, groupby_func, as_index, sort):
    """Run one groupby operation on a frame that has a string column.

    The frame has integer columns ``a`` and ``b`` and a string column ``c``
    and is grouped by ``a``.  The operation named by ``groupby_func`` is
    invoked as a direct method call, through ``agg`` or through
    ``transform`` depending on ``how``.  The table below gives, per
    operation, the exception class and message pattern expected; an entry
    with ``None`` as the class means the call is expected to complete.

    ``groupby_func``, ``as_index`` and ``sort`` are supplied from outside
    this function (presumably pytest fixtures -- confirm in conftest).
    """
    frame = DataFrame(
        {
            "a": [1, 1, 1, 2, 2],
            "b": range(5),
            "c": list("xyzwt"),
        }
    )
    call_args = get_groupby_method_args(groupby_func, frame)
    grouped = frame.groupby("a", as_index=as_index, sort=sort)

    # Shared entries, named once so the table stays readable.
    succeeds = (None, "")
    no_impl = (NotImplementedError, "function is not implemented for this dtype")
    bad_operand = (TypeError, "unsupported operand type")
    float_type_error = (TypeError, "could not convert string to float")
    float_value_error = (ValueError, "could not convert string to float")

    outcomes = {
        "all": succeeds,
        "any": succeeds,
        "bfill": succeeds,
        "corrwith": (TypeError, "Could not convert"),
        "count": succeeds,
        "cumcount": succeeds,
        "cummax": no_impl,
        "cummin": no_impl,
        "cumprod": no_impl,
        "cumsum": no_impl,
        "diff": bad_operand,
        "ffill": succeeds,
        "fillna": succeeds,
        "first": succeeds,
        "idxmax": (TypeError, "'argmax' not allowed for this dtype"),
        "idxmin": (TypeError, "'argmin' not allowed for this dtype"),
        "last": succeeds,
        "max": succeeds,
        "mean": (TypeError, "Could not convert xyz to numeric"),
        "median": float_type_error,
        "min": succeeds,
        "ngroup": succeeds,
        "nunique": succeeds,
        "pct_change": bad_operand,
        "prod": (TypeError, "can't multiply sequence by non-int of type 'str'"),
        "quantile": (TypeError, "cannot be performed against 'object' dtypes!"),
        "rank": succeeds,
        "sem": float_value_error,
        "shift": succeeds,
        "size": succeeds,
        "skew": float_type_error,
        "std": float_value_error,
        "sum": succeeds,
        "var": float_type_error,
    }
    expected_error, pattern = outcomes[groupby_func]

    def invoke():
        # Dispatch on the calling convention selected by ``how``.
        if how == "method":
            return getattr(grouped, groupby_func)(*call_args)
        if how == "agg":
            return grouped.agg(groupby_func, *call_args)
        return grouped.transform(groupby_func, *call_args)

    if expected_error is not None:
        with pytest.raises(expected_error, match=pattern):
            invoke()
    else:
        invoke()


@pytest.mark.parametrize("how", ["agg", "transform"])
def test_groupby_raises_string_udf(how):
    """Check that a ``TypeError`` from a user function reaches the caller.

    A frame with integer columns ``a`` and ``b`` and a string column ``c``
    is grouped by ``a``.  A function that always raises ``TypeError`` is
    passed to the groupby method named by ``how``, and the test asserts the
    same exception type and message come out.
    """
    data = {
        "a": [1, 1, 1, 2, 2],
        "b": range(5),
        "c": list("xyzwt"),
    }
    grouped = DataFrame(data).groupby("a")

    def always_raises(values):
        raise TypeError("Test error message")

    with pytest.raises(TypeError, match="Test error message"):
        apply = getattr(grouped, how)
        apply(always_raises)


@pytest.mark.parametrize("how", ["method", "agg", "transform"])
def test_groupby_raises_datetime(how, groupby_func, as_index, sort):
    """Run one groupby operation on a frame that has a datetime column.

    The frame has integer columns ``a`` and ``b`` and a column ``c`` given
    as a single ``datetime.datetime`` value, and is grouped by ``a``.  The
    operation named by ``groupby_func`` is invoked as a direct method call,
    through ``agg`` or through ``transform`` depending on ``how``.  The
    table below gives, per operation, the exception class and message
    pattern expected; an entry with ``None`` as the class means the call is
    expected to complete.

    ``groupby_func``, ``as_index`` and ``sort`` are supplied from outside
    this function (presumably pytest fixtures -- confirm in conftest).
    """
    frame = DataFrame(
        {
            "a": [1, 1, 1, 2, 2],
            "b": range(5),
            "c": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000),
        }
    )
    call_args = get_groupby_method_args(groupby_func, frame)
    grouped = frame.groupby("a", as_index=as_index, sort=sort)

    # Shared entries, named once so the table stays readable.
    succeeds = (None, "")
    cannot_cast = (TypeError, "Cannot cast DatetimeArray to dtype float64")

    outcomes = {
        "all": succeeds,
        "any": succeeds,
        "bfill": succeeds,
        "corrwith": (TypeError, "cannot perform __mul__ with this index type"),
        "count": succeeds,
        "cumcount": succeeds,
        "cummax": succeeds,
        "cummin": succeeds,
        "cumprod": (TypeError, "datetime64 type does not support cumprod operations"),
        "cumsum": (TypeError, "datetime64 type does not support cumsum operations"),
        "diff": succeeds,
        "ffill": succeeds,
        "fillna": succeeds,
        "first": succeeds,
        "idxmax": succeeds,
        "idxmin": succeeds,
        "last": succeeds,
        "max": succeeds,
        "mean": succeeds,
        "median": succeeds,
        "min": succeeds,
        "ngroup": succeeds,
        "nunique": succeeds,
        "pct_change": (TypeError, "cannot perform __truediv__ with this index type"),
        "prod": (TypeError, "datetime64 type does not support prod"),
        "quantile": succeeds,
        "rank": succeeds,
        "sem": cannot_cast,
        "shift": succeeds,
        "size": succeeds,
        "skew": (TypeError, r"dtype datetime64\[ns\] does not support reduction"),
        "std": cannot_cast,
        "sum": (TypeError, "datetime64 type does not support sum operations"),
        "var": succeeds,
    }
    expected_error, pattern = outcomes[groupby_func]

    def invoke():
        # Dispatch on the calling convention selected by ``how``.
        if how == "method":
            return getattr(grouped, groupby_func)(*call_args)
        if how == "agg":
            return grouped.agg(groupby_func, *call_args)
        return grouped.transform(groupby_func, *call_args)

    if expected_error is not None:
        with pytest.raises(expected_error, match=pattern):
            invoke()
    else:
        invoke()


@pytest.mark.parametrize("how", ["agg", "transform"])
def test_groupby_raises_datetime_udf(how):
    """Check that a ``TypeError`` from a user function reaches the caller.

    A frame with integer columns ``a`` and ``b`` and a column ``c`` given as
    a single ``datetime.datetime`` value is grouped by ``a``.  A function
    that always raises ``TypeError`` is passed to the groupby method named
    by ``how``, and the test asserts the same exception type and message
    come out.
    """
    data = {
        "a": [1, 1, 1, 2, 2],
        "b": range(5),
        "c": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000),
    }
    grouped = DataFrame(data).groupby("a")

    def always_raises(values):
        raise TypeError("Test error message")

    with pytest.raises(TypeError, match="Test error message"):
        apply = getattr(grouped, how)
        apply(always_raises)
14 changes: 0 additions & 14 deletions pandas/tests/groupby/transform/test_transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -426,11 +426,7 @@ def test_transform_nuisance_raises(df):


def test_transform_function_aliases(df):
with pytest.raises(TypeError, match="Could not convert"):
df.groupby("A").transform("mean")
result = df.groupby("A").transform("mean", numeric_only=True)
with pytest.raises(TypeError, match="Could not convert"):
df.groupby("A").transform(np.mean)
expected = df.groupby("A")[["C", "D"]].transform(np.mean)
tm.assert_frame_equal(result, expected)

Expand Down Expand Up @@ -508,8 +504,6 @@ def test_groupby_transform_with_int():
}
)
with np.errstate(all="ignore"):
with pytest.raises(TypeError, match="Could not convert"):
df.groupby("A").transform(lambda x: (x - x.mean()) / x.std())
result = df.groupby("A")[["B", "C"]].transform(
lambda x: (x - x.mean()) / x.std()
)
Expand Down Expand Up @@ -554,8 +548,6 @@ def test_groupby_transform_with_int():
tm.assert_frame_equal(result, expected)

# int doesn't get downcasted
with pytest.raises(TypeError, match="unsupported operand type"):
df.groupby("A").transform(lambda x: x * 2 / 2)
result = df.groupby("A")[["B", "C"]].transform(lambda x: x * 2 / 2)
expected = DataFrame({"B": 1.0, "C": [2.0, 3.0, 4.0, 10.0, 5.0, -1.0]})
tm.assert_frame_equal(result, expected)
Expand Down Expand Up @@ -748,14 +740,8 @@ def test_cython_transform_frame(op, args, targop):

expected = expected.sort_index(axis=1)

if op != "shift":
with pytest.raises(TypeError, match="datetime64 type does not support"):
gb.transform(op, *args).sort_index(axis=1)
result = gb[expected.columns].transform(op, *args).sort_index(axis=1)
tm.assert_frame_equal(result, expected)
if op != "shift":
with pytest.raises(TypeError, match="datetime64 type does not support"):
getattr(gb, op)(*args).sort_index(axis=1)
result = getattr(gb[expected.columns], op)(*args).sort_index(axis=1)
tm.assert_frame_equal(result, expected)
# individual columns
Expand Down