diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 2b0c607d6851a..bb15783f4607f 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -166,8 +166,6 @@ def test_averages(self, df, method): ], ) - with pytest.raises(TypeError, match="[Cc]ould not convert"): - getattr(gb, method)() result = getattr(gb, method)(numeric_only=True) tm.assert_frame_equal(result.reindex_like(expected), expected) @@ -317,21 +315,6 @@ def gni(self, df): gni = df.groupby("A", as_index=False) return gni - # TODO: non-unique columns, as_index=False - def test_idxmax_nuisance_raises(self, gb): - # GH#5610, GH#41480 - expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) - expected.index.name = "A" - with pytest.raises(TypeError, match="not allowed for this dtype"): - gb.idxmax() - - def test_idxmin_nuisance_raises(self, gb): - # GH#5610, GH#41480 - expected = DataFrame([[0.0], [np.nan]], columns=["B"], index=[1, 3]) - expected.index.name = "A" - with pytest.raises(TypeError, match="not allowed for this dtype"): - gb.idxmin() - def test_describe(self, df, gb, gni): # describe expected_index = Index([1, 3], name="A") diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index ab261c7f1a7c8..b2fc60b76fdf6 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -433,19 +433,12 @@ def test_frame_groupby_columns(tsframe): def test_frame_set_name_single(df): grouped = df.groupby("A") - msg = "The default value of numeric_only" - with pytest.raises(TypeError, match="Could not convert"): - grouped.mean() result = grouped.mean(numeric_only=True) assert result.index.name == "A" - with pytest.raises(TypeError, match="Could not convert"): - df.groupby("A", as_index=False).mean() result = df.groupby("A", as_index=False).mean(numeric_only=True) assert result.index.name != "A" - with pytest.raises(TypeError, match="Could not convert"): - grouped.agg(np.mean) result = grouped[["C", "D"]].agg(np.mean) assert result.index.name == "A" diff --git a/pandas/tests/groupby/test_grouping.py b/pandas/tests/groupby/test_grouping.py index 26cdfa2291021..7c7b9b29d8709 100644 --- a/pandas/tests/groupby/test_grouping.py +++ b/pandas/tests/groupby/test_grouping.py @@ -55,11 +55,7 @@ def test_column_select_via_attr(self, df): tm.assert_series_equal(result, expected) df["mean"] = 1.5 - with pytest.raises(TypeError, match="Could not convert"): - df.groupby("A").mean() result = df.groupby("A").mean(numeric_only=True) - with pytest.raises(TypeError, match="Could not convert"): - df.groupby("A").agg(np.mean) expected = df.groupby("A")[["C", "D", "mean"]].agg(np.mean) tm.assert_frame_equal(result, expected) @@ -289,8 +285,6 @@ def test_grouper_column_and_index(self): result = df_multi.groupby(["B", pd.Grouper(level="inner")]).mean( numeric_only=True ) - with pytest.raises(TypeError, match="Could not convert"): - df_multi.reset_index().groupby(["B", "inner"]).mean() expected = ( df_multi.reset_index().groupby(["B", "inner"]).mean(numeric_only=True) ) @@ -300,8 +294,6 @@ def test_grouper_column_and_index(self): result = df_multi.groupby([pd.Grouper(level="inner"), "B"]).mean( numeric_only=True ) - with pytest.raises(TypeError, match="Could not convert"): - df_multi.reset_index().groupby(["inner", "B"]).mean() expected = ( df_multi.reset_index().groupby(["inner", "B"]).mean(numeric_only=True) ) @@ -310,26 +302,18 @@ def test_grouper_column_and_index(self): # Grouping a single-index frame by a column and the index should # be equivalent to resetting the index and grouping by two columns df_single = df_multi.reset_index("outer") - with pytest.raises(TypeError, match="Could not convert"): - df_single.groupby(["B", pd.Grouper(level="inner")]).mean() result = df_single.groupby(["B", pd.Grouper(level="inner")]).mean( numeric_only=True ) - with pytest.raises(TypeError, match="Could not convert"): - df_single.reset_index().groupby(["B", "inner"]).mean() expected = ( df_single.reset_index().groupby(["B", "inner"]).mean(numeric_only=True) ) tm.assert_frame_equal(result, expected) # Test the reverse grouping order - with pytest.raises(TypeError, match="Could not convert"): - df_single.groupby([pd.Grouper(level="inner"), "B"]).mean() result = df_single.groupby([pd.Grouper(level="inner"), "B"]).mean( numeric_only=True ) - with pytest.raises(TypeError, match="Could not convert"): - df_single.reset_index().groupby(["inner", "B"]).mean() expected = ( df_single.reset_index().groupby(["inner", "B"]).mean(numeric_only=True) ) @@ -406,11 +390,7 @@ def test_empty_groups(self, df): def test_groupby_grouper(self, df): grouped = df.groupby("A") - with pytest.raises(TypeError, match="Could not convert"): - df.groupby(grouped.grouper).mean() result = df.groupby(grouped.grouper).mean(numeric_only=True) - with pytest.raises(TypeError, match="Could not convert"): - grouped.mean() expected = grouped.mean(numeric_only=True) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_min_max.py b/pandas/tests/groupby/test_min_max.py index 38c4c41e8648d..e85a4c95a2b34 100644 --- a/pandas/tests/groupby/test_min_max.py +++ b/pandas/tests/groupby/test_min_max.py @@ -47,16 +47,12 @@ def test_max_min_object_multiple_columns(using_array_manager): gb = df.groupby("A") - with pytest.raises(TypeError, match="not supported between instances"): - gb.max(numeric_only=False) result = gb[["C"]].max() # "max" is valid for column "C" but not for "B" ei = Index([1, 2, 3], name="A") expected = DataFrame({"C": ["b", "d", "e"]}, index=ei) tm.assert_frame_equal(result, expected) - with pytest.raises(TypeError, match="not supported between instances"): - gb.max(numeric_only=False) result = gb[["C"]].min() # "min" is valid for column "C" but not for "B" ei = Index([1, 2, 3], name="A") diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py new file mode 100644 index 0000000000000..cc3f468349efb --- /dev/null +++ b/pandas/tests/groupby/test_raises.py @@ -0,0 +1,178 @@ +# Only tests that raise an error and have no better location should go here. +# Tests for specific groupby methods should go in their respective +# test file. + +import datetime + +import pytest + +from pandas import DataFrame +from pandas.tests.groupby import get_groupby_method_args + + +@pytest.mark.parametrize("how", ["method", "agg", "transform"]) +def test_groupby_raises_string(how, groupby_func, as_index, sort): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": list("xyzwt"), + } + ) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby("a", as_index=as_index, sort=sort) + + klass, msg = { + "all": (None, ""), + "any": (None, ""), + "bfill": (None, ""), + "corrwith": (TypeError, "Could not convert"), + "count": (None, ""), + "cumcount": (None, ""), + "cummax": (NotImplementedError, "function is not implemented for this dtype"), + "cummin": (NotImplementedError, "function is not implemented for this dtype"), + "cumprod": (NotImplementedError, "function is not implemented for this dtype"), + "cumsum": (NotImplementedError, "function is not implemented for this dtype"), + "diff": (TypeError, "unsupported operand type"), + "ffill": (None, ""), + "fillna": (None, ""), + "first": (None, ""), + "idxmax": (TypeError, "'argmax' not allowed for this dtype"), + "idxmin": (TypeError, "'argmin' not allowed for this dtype"), + "last": (None, ""), + "max": (None, ""), + "mean": (TypeError, "Could not convert xyz to numeric"), + "median": (TypeError, "could not convert string to float"), + "min": (None, ""), + "ngroup": (None, ""), + "nunique": (None, ""), + "pct_change": (TypeError, "unsupported operand type"), + "prod": (TypeError, "can't multiply sequence by non-int of type 'str'"), + "quantile": (TypeError, "cannot be performed against 'object' dtypes!"), + "rank": (None, ""), + "sem": (ValueError, "could not convert string to float"), + "shift": (None, ""), + "size": (None, ""), + "skew": (TypeError, "could not convert string to float"), + "std": (ValueError, "could not convert string to float"), + "sum": (None, ""), + "var": (TypeError, "could not convert string to float"), + }[groupby_func] + + if klass is None: + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + else: + with pytest.raises(klass, match=msg): + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + + +@pytest.mark.parametrize("how", ["agg", "transform"]) +def test_groupby_raises_string_udf(how): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": list("xyzwt"), + } + ) + gb = df.groupby("a") + + def func(x): + raise TypeError("Test error message") + + with pytest.raises(TypeError, match="Test error message"): + getattr(gb, how)(func) + + +@pytest.mark.parametrize("how", ["method", "agg", "transform"]) +def test_groupby_raises_datetime(how, groupby_func, as_index, sort): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + } + ) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby("a", as_index=as_index, sort=sort) + + klass, msg = { + "all": (None, ""), + "any": (None, ""), + "bfill": (None, ""), + "corrwith": (TypeError, "cannot perform __mul__ with this index type"), + "count": (None, ""), + "cumcount": (None, ""), + "cummax": (None, ""), + "cummin": (None, ""), + "cumprod": (TypeError, "datetime64 type does not support cumprod operations"), + "cumsum": (TypeError, "datetime64 type does not support cumsum operations"), + "diff": (None, ""), + "ffill": (None, ""), + "fillna": (None, ""), + "first": (None, ""), + "idxmax": (None, ""), + "idxmin": (None, ""), + "last": (None, ""), + "max": (None, ""), + "mean": (None, ""), + "median": (None, ""), + "min": (None, ""), + "ngroup": (None, ""), + "nunique": (None, ""), + "pct_change": (TypeError, "cannot perform __truediv__ with this index type"), + "prod": (TypeError, "datetime64 type does not support prod"), + "quantile": (None, ""), + "rank": (None, ""), + "sem": (TypeError, "Cannot cast DatetimeArray to dtype float64"), + "shift": (None, ""), + "size": (None, ""), + "skew": (TypeError, r"dtype datetime64\[ns\] does not support reduction"), + "std": (TypeError, "Cannot cast DatetimeArray to dtype float64"), + "sum": (TypeError, "datetime64 type does not support sum operations"), + "var": (None, ""), + }[groupby_func] + + if klass is None: + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + else: + with pytest.raises(klass, match=msg): + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + + +@pytest.mark.parametrize("how", ["agg", "transform"]) +def test_groupby_raises_datetime_udf(how): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + } + ) + gb = df.groupby("a") + + def func(x): + raise TypeError("Test error message") + + with pytest.raises(TypeError, match="Test error message"): + getattr(gb, how)(func) diff --git a/pandas/tests/groupby/transform/test_transform.py b/pandas/tests/groupby/transform/test_transform.py index d0c8b53f13399..4c6f172b00a58 100644 --- a/pandas/tests/groupby/transform/test_transform.py +++ b/pandas/tests/groupby/transform/test_transform.py @@ -426,11 +426,7 @@ def test_transform_nuisance_raises(df): def test_transform_function_aliases(df): - with pytest.raises(TypeError, match="Could not convert"): - df.groupby("A").transform("mean") result = df.groupby("A").transform("mean", numeric_only=True) - with pytest.raises(TypeError, match="Could not convert"): - df.groupby("A").transform(np.mean) expected = df.groupby("A")[["C", "D"]].transform(np.mean) tm.assert_frame_equal(result, expected) @@ -508,8 +504,6 @@ def test_groupby_transform_with_int(): } ) with np.errstate(all="ignore"): - with pytest.raises(TypeError, match="Could not convert"): - df.groupby("A").transform(lambda x: (x - x.mean()) / x.std()) result = df.groupby("A")[["B", "C"]].transform( lambda x: (x - x.mean()) / x.std() ) @@ -554,8 +548,6 @@ def test_groupby_transform_with_int(): tm.assert_frame_equal(result, expected) # int doesn't get downcasted - with pytest.raises(TypeError, match="unsupported operand type"): - df.groupby("A").transform(lambda x: x * 2 / 2) result = df.groupby("A")[["B", "C"]].transform(lambda x: x * 2 / 2) expected = DataFrame({"B": 1.0, "C": [2.0, 3.0, 4.0, 10.0, 5.0, -1.0]}) tm.assert_frame_equal(result, expected) @@ -748,14 +740,8 @@ def test_cython_transform_frame(op, args, targop): expected = expected.sort_index(axis=1) - if op != "shift": - with pytest.raises(TypeError, match="datetime64 type does not support"): - gb.transform(op, *args).sort_index(axis=1) result = gb[expected.columns].transform(op, *args).sort_index(axis=1) tm.assert_frame_equal(result, expected) - if op != "shift": - with pytest.raises(TypeError, match="datetime64 type does not support"): - getattr(gb, op)(*args).sort_index(axis=1) result = getattr(gb[expected.columns], op)(*args).sort_index(axis=1) tm.assert_frame_equal(result, expected) # individual columns