diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index d3f9dd31e9fa1..487daddd3d214 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -96,13 +96,8 @@ def test_cython_agg_nothing_to_agg(): with pytest.raises(TypeError, match=msg): frame.groupby("a")["b"].mean(numeric_only=True) - with pytest.raises(TypeError, match="Could not convert (foo|bar)*"): - frame.groupby("a")["b"].mean() - frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25}) - with pytest.raises(TypeError, match="Could not convert"): - frame[["b"]].groupby(frame["a"]).mean() result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True) expected = DataFrame( [], index=frame["a"].sort_values().drop_duplicates(), columns=[] diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 986ee48ca9876..9fe35876dc5b5 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -103,9 +103,6 @@ def test_basic(): # TODO: split this test gb = df.groupby("A", observed=False) exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True) expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)}) - msg = "category type does not support sum operations" - with pytest.raises(TypeError, match=msg): - gb.sum() result = gb.sum(numeric_only=True) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index d7b015fa7104a..a7bd89942ea79 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -465,8 +465,6 @@ def test_multi_func(df): col2 = df["B"] grouped = df.groupby([col1.get, col2.get]) - with pytest.raises(TypeError, match="Could not convert"): - grouped.mean() agged = grouped.mean(numeric_only=True) expected = df.groupby(["A", "B"]).mean() @@ -665,17 +663,11 @@ def test_groupby_as_index_agg(df): # single-key - with pytest.raises(TypeError, match="Could not convert"): - grouped.agg(np.mean) result = grouped[["C", "D"]].agg(np.mean) - with pytest.raises(TypeError, match="Could not convert"): - grouped.mean() expected = grouped.mean(numeric_only=True) tm.assert_frame_equal(result, expected) result2 = grouped.agg({"C": np.mean, "D": np.sum}) - with pytest.raises(TypeError, match="Could not convert"): - grouped.mean() expected2 = grouped.mean(numeric_only=True) expected2["D"] = grouped.sum()["D"] tm.assert_frame_equal(result2, expected2) @@ -793,11 +785,7 @@ def test_groupby_as_index_cython(df): # single-key grouped = data.groupby("A", as_index=False) - with pytest.raises(TypeError, match="Could not convert"): - grouped.mean() result = grouped.mean(numeric_only=True) - with pytest.raises(TypeError, match="Could not convert"): - data.groupby(["A"]).mean() expected = data.groupby(["A"]).mean(numeric_only=True) expected.insert(0, "A", expected.index) expected.index = RangeIndex(len(expected)) @@ -960,11 +948,7 @@ def test_empty_groups_corner(mframe): ) grouped = df.groupby(["k1", "k2"]) - with pytest.raises(TypeError, match="Could not convert"): - grouped.agg(np.mean) result = grouped[["v1", "v2"]].agg(np.mean) - with pytest.raises(TypeError, match="Could not convert"): - grouped.mean() expected = grouped.mean(numeric_only=True) tm.assert_frame_equal(result, expected) @@ -1148,8 +1132,6 @@ def test_groupby_with_hier_columns(): # add a nuisance column sorted_columns, _ = columns.sortlevel(0) df["A", "foo"] = "bar" - with pytest.raises(TypeError, match="Could not convert"): - df.groupby(level=0).mean() result = df.groupby(level=0).mean(numeric_only=True) tm.assert_index_equal(result.columns, df.columns[:-1]) @@ -1183,11 +1165,7 @@ def test_groupby_wrong_multi_labels(): def test_groupby_series_with_name(df): - with pytest.raises(TypeError, match="Could not convert"): - df.groupby(df["A"]).mean() result = df.groupby(df["A"]).mean(numeric_only=True) - with pytest.raises(TypeError, match="Could not convert"): - df.groupby(df["A"], as_index=False).mean() result2 = df.groupby(df["A"], as_index=False).mean(numeric_only=True) assert result.index.name == "A" assert "A" in result2 @@ -1337,11 +1315,7 @@ def test_groupby_unit64_float_conversion(): def test_groupby_list_infer_array_like(df): - with pytest.raises(TypeError, match="Could not convert"): - df.groupby(list(df["A"])).mean() result = df.groupby(list(df["A"])).mean(numeric_only=True) - with pytest.raises(TypeError, match="Could not convert"): - df.groupby(df["A"]).mean() expected = df.groupby(df["A"]).mean(numeric_only=True) tm.assert_frame_equal(result, expected, check_names=False) @@ -1455,8 +1429,6 @@ def test_groupby_2d_malformed(): d["zeros"] = [0, 0] d["ones"] = [1, 1] d["label"] = ["l1", "l2"] - with pytest.raises(TypeError, match="Could not convert"): - d.groupby(["group"]).mean() tmp = d.groupby(["group"]).mean(numeric_only=True) res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py index d3d34dfd6f90a..e32b5eb44d5dc 100644 --- a/pandas/tests/groupby/test_index_as_string.py +++ b/pandas/tests/groupby/test_index_as_string.py @@ -48,12 +48,7 @@ def series(): ) def test_grouper_index_level_as_string(frame, key_strs, groupers): if "B" not in key_strs or "outer" in frame.columns: - with pytest.raises(TypeError, match="Could not convert"): - frame.groupby(key_strs).mean() result = frame.groupby(key_strs).mean(numeric_only=True) - - with pytest.raises(TypeError, match="Could not convert"): - frame.groupby(groupers).mean() expected = frame.groupby(groupers).mean(numeric_only=True) else: result = frame.groupby(key_strs).mean() diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 1c889e6ed457a..6ceb23a3c44b6 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -4,23 +4,60 @@ import datetime +import numpy as np import pytest -from pandas import DataFrame +from pandas import ( + Categorical, + DataFrame, + Grouper, + Series, +) from pandas.tests.groupby import get_groupby_method_args +@pytest.fixture( + params=[ + "a", + ["a"], + ["a", "b"], + Grouper(key="a"), + lambda x: x % 2, + [0, 0, 0, 1, 2, 2, 2, 3, 3], + np.array([0, 0, 0, 1, 2, 2, 2, 3, 3]), + dict(zip(range(9), [0, 0, 0, 1, 2, 2, 2, 3, 3])), + Series([1, 1, 1, 1, 1, 2, 2, 2, 2]), + [Series([1, 1, 1, 1, 1, 2, 2, 2, 2]), Series([3, 3, 4, 4, 4, 4, 4, 3, 3])], + ] +) +def by(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def groupby_series(request): + return request.param + + @pytest.mark.parametrize("how", ["method", "agg", "transform"]) -def test_groupby_raises_string(how, groupby_func, as_index, sort): +def test_groupby_raises_string(how, by, groupby_series, groupby_func): df = DataFrame( { - "a": [1, 1, 1, 2, 2], - "b": range(5), - "c": list("xyzwt"), + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": list("xyzwtyuio"), } ) args = get_groupby_method_args(groupby_func, df) - gb = df.groupby("a", as_index=as_index, sort=sort) + gb = df.groupby(by=by) + + if groupby_series: + gb = gb["d"] + + if groupby_func == "corrwith": + assert not hasattr(gb, "corrwith") + return klass, msg = { "all": (None, ""), @@ -29,10 +66,22 @@ def test_groupby_raises_string(how, groupby_func, as_index, sort): "corrwith": (TypeError, "Could not convert"), "count": (None, ""), "cumcount": (None, ""), - "cummax": (NotImplementedError, "function is not implemented for this dtype"), - "cummin": (NotImplementedError, "function is not implemented for this dtype"), - "cumprod": (NotImplementedError, "function is not implemented for this dtype"), - "cumsum": (NotImplementedError, "function is not implemented for this dtype"), + "cummax": ( + (NotImplementedError, TypeError), + "(function|cummax) is not (implemented|supported) for (this|object) dtype", + ), + "cummin": ( + (NotImplementedError, TypeError), + "(function|cummin) is not (implemented|supported) for (this|object) dtype", + ), + "cumprod": ( + (NotImplementedError, TypeError), + "(function|cumprod) is not (implemented|supported) for (this|object) dtype", + ), + "cumsum": ( + (NotImplementedError, TypeError), + "(function|cumsum) is not (implemented|supported) for (this|object) dtype", + ), "diff": (TypeError, "unsupported operand type"), "ffill": (None, ""), "fillna": (None, ""), @@ -41,7 +90,7 @@ def test_groupby_raises_string(how, groupby_func, as_index, sort): "idxmin": (TypeError, "'argmin' not allowed for this dtype"), "last": (None, ""), "max": (None, ""), - "mean": (TypeError, "Could not convert xyz to numeric"), + "mean": (TypeError, "Could not convert xy?z?w?t?y?u?i?o? to numeric"), "median": (TypeError, "could not convert string to float"), "min": (None, ""), "ngroup": (None, ""), @@ -77,15 +126,19 @@ def test_groupby_raises_string(how, groupby_func, as_index, sort): @pytest.mark.parametrize("how", ["agg", "transform"]) -def test_groupby_raises_string_udf(how): +def test_groupby_raises_string_udf(how, by, groupby_series): df = DataFrame( { - "a": [1, 1, 1, 2, 2], - "b": range(5), - "c": list("xyzwt"), + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": list("xyzwtyuio"), } ) - gb = df.groupby("a") + gb = df.groupby(by=by) + + if groupby_series: + gb = gb["d"] def func(x): raise TypeError("Test error message") @@ -94,17 +147,54 @@ def func(x): getattr(gb, how)(func) +@pytest.mark.parametrize("how", ["agg", "transform"]) +@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) +def test_groupby_raises_string_np(how, by, groupby_series, groupby_func_np): + # GH#50749 + df = DataFrame( + { + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": list("xyzwtyuio"), + } + ) + gb = df.groupby(by=by) + + if groupby_series: + gb = gb["d"] + + klass, msg = { + np.sum: (None, ""), + np.mean: (TypeError, "Could not convert xy?z?w?t?y?u?i?o? to numeric"), + }[groupby_func_np] + + if klass is None: + getattr(gb, how)(groupby_func_np) + else: + with pytest.raises(klass, match=msg): + getattr(gb, how)(groupby_func_np) + + @pytest.mark.parametrize("how", ["method", "agg", "transform"]) -def test_groupby_raises_datetime(how, groupby_func, as_index, sort): +def test_groupby_raises_datetime(how, by, groupby_series, groupby_func): df = DataFrame( { - "a": [1, 1, 1, 2, 2], - "b": range(5), - "c": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), } ) args = get_groupby_method_args(groupby_func, df) - gb = df.groupby("a", as_index=as_index, sort=sort) + gb = df.groupby(by=by) + + if groupby_series: + gb = gb["d"] + + if groupby_func == "corrwith": + assert not hasattr(gb, "corrwith") + return klass, msg = { "all": (None, ""), @@ -161,15 +251,200 @@ def test_groupby_raises_datetime(how, groupby_func, as_index, sort): @pytest.mark.parametrize("how", ["agg", "transform"]) -def test_groupby_raises_datetime_udf(how): +def test_groupby_raises_datetime_udf(how, by, groupby_series): + df = DataFrame( + { + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + } + ) + + gb = df.groupby(by=by) + + if groupby_series: + gb = gb["d"] + + def func(x): + raise TypeError("Test error message") + + with pytest.raises(TypeError, match="Test error message"): + getattr(gb, how)(func) + + +@pytest.mark.parametrize("how", ["agg", "transform"]) +@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) +def test_groupby_raises_datetime_np(how, by, groupby_series, groupby_func_np): + # GH#50749 + df = DataFrame( + { + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + } + ) + gb = df.groupby(by=by) + + if groupby_series: + gb = gb["d"] + + klass, msg = { + np.sum: (TypeError, "datetime64 type does not support sum operations"), + np.mean: (None, ""), + }[groupby_func_np] + + if klass is None: + getattr(gb, how)(groupby_func_np) + else: + with pytest.raises(klass, match=msg): + getattr(gb, how)(groupby_func_np) + + +@pytest.mark.parametrize("how", ["method", "agg", "transform"]) +def test_groupby_raises_category(how, by, groupby_series, groupby_func): + # GH#50749 + df = DataFrame( + { + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": Categorical( + ["a", "a", "a", "a", "b", "b", "b", "b", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ), + } + ) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby(by=by) + + if groupby_series: + gb = gb["d"] + + if groupby_func == "corrwith": + assert not hasattr(gb, "corrwith") + return + + klass, msg = { + "all": (None, ""), + "any": (None, ""), + "bfill": (None, ""), + "corrwith": ( + TypeError, + r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'", + ), + "count": (None, ""), + "cumcount": (None, ""), + "cummax": ( + (NotImplementedError, TypeError), + "(category type does not support cummax operations|" + + "category dtype not supported|" + + "cummax is not supported for category dtype)", + ), + "cummin": ( + (NotImplementedError, TypeError), + "(category type does not support cummin operations|" + + "category dtype not supported|" + "cummin is not supported for category dtype)", + ), + "cumprod": ( + (NotImplementedError, TypeError), + "(category type does not support cumprod operations|" + + "category dtype not supported|" + "cumprod is not supported for category dtype)", + ), + "cumsum": ( + (NotImplementedError, TypeError), + "(category type does not support cumsum operations|" + + "category dtype not supported|" + "cumsum is not supported for category dtype)", + ), + "diff": ( + TypeError, + r"unsupported operand type\(s\) for -: 'Categorical' and 'Categorical'", + ), + "ffill": (None, ""), + "fillna": ( + TypeError, + r"Cannot setitem on a Categorical with a new category \(0\), " + + "set the categories first", + ), + "first": (None, ""), + "idxmax": (None, ""), + "idxmin": (None, ""), + "last": (None, ""), + "max": (None, ""), + "mean": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'mean'", + ), + "median": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'median'", + ), + "min": (None, ""), + "ngroup": (None, ""), + "nunique": (None, ""), + "pct_change": ( + TypeError, + r"unsupported operand type\(s\) for /: 'Categorical' and 'Categorical'", + ), + "prod": (TypeError, "category type does not support prod operations"), + "quantile": (TypeError, "No matching signature found"), + "rank": (None, ""), + "sem": (ValueError, "Cannot cast object dtype to float64"), + "shift": (None, ""), + "size": (None, ""), + "skew": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'skew'", + ), + "std": (ValueError, "Cannot cast object dtype to float64"), + "sum": (TypeError, "category type does not support sum operations"), + "var": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'var'", + ), + }[groupby_func] + + if klass is None: + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + else: + with pytest.raises(klass, match=msg): + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + + +@pytest.mark.parametrize("how", ["agg", "transform"]) +def test_groupby_raises_category_udf(how, by, groupby_series): + # GH#50749 df = DataFrame( { - "a": [1, 1, 1, 2, 2], - "b": range(5), - "c": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": Categorical( + ["a", "a", "a", "a", "b", "b", "b", "b", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ), } ) - gb = df.groupby("a") + gb = df.groupby(by=by) + + if groupby_series: + gb = gb["d"] def func(x): raise TypeError("Test error message") @@ -178,6 +453,172 @@ def func(x): getattr(gb, how)(func) +@pytest.mark.parametrize("how", ["agg", "transform"]) +@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) +def test_groupby_raises_category_np(how, by, groupby_series, groupby_func_np): + # GH#50749 + df = DataFrame( + { + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": Categorical( + ["a", "a", "a", "a", "b", "b", "b", "b", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ), + } + ) + gb = df.groupby(by=by) + + if groupby_series: + gb = gb["d"] + + klass, msg = { + np.sum: (TypeError, "category type does not support sum operations"), + np.mean: ( + TypeError, + "'Categorical' with dtype category does not support reduction 'mean'", + ), + }[groupby_func_np] + + if klass is None: + getattr(gb, how)(groupby_func_np) + else: + with pytest.raises(klass, match=msg): + getattr(gb, how)(groupby_func_np) + + +@pytest.mark.parametrize("how", ["method", "agg", "transform"]) +def test_groupby_raises_category_on_category( + how, by, groupby_series, groupby_func, observed +): + # GH#50749 + df = DataFrame( + { + "a": Categorical( + ["a", "a", "a", "a", "b", "b", "b", "b", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ), + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": Categorical( + ["a", "a", "a", "a", "b", "b", "c", "c", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ), + } + ) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby(by=by, observed=observed) + + if groupby_series: + gb = gb["d"] + + if groupby_func == "corrwith": + assert not hasattr(gb, "corrwith") + return + + empty_groups = any(group.empty for group in gb.groups.values()) + + klass, msg = { + "all": (None, ""), + "any": (None, ""), + "bfill": (None, ""), + "corrwith": ( + TypeError, + r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'", + ), + "count": (None, ""), + "cumcount": (None, ""), + "cummax": ( + (NotImplementedError, TypeError), + "(cummax is not supported for category dtype|" + + "category dtype not supported|" + + "category type does not support cummax operations)", + ), + "cummin": ( + (NotImplementedError, TypeError), + "(cummin is not supported for category dtype|" + + "category dtype not supported|" + "category type does not support cummin operations)", + ), + "cumprod": ( + (NotImplementedError, TypeError), + "(cumprod is not supported for category dtype|" + + "category dtype not supported|" + "category type does not support cumprod operations)", + ), + "cumsum": ( + (NotImplementedError, TypeError), + "(cumsum is not supported for category dtype|" + + "category dtype not supported|" + + "category type does not support cumsum operations)", + ), + "diff": (TypeError, "unsupported operand type"), + "ffill": (None, ""), + "fillna": ( + TypeError, + r"Cannot setitem on a Categorical with a new category \(0\), " + + "set the categories first", + ), + "first": (None, ""), + "idxmax": (ValueError, "attempt to get argmax of an empty sequence") + if empty_groups + else (None, ""), + "idxmin": (ValueError, "attempt to get argmin of an empty sequence") + if empty_groups + else (None, ""), + "last": (None, ""), + "max": (None, ""), + "mean": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'mean'", + ), + "median": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'median'", + ), + "min": (None, ""), + "ngroup": (None, ""), + "nunique": (None, ""), + "pct_change": (TypeError, "unsupported operand type"), + "prod": (TypeError, "category type does not support prod operations"), + "quantile": (TypeError, ""), + "rank": (None, ""), + "sem": (ValueError, "Cannot cast object dtype to float64"), + "shift": (None, ""), + "size": (None, ""), + "skew": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'skew'", + ), + "std": (ValueError, "Cannot cast object dtype to float64"), + "sum": (TypeError, "category type does not support sum operations"), + "var": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'var'", + ), + }[groupby_func] + + if klass is None: + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + else: + with pytest.raises(klass, match=msg): + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + + def test_subsetting_columns_axis_1_raises(): # GH 35443 df = DataFrame({"a": [1], "b": [2], "c": [3]})