From fdeb6f7146255462f2f057462489351014fa8ddd Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 14 Jan 2023 20:24:44 +0100 Subject: [PATCH 1/6] TST: Consolidate tests that raise in groupby --- pandas/tests/groupby/aggregate/test_cython.py | 5 - pandas/tests/groupby/test_categorical.py | 3 - pandas/tests/groupby/test_groupby.py | 28 ----- pandas/tests/groupby/test_index_as_string.py | 5 - pandas/tests/groupby/test_raises.py | 117 +++++++++++++++++- 5 files changed, 116 insertions(+), 42 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_cython.py b/pandas/tests/groupby/aggregate/test_cython.py index 7e7d3d682f20f..12aea33abc82b 100644 --- a/pandas/tests/groupby/aggregate/test_cython.py +++ b/pandas/tests/groupby/aggregate/test_cython.py @@ -95,13 +95,8 @@ def test_cython_agg_nothing_to_agg(): with pytest.raises(TypeError, match="Cannot use numeric_only=True"): frame.groupby("a")["b"].mean(numeric_only=True) - with pytest.raises(TypeError, match="Could not convert (foo|bar)*"): - frame.groupby("a")["b"].mean() - frame = DataFrame({"a": np.random.randint(0, 5, 50), "b": ["foo", "bar"] * 25}) - with pytest.raises(TypeError, match="Could not convert"): - frame[["b"]].groupby(frame["a"]).mean() result = frame[["b"]].groupby(frame["a"]).mean(numeric_only=True) expected = DataFrame( [], index=frame["a"].sort_values().drop_duplicates(), columns=[] diff --git a/pandas/tests/groupby/test_categorical.py b/pandas/tests/groupby/test_categorical.py index 30bf5eb39cf51..7c17e3645639b 100644 --- a/pandas/tests/groupby/test_categorical.py +++ b/pandas/tests/groupby/test_categorical.py @@ -103,9 +103,6 @@ def test_basic(): # TODO: split this test gb = df.groupby("A", observed=False) exp_idx = CategoricalIndex(["a", "b", "z"], name="A", ordered=True) expected = DataFrame({"values": Series([3, 7, 0], index=exp_idx)}) - msg = "category type does not support sum operations" - with pytest.raises(TypeError, match=msg): - gb.sum() result = gb.sum(numeric_only=True) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 5384b228850f4..00e5d7ce472dc 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -465,8 +465,6 @@ def test_multi_func(df): col2 = df["B"] grouped = df.groupby([col1.get, col2.get]) - with pytest.raises(TypeError, match="Could not convert"): - grouped.mean() agged = grouped.mean(numeric_only=True) expected = df.groupby(["A", "B"]).mean() @@ -663,17 +661,11 @@ def test_groupby_as_index_agg(df): # single-key - with pytest.raises(TypeError, match="Could not convert"): - grouped.agg(np.mean) result = grouped[["C", "D"]].agg(np.mean) - with pytest.raises(TypeError, match="Could not convert"): - grouped.mean() expected = grouped.mean(numeric_only=True) tm.assert_frame_equal(result, expected) result2 = grouped.agg({"C": np.mean, "D": np.sum}) - with pytest.raises(TypeError, match="Could not convert"): - grouped.mean() expected2 = grouped.mean(numeric_only=True) expected2["D"] = grouped.sum()["D"] tm.assert_frame_equal(result2, expected2) @@ -791,11 +783,7 @@ def test_groupby_as_index_cython(df): # single-key grouped = data.groupby("A", as_index=False) - with pytest.raises(TypeError, match="Could not convert"): - grouped.mean() result = grouped.mean(numeric_only=True) - with pytest.raises(TypeError, match="Could not convert"): - data.groupby(["A"]).mean() expected = data.groupby(["A"]).mean(numeric_only=True) expected.insert(0, "A", expected.index) expected.index = RangeIndex(len(expected)) @@ -958,11 +946,7 @@ def test_empty_groups_corner(mframe): ) grouped = df.groupby(["k1", "k2"]) - with pytest.raises(TypeError, match="Could not convert"): - grouped.agg(np.mean) result = grouped[["v1", "v2"]].agg(np.mean) - with pytest.raises(TypeError, match="Could not convert"): - grouped.mean() expected = grouped.mean(numeric_only=True) tm.assert_frame_equal(result, expected) @@ -1146,8 +1130,6 @@ def test_groupby_with_hier_columns(): # add a nuisance column sorted_columns, _ = columns.sortlevel(0) df["A", "foo"] = "bar" - with pytest.raises(TypeError, match="Could not convert"): - df.groupby(level=0).mean() result = df.groupby(level=0).mean(numeric_only=True) tm.assert_index_equal(result.columns, df.columns[:-1]) @@ -1181,11 +1163,7 @@ def test_groupby_wrong_multi_labels(): def test_groupby_series_with_name(df): - with pytest.raises(TypeError, match="Could not convert"): - df.groupby(df["A"]).mean() result = df.groupby(df["A"]).mean(numeric_only=True) - with pytest.raises(TypeError, match="Could not convert"): - df.groupby(df["A"], as_index=False).mean() result2 = df.groupby(df["A"], as_index=False).mean(numeric_only=True) assert result.index.name == "A" assert "A" in result2 @@ -1335,11 +1313,7 @@ def test_groupby_unit64_float_conversion(): def test_groupby_list_infer_array_like(df): - with pytest.raises(TypeError, match="Could not convert"): - df.groupby(list(df["A"])).mean() result = df.groupby(list(df["A"])).mean(numeric_only=True) - with pytest.raises(TypeError, match="Could not convert"): - df.groupby(df["A"]).mean() expected = df.groupby(df["A"]).mean(numeric_only=True) tm.assert_frame_equal(result, expected, check_names=False) @@ -1453,8 +1427,6 @@ def test_groupby_2d_malformed(): d["zeros"] = [0, 0] d["ones"] = [1, 1] d["label"] = ["l1", "l2"] - with pytest.raises(TypeError, match="Could not convert"): - d.groupby(["group"]).mean() tmp = d.groupby(["group"]).mean(numeric_only=True) res_values = np.array([[0.0, 1.0], [0.0, 1.0]]) tm.assert_index_equal(tmp.columns, Index(["zeros", "ones"])) diff --git a/pandas/tests/groupby/test_index_as_string.py b/pandas/tests/groupby/test_index_as_string.py index d3d34dfd6f90a..e32b5eb44d5dc 100644 --- a/pandas/tests/groupby/test_index_as_string.py +++ b/pandas/tests/groupby/test_index_as_string.py @@ -48,12 +48,7 @@ def series(): ) def test_grouper_index_level_as_string(frame, key_strs, groupers): if "B" not in key_strs or "outer" in frame.columns: - with pytest.raises(TypeError, match="Could not convert"): - frame.groupby(key_strs).mean() result = frame.groupby(key_strs).mean(numeric_only=True) - - with pytest.raises(TypeError, match="Could not convert"): - frame.groupby(groupers).mean() expected = frame.groupby(groupers).mean(numeric_only=True) else: result = frame.groupby(key_strs).mean() diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index cc3f468349efb..4283e9001c92c 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -6,7 +6,10 @@ import pytest -from pandas import DataFrame +from pandas import ( + Categorical, + DataFrame, +) from pandas.tests.groupby import get_groupby_method_args @@ -176,3 +179,115 @@ def func(x): with pytest.raises(TypeError, match="Test error message"): getattr(gb, how)(func) + + +@pytest.mark.parametrize("how", ["method", "agg", "transform"]) +def test_groupby_raises_category(how, groupby_func, as_index, sort): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": Categorical( + ["a", "a", "b", "c", "c"], categories=["a", "b", "c", "d"], ordered=True + ), + } + ) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby("a", as_index=as_index, sort=sort) + + klass, msg = { + "all": (None, ""), + "any": (None, ""), + "bfill": (None, ""), + "corrwith": ( + TypeError, + r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'", + ), + "count": (None, ""), + "cumcount": (None, ""), + "cummax": (NotImplementedError, "category dtype not supported"), + "cummin": (NotImplementedError, "category dtype not supported"), + "cumprod": (TypeError, "category type does not support cumprod operations"), + "cumsum": (TypeError, "category type does not support cumsum operations"), + "diff": ( + TypeError, + r"unsupported operand type\(s\) for -: 'Categorical' and 'Categorical'", + ), + "ffill": (None, ""), + "fillna": ( + TypeError, + r"Cannot setitem on a Categorical with a new category \(0\), set the categories first", + ), + "first": (None, ""), + "idxmax": (None, ""), + "idxmin": (None, ""), + "last": (None, ""), + "max": (None, ""), + "mean": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'mean'", + ), + "median": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'median'", + ), + "min": (None, ""), + "ngroup": (None, ""), + "nunique": (None, ""), + "pct_change": ( + TypeError, + r"unsupported operand type\(s\) for /: 'Categorical' and 'Categorical'", + ), + "prod": (TypeError, "category type does not support prod operations"), + "quantile": (TypeError, "No matching signature found"), + "rank": (None, ""), + "sem": (ValueError, "Cannot cast object dtype to float64"), + "shift": (None, ""), + "size": (None, ""), + "skew": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'skew'", + ), + "std": (ValueError, "Cannot cast object dtype to float64"), + "sum": (TypeError, "category type does not support sum operations"), + "var": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'var'", + ), + }[groupby_func] + + if klass is None: + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + else: + with pytest.raises(klass, match=msg): + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + + +@pytest.mark.parametrize("how", ["agg", "transform"]) +def test_groupby_raises_category_udf(how): + df = DataFrame( + { + "a": [1, 1, 1, 2, 2], + "b": range(5), + "c": Categorical( + ["a", "a", "b", "c", "c"], categories=["a", "b", "c", "d"], ordered=True + ), + } + ) + gb = df.groupby("a") + + def func(x): + raise TypeError("Test error message") + + with pytest.raises(TypeError, match="Test error message"): + getattr(gb, how)(func) From bf981af00efa597d92219881e7133f722d526f66 Mon Sep 17 00:00:00 2001 From: Alex Date: Sat, 14 Jan 2023 20:51:40 +0100 Subject: [PATCH 2/6] TST: pre-commit error --- pandas/tests/groupby/test_raises.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 4283e9001c92c..b33977c42f730 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -216,7 +216,12 @@ def test_groupby_raises_category(how, groupby_func, as_index, sort): "ffill": (None, ""), "fillna": ( TypeError, - r"Cannot setitem on a Categorical with a new category \(0\), set the categories first", + ", ".join( + [ + r"Cannot setitem on a Categorical with a new category \(0\)", + "set the categories first", + ] + ), ), "first": (None, ""), "idxmax": (None, ""), From dbc134353b7127a0d4af3df02e14309e92221740 Mon Sep 17 00:00:00 2001 From: Alex Date: Sun, 29 Jan 2023 23:55:26 +0100 Subject: [PATCH 3/6] enhance groupby test_raises --- pandas/tests/groupby/test_raises.py | 366 ++++++++++++++++++++++++---- 1 file changed, 319 insertions(+), 47 deletions(-) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index b33977c42f730..0906dbf6cf791 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -4,26 +4,58 @@ import datetime +import numpy as np import pytest from pandas import ( Categorical, DataFrame, + Grouper, + Series, ) from pandas.tests.groupby import get_groupby_method_args +@pytest.fixture( + params=[ + "a", + ["a"], + ["a", "b"], + Grouper(key="a"), + lambda x: x % 2, + [0, 0, 0, 1, 2, 2, 2, 3, 3], + np.array([0, 0, 0, 1, 2, 2, 2, 3, 3]), + dict(zip(range(9), [0, 0, 0, 1, 2, 2, 2, 3, 3])), + Series([1, 1, 1, 1, 1, 2, 2, 2, 2]), + [Series([1, 1, 1, 1, 1, 2, 2, 2, 2]), Series([3, 3, 4, 4, 4, 4, 4, 3, 3])], + ] +) +def by(request): + return request.param + + +@pytest.fixture(params=[True, False]) +def groupby_serie(request): + return request.param + + @pytest.mark.parametrize("how", ["method", "agg", "transform"]) -def test_groupby_raises_string(how, groupby_func, as_index, sort): +def test_groupby_raises_string(how, by, groupby_serie, groupby_func, as_index, sort): df = DataFrame( { - "a": [1, 1, 1, 2, 2], - "b": range(5), - "c": list("xyzwt"), + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": list("xyzwtyuio"), } ) args = get_groupby_method_args(groupby_func, df) - gb = df.groupby("a", as_index=as_index, sort=sort) + gb = df.groupby(by=by, as_index=as_index, sort=sort) + + if groupby_serie: + if groupby_func == "corrwith": + pytest.skip() + gb = gb["d"] klass, msg = { "all": (None, ""), @@ -32,10 +64,22 @@ def test_groupby_raises_string(how, groupby_func, as_index, sort): "corrwith": (TypeError, "Could not convert"), "count": (None, ""), "cumcount": (None, ""), - "cummax": (NotImplementedError, "function is not implemented for this dtype"), - "cummin": (NotImplementedError, "function is not implemented for this dtype"), - "cumprod": (NotImplementedError, "function is not implemented for this dtype"), - "cumsum": (NotImplementedError, "function is not implemented for this dtype"), + "cummax": ( + (NotImplementedError, TypeError), + "(function|cummax) is not (implemented|supported) for (this|object) dtype", + ), + "cummin": ( + (NotImplementedError, TypeError), + "(function|cummin) is not (implemented|supported) for (this|object) dtype", + ), + "cumprod": ( + (NotImplementedError, TypeError), + "(function|cumprod) is not (implemented|supported) for (this|object) dtype", + ), + "cumsum": ( + (NotImplementedError, TypeError), + "(function|cumsum) is not (implemented|supported) for (this|object) dtype", + ), "diff": (TypeError, "unsupported operand type"), "ffill": (None, ""), "fillna": (None, ""), @@ -44,7 +88,7 @@ def test_groupby_raises_string(how, groupby_func, as_index, sort): "idxmin": (TypeError, "'argmin' not allowed for this dtype"), "last": (None, ""), "max": (None, ""), - "mean": (TypeError, "Could not convert xyz to numeric"), + "mean": (TypeError, "Could not convert xy?z?w?t?y?u?i?o? to numeric"), "median": (TypeError, "could not convert string to float"), "min": (None, ""), "ngroup": (None, ""), @@ -80,15 +124,19 @@ def test_groupby_raises_string(how, groupby_func, as_index, sort): @pytest.mark.parametrize("how", ["agg", "transform"]) -def test_groupby_raises_string_udf(how): +def test_groupby_raises_string_udf(how, by, groupby_serie): df = DataFrame( { - "a": [1, 1, 1, 2, 2], - "b": range(5), - "c": list("xyzwt"), + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": list("xyzwtyuio"), } ) - gb = df.groupby("a") + gb = df.groupby(by=by) + + if groupby_serie: + gb = gb["d"] def func(x): raise TypeError("Test error message") @@ -97,17 +145,51 @@ def func(x): getattr(gb, how)(func) +@pytest.mark.parametrize("how", ["agg", "transform"]) +@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) +def test_groupby_raises_string_np(how, by, groupby_serie, groupby_func_np): + df = DataFrame( + { + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": list("xyzwtyuio"), + } + ) + gb = df.groupby(by=by) + + if groupby_serie: + gb = gb["d"] + + klass, msg = { + np.sum: (None, ""), + np.mean: (TypeError, "Could not convert xy?z?w?t?y?u?i?o? to numeric"), + }[groupby_func_np] + + if klass is None: + getattr(gb, how)(groupby_func_np) + else: + with pytest.raises(klass, match=msg): + getattr(gb, how)(groupby_func_np) + + @pytest.mark.parametrize("how", ["method", "agg", "transform"]) -def test_groupby_raises_datetime(how, groupby_func, as_index, sort): +def test_groupby_raises_datetime(how, by, groupby_serie, groupby_func, as_index, sort): df = DataFrame( { - "a": [1, 1, 1, 2, 2], - "b": range(5), - "c": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), } ) args = get_groupby_method_args(groupby_func, df) - gb = df.groupby("a", as_index=as_index, sort=sort) + gb = df.groupby(by=by, as_index=as_index, sort=sort) + + if groupby_serie: + if groupby_func == "corrwith": + pytest.skip() + gb = gb["d"] klass, msg = { "all": (None, ""), @@ -164,15 +246,20 @@ def test_groupby_raises_datetime(how, groupby_func, as_index, sort): @pytest.mark.parametrize("how", ["agg", "transform"]) -def test_groupby_raises_datetime_udf(how): +def test_groupby_raises_datetime_udf(how, by, groupby_serie): df = DataFrame( { - "a": [1, 1, 1, 2, 2], - "b": range(5), - "c": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), } ) - gb = df.groupby("a") + + gb = df.groupby(by=by) + + if groupby_serie: + gb = gb["d"] def func(x): raise TypeError("Test error message") @@ -181,19 +268,55 @@ def func(x): getattr(gb, how)(func) +@pytest.mark.parametrize("how", ["agg", "transform"]) +@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) +def test_groupby_raises_datetime_np(how, by, groupby_serie, groupby_func_np): + df = DataFrame( + { + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": datetime.datetime(2005, 1, 1, 10, 30, 23, 540000), + } + ) + gb = df.groupby(by=by) + + if groupby_serie: + gb = gb["d"] + + klass, msg = { + np.sum: (TypeError, "datetime64 type does not support sum operations"), + np.mean: (None, ""), + }[groupby_func_np] + + if klass is None: + getattr(gb, how)(groupby_func_np) + else: + with pytest.raises(klass, match=msg): + getattr(gb, how)(groupby_func_np) + + @pytest.mark.parametrize("how", ["method", "agg", "transform"]) -def test_groupby_raises_category(how, groupby_func, as_index, sort): +def test_groupby_raises_category(how, by, groupby_serie, groupby_func, as_index, sort): df = DataFrame( { - "a": [1, 1, 1, 2, 2], - "b": range(5), - "c": Categorical( - ["a", "a", "b", "c", "c"], categories=["a", "b", "c", "d"], ordered=True + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": Categorical( + ["a", "a", "a", "a", "b", "b", "b", "b", "c"], + categories=["a", "b", "c", "d"], + ordered=True, ), } ) args = get_groupby_method_args(groupby_func, df) - gb = df.groupby("a", as_index=as_index, sort=sort) + gb = df.groupby(by=by, as_index=as_index, sort=sort) + + if groupby_serie: + if groupby_func == "corrwith": + pytest.skip() + gb = gb["d"] klass, msg = { "all": (None, ""), @@ -205,10 +328,30 @@ def test_groupby_raises_category(how, groupby_func, as_index, sort): ), "count": (None, ""), "cumcount": (None, ""), - "cummax": (NotImplementedError, "category dtype not supported"), - "cummin": (NotImplementedError, "category dtype not supported"), - "cumprod": (TypeError, "category type does not support cumprod operations"), - "cumsum": (TypeError, "category type does not support cumsum operations"), + "cummax": ( + (NotImplementedError, TypeError), + "(category type does not support cummax operations|" + + "category dtype not supported|" + + "cummax is not supported for category dtype)", + ), + "cummin": ( + (NotImplementedError, TypeError), + "(category type does not support cummin operations|" + + "category dtype not supported|" + "cummin is not supported for category dtype)", + ), + "cumprod": ( + (NotImplementedError, TypeError), + "(category type does not support cumprod operations|" + + "category dtype not supported|" + "cumprod is not supported for category dtype)", + ), + "cumsum": ( + (NotImplementedError, TypeError), + "(category type does not support cumsum operations|" + + "category dtype not supported|" + "cumsum is not supported for category dtype)", + ), "diff": ( TypeError, r"unsupported operand type\(s\) for -: 'Categorical' and 'Categorical'", @@ -216,12 +359,8 @@ def test_groupby_raises_category(how, groupby_func, as_index, sort): "ffill": (None, ""), "fillna": ( TypeError, - ", ".join( - [ - r"Cannot setitem on a Categorical with a new category \(0\)", - "set the categories first", - ] - ), + r"Cannot setitem on a Categorical with a new category \(0\), " + + "set the categories first", ), "first": (None, ""), "idxmax": (None, ""), @@ -279,20 +418,153 @@ def test_groupby_raises_category(how, groupby_func, as_index, sort): @pytest.mark.parametrize("how", ["agg", "transform"]) -def test_groupby_raises_category_udf(how): +def test_groupby_raises_category_udf(how, by, groupby_serie): df = DataFrame( { - "a": [1, 1, 1, 2, 2], - "b": range(5), - "c": Categorical( - ["a", "a", "b", "c", "c"], categories=["a", "b", "c", "d"], ordered=True + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": Categorical( + ["a", "a", "a", "a", "b", "b", "b", "b", "c"], + categories=["a", "b", "c", "d"], + ordered=True, ), } ) - gb = df.groupby("a") + gb = df.groupby(by=by) + + if groupby_serie: + gb = gb["d"] def func(x): raise TypeError("Test error message") with pytest.raises(TypeError, match="Test error message"): getattr(gb, how)(func) + + +@pytest.mark.parametrize("how", ["method", "agg", "transform"]) +def test_groupby_raises_category_on_category( + how, by, groupby_serie, groupby_func, observed +): + df = DataFrame( + { + "a": Categorical( + ["a", "a", "a", "a", "b", "b", "b", "b", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ), + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": Categorical( + ["a", "a", "a", "a", "b", "b", "c", "c", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ), + } + ) + args = get_groupby_method_args(groupby_func, df) + gb = df.groupby(by=by, observed=observed) + + if groupby_serie: + if groupby_func == "corrwith": + pytest.skip() + gb = gb["d"] + + empty_groups = any(group.empty for group in gb.groups.values()) + + klass, msg = { + "all": (None, ""), + "any": (None, ""), + "bfill": (None, ""), + "corrwith": ( + TypeError, + r"unsupported operand type\(s\) for \*: 'Categorical' and 'int'", + ), + "count": (None, ""), + "cumcount": (None, ""), + "cummax": ( + (NotImplementedError, TypeError), + "(cummax is not supported for category dtype|" + + "category dtype not supported|" + + "category type does not support cummax operations)", + ), + "cummin": ( + (NotImplementedError, TypeError), + "(cummin is not supported for category dtype|" + + "category dtype not supported|" + "category type does not support cummin operations)", + ), + "cumprod": ( + (NotImplementedError, TypeError), + "(cumprod is not supported for category dtype|" + + "category dtype not supported|" + "category type does not support cumprod operations)", + ), + "cumsum": ( + (NotImplementedError, TypeError), + "(cumsum is not supported for category dtype|" + + "category dtype not supported|" + + "category type does not support cumsum operations)", + ), + "diff": (TypeError, "unsupported operand type"), + "ffill": (None, ""), + "fillna": ( + TypeError, + r"Cannot setitem on a Categorical with a new category \(0\), " + + "set the categories first", + ), + "first": (None, ""), + "idxmax": (ValueError, "attempt to get argmax of an empty sequence") + if empty_groups + else (None, ""), + "idxmin": (ValueError, "attempt to get argmin of an empty sequence") + if empty_groups + else (None, ""), + "last": (None, ""), + "max": (None, ""), + "mean": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'mean'", + ), + "median": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'median'", + ), + "min": (None, ""), + "ngroup": (None, ""), + "nunique": (None, ""), + "pct_change": (TypeError, "unsupported operand type"), + "prod": (TypeError, "category type does not support prod operations"), + "quantile": (TypeError, ""), + "rank": (None, ""), + "sem": (ValueError, "Cannot cast object dtype to float64"), + "shift": (None, ""), + "size": (None, ""), + "skew": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'skew'", + ), + "std": (ValueError, "Cannot cast object dtype to float64"), + "sum": (TypeError, "category type does not support sum operations"), + "var": ( + TypeError, + "'Categorical' with dtype category does not support reduction 'var'", + ), + }[groupby_func] + + if klass is None: + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) + else: + with pytest.raises(klass, match=msg): + if how == "method": + getattr(gb, groupby_func)(*args) + elif how == "agg": + gb.agg(groupby_func, *args) + else: + gb.transform(groupby_func, *args) From 012de275caa17eaa28911f25facc8c709996a64f Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 3 Feb 2023 21:26:49 +0100 Subject: [PATCH 4/6] link pr --- pandas/tests/groupby/test_raises.py | 79 ++++++++++++++++++++++------- 1 file changed, 60 insertions(+), 19 deletions(-) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index 0906dbf6cf791..05b63416b6eef 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -35,12 +35,12 @@ def by(request): @pytest.fixture(params=[True, False]) -def groupby_serie(request): +def groupby_series(request): return request.param @pytest.mark.parametrize("how", ["method", "agg", "transform"]) -def test_groupby_raises_string(how, by, groupby_serie, groupby_func, as_index, sort): +def test_groupby_raises_string(how, by, groupby_series, groupby_func, as_index, sort): df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -52,7 +52,7 @@ def test_groupby_raises_string(how, by, groupby_serie, groupby_func, as_index, s args = get_groupby_method_args(groupby_func, df) gb = df.groupby(by=by, as_index=as_index, sort=sort) - if groupby_serie: + if groupby_series: if groupby_func == "corrwith": pytest.skip() gb = gb["d"] @@ -124,7 +124,7 @@ def test_groupby_raises_string(how, by, groupby_serie, groupby_func, as_index, s @pytest.mark.parametrize("how", ["agg", "transform"]) -def test_groupby_raises_string_udf(how, by, groupby_serie): +def test_groupby_raises_string_udf(how, by, groupby_series): df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -135,7 +135,7 @@ def test_groupby_raises_string_udf(how, by, groupby_serie): ) gb = df.groupby(by=by) - if groupby_serie: + if groupby_series: gb = gb["d"] def func(x): @@ -145,9 +145,10 @@ def func(x): getattr(gb, how)(func) +# GH#50749 @pytest.mark.parametrize("how", ["agg", "transform"]) @pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) -def test_groupby_raises_string_np(how, by, groupby_serie, groupby_func_np): +def test_groupby_raises_string_np(how, by, groupby_series, groupby_func_np): df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -158,7 +159,7 @@ def test_groupby_raises_string_np(how, by, groupby_serie, groupby_func_np): ) gb = df.groupby(by=by) - if groupby_serie: + if groupby_series: gb = gb["d"] klass, msg = { @@ -174,7 +175,7 @@ def test_groupby_raises_string_np(how, by, groupby_serie, groupby_func_np): @pytest.mark.parametrize("how", ["method", "agg", "transform"]) -def test_groupby_raises_datetime(how, by, groupby_serie, groupby_func, as_index, sort): +def test_groupby_raises_datetime(how, by, groupby_series, groupby_func, as_index, sort): df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -186,7 +187,7 @@ def test_groupby_raises_datetime(how, by, groupby_serie, groupby_func, as_index, args = get_groupby_method_args(groupby_func, df) gb = df.groupby(by=by, as_index=as_index, sort=sort) - if groupby_serie: + if groupby_series: if groupby_func == "corrwith": pytest.skip() gb = gb["d"] @@ -246,7 +247,7 @@ def test_groupby_raises_datetime(how, by, groupby_serie, groupby_func, as_index, @pytest.mark.parametrize("how", ["agg", "transform"]) -def test_groupby_raises_datetime_udf(how, by, groupby_serie): +def test_groupby_raises_datetime_udf(how, by, groupby_series): df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -258,7 +259,7 @@ def test_groupby_raises_datetime_udf(how, by, groupby_serie): gb = df.groupby(by=by) - if groupby_serie: + if groupby_series: gb = gb["d"] def func(x): @@ -268,9 +269,10 @@ def func(x): getattr(gb, how)(func) +# GH#50749 @pytest.mark.parametrize("how", ["agg", "transform"]) @pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) -def test_groupby_raises_datetime_np(how, by, groupby_serie, groupby_func_np): +def test_groupby_raises_datetime_np(how, by, groupby_series, groupby_func_np): df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -281,7 +283,7 @@ def test_groupby_raises_datetime_np(how, by, groupby_serie, groupby_func_np): ) gb = df.groupby(by=by) - if groupby_serie: + if groupby_series: gb = gb["d"] klass, msg = { @@ -296,8 +298,9 @@ def test_groupby_raises_datetime_np(how, by, groupby_serie, groupby_func_np): getattr(gb, how)(groupby_func_np) +# GH#50749 @pytest.mark.parametrize("how", ["method", "agg", "transform"]) -def test_groupby_raises_category(how, by, groupby_serie, groupby_func, as_index, sort): +def test_groupby_raises_category(how, by, groupby_series, groupby_func, as_index, sort): df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -313,7 +316,7 @@ def test_groupby_raises_category(how, by, groupby_serie, groupby_func, as_index, args = get_groupby_method_args(groupby_func, df) gb = df.groupby(by=by, as_index=as_index, sort=sort) - if groupby_serie: + if groupby_series: if groupby_func == "corrwith": pytest.skip() gb = gb["d"] @@ -417,8 +420,9 @@ def test_groupby_raises_category(how, by, groupby_serie, groupby_func, as_index, gb.transform(groupby_func, *args) +# GH#50749 @pytest.mark.parametrize("how", ["agg", "transform"]) -def test_groupby_raises_category_udf(how, by, groupby_serie): +def test_groupby_raises_category_udf(how, by, groupby_series): df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -433,7 +437,7 @@ def test_groupby_raises_category_udf(how, by, groupby_serie): ) gb = df.groupby(by=by) - if groupby_serie: + if groupby_series: gb = gb["d"] def func(x): @@ -443,9 +447,46 @@ def func(x): getattr(gb, how)(func) +# GH#50749 +@pytest.mark.parametrize("how", ["agg", "transform"]) +@pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) +def test_groupby_raises_category_np(how, by, groupby_series, groupby_func_np): + df = DataFrame( + { + "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], + "b": [3, 3, 4, 4, 4, 4, 4, 3, 3], + "c": range(9), + "d": Categorical( + ["a", "a", "a", "a", "b", "b", "b", "b", "c"], + categories=["a", "b", "c", "d"], + ordered=True, + ), + } + ) + gb = df.groupby(by=by) + + if groupby_series: + gb = gb["d"] + + klass, msg = { + np.sum: (TypeError, "category type does not support sum operations"), + np.mean: ( + TypeError, + "'Categorical' with dtype category does not support reduction 'mean'", + ), + }[groupby_func_np] + + if klass is None: + getattr(gb, how)(groupby_func_np) + else: + with pytest.raises(klass, match=msg): + getattr(gb, how)(groupby_func_np) + + +# GH#50749 @pytest.mark.parametrize("how", ["method", "agg", "transform"]) def test_groupby_raises_category_on_category( - how, by, groupby_serie, groupby_func, observed + how, by, groupby_series, groupby_func, observed ): df = DataFrame( { @@ -466,7 +507,7 @@ def test_groupby_raises_category_on_category( args = get_groupby_method_args(groupby_func, df) gb = df.groupby(by=by, observed=observed) - if groupby_serie: + if groupby_series: if groupby_func == "corrwith": pytest.skip() gb = gb["d"] From f4bcf3609e910fd25a764faadbf422b9af9496c5 Mon Sep 17 00:00:00 2001 From: Alex Date: Fri, 3 Feb 2023 21:40:12 +0100 Subject: [PATCH 5/6] typo --- pandas/tests/groupby/test_raises.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index f708929efecfa..e683a7ae7cd73 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -145,10 +145,10 @@ def func(x): getattr(gb, how)(func) -# GH#50749 @pytest.mark.parametrize("how", ["agg", "transform"]) @pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) def test_groupby_raises_string_np(how, by, groupby_series, groupby_func_np): + # GH#50749 df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -269,10 +269,10 @@ def func(x): getattr(gb, how)(func) -# GH#50749 @pytest.mark.parametrize("how", ["agg", "transform"]) @pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) def test_groupby_raises_datetime_np(how, by, groupby_series, groupby_func_np): + # GH#50749 df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -298,9 +298,9 @@ def test_groupby_raises_datetime_np(how, by, groupby_series, groupby_func_np): getattr(gb, how)(groupby_func_np) -# GH#50749 @pytest.mark.parametrize("how", ["method", "agg", "transform"]) def test_groupby_raises_category(how, by, groupby_series, groupby_func, as_index, sort): + # GH#50749 df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -420,9 +420,9 @@ def test_groupby_raises_category(how, by, groupby_series, groupby_func, as_index gb.transform(groupby_func, *args) -# GH#50749 @pytest.mark.parametrize("how", ["agg", "transform"]) def test_groupby_raises_category_udf(how, by, groupby_series): + # GH#50749 df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -447,10 +447,10 @@ def func(x): getattr(gb, how)(func) -# GH#50749 @pytest.mark.parametrize("how", ["agg", "transform"]) @pytest.mark.parametrize("groupby_func_np", [np.sum, np.mean]) def test_groupby_raises_category_np(how, by, groupby_series, groupby_func_np): + # GH#50749 df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -483,11 +483,11 @@ def test_groupby_raises_category_np(how, by, groupby_series, groupby_func_np): getattr(gb, how)(groupby_func_np) -# GH#50749 @pytest.mark.parametrize("how", ["method", "agg", "transform"]) def test_groupby_raises_category_on_category( how, by, groupby_series, groupby_func, observed ): + # GH#50749 df = DataFrame( { "a": Categorical( From 3d2ec033a5bb982ddfeb600ee802250880042750 Mon Sep 17 00:00:00 2001 From: Alex Date: Thu, 9 Feb 2023 16:10:34 +0100 Subject: [PATCH 6/6] remove sort and as_index and assert corrwith for series --- pandas/tests/groupby/test_raises.py | 36 ++++++++++++++++++----------- 1 file changed, 22 insertions(+), 14 deletions(-) diff --git a/pandas/tests/groupby/test_raises.py b/pandas/tests/groupby/test_raises.py index e683a7ae7cd73..6ceb23a3c44b6 100644 --- a/pandas/tests/groupby/test_raises.py +++ b/pandas/tests/groupby/test_raises.py @@ -40,7 +40,7 @@ def groupby_series(request): @pytest.mark.parametrize("how", ["method", "agg", "transform"]) -def test_groupby_raises_string(how, by, groupby_series, groupby_func, as_index, sort): +def test_groupby_raises_string(how, by, groupby_series, groupby_func): df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -50,13 +50,15 @@ def test_groupby_raises_string(how, by, groupby_series, groupby_func, as_index, } ) args = get_groupby_method_args(groupby_func, df) - gb = df.groupby(by=by, as_index=as_index, sort=sort) + gb = df.groupby(by=by) if groupby_series: - if groupby_func == "corrwith": - pytest.skip() gb = gb["d"] + if groupby_func == "corrwith": + assert not hasattr(gb, "corrwith") + return + klass, msg = { "all": (None, ""), "any": (None, ""), @@ -175,7 +177,7 @@ def test_groupby_raises_string_np(how, by, groupby_series, groupby_func_np): @pytest.mark.parametrize("how", ["method", "agg", "transform"]) -def test_groupby_raises_datetime(how, by, groupby_series, groupby_func, as_index, sort): +def test_groupby_raises_datetime(how, by, groupby_series, groupby_func): df = DataFrame( { "a": [1, 1, 1, 1, 1, 2, 2, 2, 2], @@ -185,13 +187,15 @@ def test_groupby_raises_datetime(how, by, groupby_series, groupby_func, as_index } ) args = get_groupby_method_args(groupby_func, df) - gb = df.groupby(by=by, as_index=as_index, sort=sort) + gb = df.groupby(by=by) if groupby_series: - if groupby_func == "corrwith": - pytest.skip() gb = gb["d"] + if groupby_func == "corrwith": + assert not hasattr(gb, "corrwith") + return + klass, msg = { "all": (None, ""), "any": (None, ""), @@ -299,7 +303,7 @@ def test_groupby_raises_datetime_np(how, by, groupby_series, groupby_func_np): @pytest.mark.parametrize("how", ["method", "agg", "transform"]) -def test_groupby_raises_category(how, by, groupby_series, groupby_func, as_index, sort): +def test_groupby_raises_category(how, by, groupby_series, groupby_func): # GH#50749 df = DataFrame( { @@ -314,13 +318,15 @@ def test_groupby_raises_category(how, by, groupby_series, groupby_func, as_index } ) args = get_groupby_method_args(groupby_func, df) - gb = df.groupby(by=by, as_index=as_index, sort=sort) + gb = df.groupby(by=by) if groupby_series: - if groupby_func == "corrwith": - pytest.skip() gb = gb["d"] + if groupby_func == "corrwith": + assert not hasattr(gb, "corrwith") + return + klass, msg = { "all": (None, ""), "any": (None, ""), @@ -508,10 +514,12 @@ def test_groupby_raises_category_on_category( gb = df.groupby(by=by, observed=observed) if groupby_series: - if groupby_func == "corrwith": - pytest.skip() gb = gb["d"] + if groupby_func == "corrwith": + assert not hasattr(gb, "corrwith") + return + empty_groups = any(group.empty for group in gb.groups.values()) klass, msg = {