diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 882f42ff18bdd..78b99a00d43ce 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -356,6 +356,12 @@ def test_agg_multiple_functions_maintain_order(df): tm.assert_index_equal(result.columns, exp_cols) +def test_series_index_name(df): + grouped = df.loc[:, ["C"]].groupby(df["A"]) + result = grouped.agg(lambda x: x.mean()) + assert result.index.name == "A" + + def test_agg_multiple_functions_same_name(): # GH 30880 df = DataFrame( diff --git a/pandas/tests/groupby/methods/test_describe.py b/pandas/tests/groupby/methods/test_describe.py index f38de8faddb59..c2ffcb04caa60 100644 --- a/pandas/tests/groupby/methods/test_describe.py +++ b/pandas/tests/groupby/methods/test_describe.py @@ -219,3 +219,73 @@ def test_describe_duplicate_columns(): ) expected.index.names = [1] tm.assert_frame_equal(result, expected) + + +class TestGroupByNonCythonPaths: + # GH#5610 non-cython calls should not include the grouper + # Tests for code not expected to go through cython paths. + + @pytest.fixture + def df(self): + df = DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], + columns=["A", "B", "C"], + ) + return df + + @pytest.fixture + def gb(self, df): + gb = df.groupby("A") + return gb + + @pytest.fixture + def gni(self, df): + gni = df.groupby("A", as_index=False) + return gni + + def test_describe(self, df, gb, gni): + # describe + expected_index = Index([1, 3], name="A") + expected_col = MultiIndex( + levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]], + codes=[[0] * 8, list(range(8))], + ) + expected = DataFrame( + [ + [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0], + [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], + ], + index=expected_index, + columns=expected_col, + ) + result = gb.describe() + tm.assert_frame_equal(result, expected) + + expected = expected.reset_index() + result = gni.describe() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("dtype", [int, float, object]) +@pytest.mark.parametrize( + "kwargs", + [ + {"percentiles": [0.10, 0.20, 0.30], "include": "all", "exclude": None}, + {"percentiles": [0.10, 0.20, 0.30], "include": None, "exclude": ["int"]}, + {"percentiles": [0.10, 0.20, 0.30], "include": ["int"], "exclude": None}, + ], +) +def test_groupby_empty_dataset(dtype, kwargs): + # GH#41575 + df = DataFrame([[1, 2, 3]], columns=["A", "B", "C"], dtype=dtype) + df["B"] = df["B"].astype(int) + df["C"] = df["C"].astype(float) + + result = df.iloc[:0].groupby("A").describe(**kwargs) + expected = df.groupby("A").describe(**kwargs).reset_index(drop=True).iloc[:0] + tm.assert_frame_equal(result, expected) + + result = df.iloc[:0].groupby("A").B.describe(**kwargs) + expected = df.groupby("A").B.describe(**kwargs).reset_index(drop=True).iloc[:0] + expected.index = Index([]) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 2f2648b9293c5..60b386adb664a 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1559,3 +1559,45 @@ def test_include_groups(include_groups): if not include_groups: expected = expected[["b"]] tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("f", [max, min, sum]) +@pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key +def test_builtins_apply(keys, f): + # see gh-8155 + rs = np.random.default_rng(2) + df = DataFrame(rs.integers(1, 7, (10, 2)), columns=["jim", "joe"]) + df["jolie"] = rs.standard_normal(10) + + gb = df.groupby(keys) + + fname = f.__name__ + + warn = None if f is not sum else FutureWarning + msg = "The behavior of DataFrame.sum with axis=None is deprecated" + with tm.assert_produces_warning( + warn, match=msg, check_stacklevel=False, raise_on_extra_warnings=False + ): + # Also warns on deprecation GH#53425 + result = gb.apply(f) + ngroups = len(df.drop_duplicates(subset=keys)) + + assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))" + assert result.shape == (ngroups, 3), assert_msg + + npfunc = lambda x: getattr(np, fname)(x, axis=0) # numpy's equivalent function + msg = "DataFrameGroupBy.apply operated on the grouping columns" + with tm.assert_produces_warning(FutureWarning, match=msg): + expected = gb.apply(npfunc) + tm.assert_frame_equal(result, expected) + + with tm.assert_produces_warning(FutureWarning, match=msg): + expected2 = gb.apply(lambda x: npfunc(x)) + tm.assert_frame_equal(result, expected2) + + if f != sum: + expected = gb.agg(fname).reset_index() + expected.set_index(keys, inplace=True, drop=False) + tm.assert_frame_equal(result, expected, check_dtype=False) + + tm.assert_series_equal(getattr(result, fname)(axis=0), getattr(df, fname)(axis=0)) diff --git a/pandas/tests/groupby/test_cumulative.py b/pandas/tests/groupby/test_cumulative.py index eecb82cd5050b..25534865b3486 100644 --- a/pandas/tests/groupby/test_cumulative.py +++ b/pandas/tests/groupby/test_cumulative.py @@ -289,3 +289,30 @@ def test_nullable_int_not_cast_as_float(method, dtype, val): expected = DataFrame({"b": data}, dtype=dtype) tm.assert_frame_equal(result, expected) + + +def test_cython_api2(): + # this takes the fast apply path + + # cumsum (GH5614) + df = DataFrame([[1, 2, np.nan], [1, np.nan, 9], [3, 4, 9]], columns=["A", "B", "C"]) + expected = DataFrame([[2, np.nan], [np.nan, 9], [4, 9]], columns=["B", "C"]) + result = df.groupby("A").cumsum() + tm.assert_frame_equal(result, expected) + + # GH 5755 - cumsum is a transformer and should ignore as_index + result = df.groupby("A", as_index=False).cumsum() + tm.assert_frame_equal(result, expected) + + # GH 13994 + msg = "DataFrameGroupBy.cumsum with axis=1 is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").cumsum(axis=1) + expected = df.cumsum(axis=1) + tm.assert_frame_equal(result, expected) + + msg = "DataFrameGroupBy.cumprod with axis=1 is deprecated" + with tm.assert_produces_warning(FutureWarning, match=msg): + result = df.groupby("A").cumprod(axis=1) + expected = df.cumprod(axis=1) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index b840443aab347..399ea534ae373 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1,4 +1,3 @@ -import builtins import re import numpy as np @@ -10,73 +9,12 @@ from pandas import ( DataFrame, Index, - MultiIndex, Series, Timestamp, date_range, ) import pandas._testing as tm from pandas.tests.groupby import get_groupby_method_args -from pandas.util import _test_decorators as td - - -def test_intercept_builtin_sum(): - s = Series([1.0, 2.0, np.nan, 3.0]) - grouped = s.groupby([0, 1, 2, 2]) - - msg = "using SeriesGroupBy.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - result = grouped.agg(builtins.sum) - msg = "using np.sum" - with tm.assert_produces_warning(FutureWarning, match=msg): - # GH#53425 - result2 = grouped.apply(builtins.sum) - expected = grouped.sum() - tm.assert_series_equal(result, expected) - tm.assert_series_equal(result2, expected) - - -@pytest.mark.parametrize("f", [max, min, sum]) -@pytest.mark.parametrize("keys", ["jim", ["jim", "joe"]]) # Single key # Multi-key -def test_builtins_apply(keys, f): - # see gh-8155 - rs = np.random.default_rng(2) - df = DataFrame(rs.integers(1, 7, (10, 2)), columns=["jim", "joe"]) - df["jolie"] = rs.standard_normal(10) - - gb = df.groupby(keys) - - fname = f.__name__ - - warn = None if f is not sum else FutureWarning - msg = "The behavior of DataFrame.sum with axis=None is deprecated" - with tm.assert_produces_warning( - warn, match=msg, check_stacklevel=False, raise_on_extra_warnings=False - ): - # Also warns on deprecation GH#53425 - result = gb.apply(f) - ngroups = len(df.drop_duplicates(subset=keys)) - - assert_msg = f"invalid frame shape: {result.shape} (expected ({ngroups}, 3))" - assert result.shape == (ngroups, 3), assert_msg - - npfunc = lambda x: getattr(np, fname)(x, axis=0) # numpy's equivalent function - msg = "DataFrameGroupBy.apply operated on the grouping columns" - with tm.assert_produces_warning(FutureWarning, match=msg): - expected = gb.apply(npfunc) - tm.assert_frame_equal(result, expected) - - with tm.assert_produces_warning(FutureWarning, match=msg): - expected2 = gb.apply(lambda x: npfunc(x)) - tm.assert_frame_equal(result, expected2) - - if f != sum: - expected = gb.agg(fname).reset_index() - expected.set_index(keys, inplace=True, drop=False) - tm.assert_frame_equal(result, expected, check_dtype=False) - - tm.assert_series_equal(getattr(result, fname)(axis=0), getattr(df, fname)(axis=0)) class TestNumericOnly: @@ -267,118 +205,6 @@ def _check(self, df, method, expected_columns, expected_columns_numeric): tm.assert_index_equal(result.columns, expected_columns) -class TestGroupByNonCythonPaths: - # GH#5610 non-cython calls should not include the grouper - # Tests for code not expected to go through cython paths. - - @pytest.fixture - def df(self): - df = DataFrame( - [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], - columns=["A", "B", "C"], - ) - return df - - @pytest.fixture - def gb(self, df): - gb = df.groupby("A") - return gb - - @pytest.fixture - def gni(self, df): - gni = df.groupby("A", as_index=False) - return gni - - def test_describe(self, df, gb, gni): - # describe - expected_index = Index([1, 3], name="A") - expected_col = MultiIndex( - levels=[["B"], ["count", "mean", "std", "min", "25%", "50%", "75%", "max"]], - codes=[[0] * 8, list(range(8))], - ) - expected = DataFrame( - [ - [1.0, 2.0, np.nan, 2.0, 2.0, 2.0, 2.0, 2.0], - [0.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan], - ], - index=expected_index, - columns=expected_col, - ) - result = gb.describe() - tm.assert_frame_equal(result, expected) - - expected = expected.reset_index() - result = gni.describe() - tm.assert_frame_equal(result, expected) - - -def test_cython_api2(): - # this takes the fast apply path - - # cumsum (GH5614) - df = DataFrame([[1, 2, np.nan], [1, np.nan, 9], [3, 4, 9]], columns=["A", "B", "C"]) - expected = DataFrame([[2, np.nan], [np.nan, 9], [4, 9]], columns=["B", "C"]) - result = df.groupby("A").cumsum() - tm.assert_frame_equal(result, expected) - - # GH 5755 - cumsum is a transformer and should ignore as_index - result = df.groupby("A", as_index=False).cumsum() - tm.assert_frame_equal(result, expected) - - # GH 13994 - msg = "DataFrameGroupBy.cumsum with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("A").cumsum(axis=1) - expected = df.cumsum(axis=1) - tm.assert_frame_equal(result, expected) - - msg = "DataFrameGroupBy.cumprod with axis=1 is deprecated" - with tm.assert_produces_warning(FutureWarning, match=msg): - result = df.groupby("A").cumprod(axis=1) - expected = df.cumprod(axis=1) - tm.assert_frame_equal(result, expected) - - -@pytest.mark.parametrize( - "dtype", ["int8", "int16", "int32", "int64", "float32", "float64", "uint64"] -) -@pytest.mark.parametrize( - "method,data", - [ - ("first", {"df": [{"a": 1, "b": 1}, {"a": 2, "b": 3}]}), - ("last", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}]}), - ("min", {"df": [{"a": 1, "b": 1}, {"a": 2, "b": 3}]}), - ("max", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}]}), - ("count", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 2}], "out_type": "int64"}), - ], -) -def test_groupby_non_arithmetic_agg_types(dtype, method, data): - # GH9311, GH6620 - df = DataFrame( - [{"a": 1, "b": 1}, {"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 2, "b": 4}] - ) - - df["b"] = df.b.astype(dtype) - - if "args" not in data: - data["args"] = [] - - if "out_type" in data: - out_type = data["out_type"] - else: - out_type = dtype - - exp = data["df"] - df_out = DataFrame(exp) - - df_out["b"] = df_out.b.astype(out_type) - df_out.set_index("a", inplace=True) - - grpd = df.groupby("a") - t = getattr(grpd, method)(*data["args"]) - tm.assert_frame_equal(t, df_out) - - @pytest.mark.parametrize( "i", [ @@ -493,78 +319,6 @@ def test_axis1_numeric_only(request, groupby_func, numeric_only): tm.assert_equal(result, expected) -def scipy_sem(*args, **kwargs): - from scipy.stats import sem - - return sem(*args, ddof=1, **kwargs) - - -@pytest.mark.parametrize( - "op,targop", - [ - ("mean", np.mean), - ("median", np.median), - ("std", np.std), - ("var", np.var), - ("sum", np.sum), - ("prod", np.prod), - ("min", np.min), - ("max", np.max), - ("first", lambda x: x.iloc[0]), - ("last", lambda x: x.iloc[-1]), - ("count", np.size), - pytest.param("sem", scipy_sem, marks=td.skip_if_no_scipy), - ], -) -def test_ops_general(op, targop): - df = DataFrame(np.random.default_rng(2).standard_normal(1000)) - labels = np.random.default_rng(2).integers(0, 50, size=1000).astype(float) - - result = getattr(df.groupby(labels), op)() - warn = None if op in ("first", "last", "count", "sem") else FutureWarning - msg = f"using DataFrameGroupBy.{op}" - with tm.assert_produces_warning(warn, match=msg): - expected = df.groupby(labels).agg(targop) - tm.assert_frame_equal(result, expected) - - -def test_series_index_name(df): - grouped = df.loc[:, ["C"]].groupby(df["A"]) - result = grouped.agg(lambda x: x.mean()) - assert result.index.name == "A" - - -@pytest.mark.parametrize( - "values", - [ - { - "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], - "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2], - }, - {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, - ], -) -@pytest.mark.parametrize("function", ["mean", "median", "var"]) -def test_apply_to_nullable_integer_returns_float(values, function): - # https://github.com/pandas-dev/pandas/issues/32219 - output = 0.5 if function == "var" else 1.5 - arr = np.array([output] * 3, dtype=float) - idx = Index([1, 2, 3], name="a", dtype="Int64") - expected = DataFrame({"b": arr}, index=idx).astype("Float64") - - groups = DataFrame(values, dtype="Int64").groupby("a") - - result = getattr(groups, function)() - tm.assert_frame_equal(result, expected) - - result = groups.agg(function) - tm.assert_frame_equal(result, expected) - - result = groups.agg([function]) - expected.columns = MultiIndex.from_tuples([("b", function)]) - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( "kernel, has_arg", [ @@ -781,31 +535,6 @@ def test_deprecate_numeric_only_series(dtype, groupby_func, request): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("dtype", [int, float, object]) -@pytest.mark.parametrize( - "kwargs", - [ - {"percentiles": [0.10, 0.20, 0.30], "include": "all", "exclude": None}, - {"percentiles": [0.10, 0.20, 0.30], "include": None, "exclude": ["int"]}, - {"percentiles": [0.10, 0.20, 0.30], "include": ["int"], "exclude": None}, - ], -) -def test_groupby_empty_dataset(dtype, kwargs): - # GH#41575 - df = DataFrame([[1, 2, 3]], columns=["A", "B", "C"], dtype=dtype) - df["B"] = df["B"].astype(int) - df["C"] = df["C"].astype(float) - - result = df.iloc[:0].groupby("A").describe(**kwargs) - expected = df.groupby("A").describe(**kwargs).reset_index(drop=True).iloc[:0] - tm.assert_frame_equal(result, expected) - - result = df.iloc[:0].groupby("A").B.describe(**kwargs) - expected = df.groupby("A").B.describe(**kwargs).reset_index(drop=True).iloc[:0] - expected.index = Index([]) - tm.assert_frame_equal(result, expected) - - def test_multiindex_group_all_columns_when_empty(groupby_func): # GH 32464 df = DataFrame({"a": [], "b": [], "c": []}).set_index(["a", "b", "c"]) @@ -835,53 +564,3 @@ def test_duplicate_columns(request, groupby_func, as_index): if groupby_func not in ("size", "ngroup", "cumcount"): expected = expected.rename(columns={"c": "b"}) tm.assert_equal(result, expected) - - -@pytest.mark.parametrize( - "op", - [ - "sum", - "prod", - "min", - "max", - "median", - "mean", - "skew", - "std", - "var", - "sem", - ], -) -@pytest.mark.parametrize("axis", [0, 1]) -@pytest.mark.parametrize("skipna", [True, False]) -@pytest.mark.parametrize("sort", [True, False]) -def test_regression_allowlist_methods(op, axis, skipna, sort): - # GH6944 - # GH 17537 - # explicitly test the allowlist methods - raw_frame = DataFrame([0]) - if axis == 0: - frame = raw_frame - msg = "The 'axis' keyword in DataFrame.groupby is deprecated and will be" - else: - frame = raw_frame.T - msg = "DataFrame.groupby with axis=1 is deprecated" - - with tm.assert_produces_warning(FutureWarning, match=msg): - grouped = frame.groupby(level=0, axis=axis, sort=sort) - - if op == "skew": - # skew has skipna - result = getattr(grouped, op)(skipna=skipna) - expected = frame.groupby(level=0).apply( - lambda h: getattr(h, op)(axis=axis, skipna=skipna) - ) - if sort: - expected = expected.sort_index(axis=axis) - tm.assert_frame_equal(result, expected) - else: - result = getattr(grouped, op)() - expected = frame.groupby(level=0).apply(lambda h: getattr(h, op)(axis=axis)) - if sort: - expected = expected.sort_index(axis=axis) - tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_reductions.py b/pandas/tests/groupby/test_reductions.py index f48926e4d0c77..e5836ce1e61c9 100644 --- a/pandas/tests/groupby/test_reductions.py +++ b/pandas/tests/groupby/test_reductions.py @@ -17,6 +17,7 @@ isna, ) import pandas._testing as tm +from pandas.util import _test_decorators as td @pytest.mark.parametrize("agg_func", ["any", "all"]) @@ -793,6 +794,23 @@ def test_empty_categorical(observed): tm.assert_series_equal(result, expected) +def test_intercept_builtin_sum(): + s = Series([1.0, 2.0, np.nan, 3.0]) + grouped = s.groupby([0, 1, 2, 2]) + + msg = "using SeriesGroupBy.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + result = grouped.agg(builtins.sum) + msg = "using np.sum" + with tm.assert_produces_warning(FutureWarning, match=msg): + # GH#53425 + result2 = grouped.apply(builtins.sum) + expected = grouped.sum() + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result2, expected) + + @pytest.mark.parametrize("min_count", [0, 10]) def test_groupby_sum_mincount_boolean(min_count): b = True @@ -853,3 +871,159 @@ def test_groupby_sum_timedelta_with_nat(): res = gb["b"].sum(min_count=2) expected = Series([td3, pd.NaT], dtype="m8[ns]", name="b", index=expected.index) tm.assert_series_equal(res, expected) + + +@pytest.mark.parametrize( + "dtype", ["int8", "int16", "int32", "int64", "float32", "float64", "uint64"] +) +@pytest.mark.parametrize( + "method,data", + [ + ("first", {"df": [{"a": 1, "b": 1}, {"a": 2, "b": 3}]}), + ("last", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}]}), + ("min", {"df": [{"a": 1, "b": 1}, {"a": 2, "b": 3}]}), + ("max", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 4}]}), + ("count", {"df": [{"a": 1, "b": 2}, {"a": 2, "b": 2}], "out_type": "int64"}), + ], +) +def test_groupby_non_arithmetic_agg_types(dtype, method, data): + # GH9311, GH6620 + df = DataFrame( + [{"a": 1, "b": 1}, {"a": 1, "b": 2}, {"a": 2, "b": 3}, {"a": 2, "b": 4}] + ) + + df["b"] = df.b.astype(dtype) + + if "args" not in data: + data["args"] = [] + + if "out_type" in data: + out_type = data["out_type"] + else: + out_type = dtype + + exp = data["df"] + df_out = DataFrame(exp) + + df_out["b"] = df_out.b.astype(out_type) + df_out.set_index("a", inplace=True) + + grpd = df.groupby("a") + t = getattr(grpd, method)(*data["args"]) + tm.assert_frame_equal(t, df_out) + + +def scipy_sem(*args, **kwargs): + from scipy.stats import sem + + return sem(*args, ddof=1, **kwargs) + + +@pytest.mark.parametrize( + "op,targop", + [ + ("mean", np.mean), + ("median", np.median), + ("std", np.std), + ("var", np.var), + ("sum", np.sum), + ("prod", np.prod), + ("min", np.min), + ("max", np.max), + ("first", lambda x: x.iloc[0]), + ("last", lambda x: x.iloc[-1]), + ("count", np.size), + pytest.param("sem", scipy_sem, marks=td.skip_if_no_scipy), + ], +) +def test_ops_general(op, targop): + df = DataFrame(np.random.default_rng(2).standard_normal(1000)) + labels = np.random.default_rng(2).integers(0, 50, size=1000).astype(float) + + result = getattr(df.groupby(labels), op)() + warn = None if op in ("first", "last", "count", "sem") else FutureWarning + msg = f"using DataFrameGroupBy.{op}" + with tm.assert_produces_warning(warn, match=msg): + expected = df.groupby(labels).agg(targop) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "values", + [ + { + "a": [1, 1, 1, 2, 2, 2, 3, 3, 3], + "b": [1, pd.NA, 2, 1, pd.NA, 2, 1, pd.NA, 2], + }, + {"a": [1, 1, 2, 2, 3, 3], "b": [1, 2, 1, 2, 1, 2]}, + ], +) +@pytest.mark.parametrize("function", ["mean", "median", "var"]) +def test_apply_to_nullable_integer_returns_float(values, function): + # https://github.com/pandas-dev/pandas/issues/32219 + output = 0.5 if function == "var" else 1.5 + arr = np.array([output] * 3, dtype=float) + idx = pd.Index([1, 2, 3], name="a", dtype="Int64") + expected = DataFrame({"b": arr}, index=idx).astype("Float64") + + groups = DataFrame(values, dtype="Int64").groupby("a") + + result = getattr(groups, function)() + tm.assert_frame_equal(result, expected) + + result = groups.agg(function) + tm.assert_frame_equal(result, expected) + + result = groups.agg([function]) + expected.columns = MultiIndex.from_tuples([("b", function)]) + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize( + "op", + [ + "sum", + "prod", + "min", + "max", + "median", + "mean", + "skew", + "std", + "var", + "sem", + ], +) +@pytest.mark.parametrize("axis", [0, 1]) +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize("sort", [True, False]) +def test_regression_allowlist_methods(op, axis, skipna, sort): + # GH6944 + # GH 17537 + # explicitly test the allowlist methods + raw_frame = DataFrame([0]) + if axis == 0: + frame = raw_frame + msg = "The 'axis' keyword in DataFrame.groupby is deprecated and will be" + else: + frame = raw_frame.T + msg = "DataFrame.groupby with axis=1 is deprecated" + + with tm.assert_produces_warning(FutureWarning, match=msg): + grouped = frame.groupby(level=0, axis=axis, sort=sort) + + if op == "skew": + # skew has skipna + result = getattr(grouped, op)(skipna=skipna) + expected = frame.groupby(level=0).apply( + lambda h: getattr(h, op)(axis=axis, skipna=skipna) + ) + if sort: + expected = expected.sort_index(axis=axis) + tm.assert_frame_equal(result, expected) + else: + result = getattr(grouped, op)() + expected = frame.groupby(level=0).apply(lambda h: getattr(h, op)(axis=axis)) + if sort: + expected = expected.sort_index(axis=axis) + tm.assert_frame_equal(result, expected)