diff --git a/pandas/tests/groupby/test_any_all.py b/pandas/tests/groupby/test_any_all.py new file mode 100644 index 0000000000000..4123fb95002dd --- /dev/null +++ b/pandas/tests/groupby/test_any_all.py @@ -0,0 +1,70 @@ +import builtins + +import numpy as np +import pytest + +from pandas import ( + DataFrame, + Index, + isna, +) +import pandas._testing as tm + + +@pytest.mark.parametrize("agg_func", ["any", "all"]) +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize( + "vals", + [ + ["foo", "bar", "baz"], + ["foo", "", ""], + ["", "", ""], + [1, 2, 3], + [1, 0, 0], + [0, 0, 0], + [1.0, 2.0, 3.0], + [1.0, 0.0, 0.0], + [0.0, 0.0, 0.0], + [True, True, True], + [True, False, False], + [False, False, False], + [np.nan, np.nan, np.nan], + ], +) +def test_groupby_bool_aggs(agg_func, skipna, vals): + df = DataFrame({"key": ["a"] * 3 + ["b"] * 3, "val": vals * 2}) + + # Figure out expectation using Python builtin + exp = getattr(builtins, agg_func)(vals) + + # edge case for missing data with skipna and 'any' + if skipna and all(isna(vals)) and agg_func == "any": + exp = False + + exp_df = DataFrame([exp] * 2, columns=["val"], index=Index(["a", "b"], name="key")) + result = getattr(df.groupby("key"), agg_func)(skipna=skipna) + tm.assert_frame_equal(result, exp_df) + + +def test_any(): + df = DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], + columns=["A", "B", "C"], + ) + expected = DataFrame( + [[True, True], [False, True]], columns=["B", "C"], index=[1, 3] + ) + expected.index.name = "A" + result = df.groupby("A").any() + tm.assert_frame_equal(result, expected) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +def test_bool_aggs_dup_column_labels(bool_agg_func): + # 21668 + df = DataFrame([[True, True]], columns=["a", "a"]) + grp_by = df.groupby([0]) + result = getattr(grp_by, bool_agg_func)() + + expected = df + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 515774eae009b..843d438018a32 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -14,7 +14,6 @@ Series, Timestamp, date_range, - isna, ) import pandas._testing as tm import pandas.core.nanops as nanops @@ -41,41 +40,6 @@ def numpy_dtypes_for_minmax(request): return (dtype, min_val, max_val) -@pytest.mark.parametrize("agg_func", ["any", "all"]) -@pytest.mark.parametrize("skipna", [True, False]) -@pytest.mark.parametrize( - "vals", - [ - ["foo", "bar", "baz"], - ["foo", "", ""], - ["", "", ""], - [1, 2, 3], - [1, 0, 0], - [0, 0, 0], - [1.0, 2.0, 3.0], - [1.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [True, True, True], - [True, False, False], - [False, False, False], - [np.nan, np.nan, np.nan], - ], -) -def test_groupby_bool_aggs(agg_func, skipna, vals): - df = DataFrame({"key": ["a"] * 3 + ["b"] * 3, "val": vals * 2}) - - # Figure out expectation using Python builtin - exp = getattr(builtins, agg_func)(vals) - - # edge case for missing data with skipna and 'any' - if skipna and all(isna(vals)) and agg_func == "any": - exp = False - - exp_df = DataFrame([exp] * 2, columns=["val"], index=Index(["a", "b"], name="key")) - result = getattr(df.groupby("key"), agg_func)(skipna=skipna) - tm.assert_frame_equal(result, exp_df) - - def test_max_min_non_numeric(): # #2700 aa = DataFrame({"nn": [11, 11, 22, 22], "ii": [1, 2, 3, 4], "ss": 4 * ["mama"]}) @@ -344,14 +308,6 @@ def test_idxmin(self, gb): result = gb.idxmin() tm.assert_frame_equal(result, expected) - def test_any(self, gb): - expected = DataFrame( - [[True, True], [False, True]], columns=["B", "C"], index=[1, 3] - ) - expected.index.name = "A" - result = gb.any() - tm.assert_frame_equal(result, expected) - def test_mad(self, gb, gni): # mad expected = DataFrame([[0], [np.nan]], columns=["B"], index=[1, 3]) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index de508b8cd78ec..6c51e32fa9a78 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1978,17 +1978,6 @@ def test_groupby_duplicate_index(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) -def test_bool_aggs_dup_column_labels(bool_agg_func): - # 21668 - df = DataFrame([[True, True]], columns=["a", "a"]) - grp_by = df.groupby([0]) - result = getattr(grp_by, bool_agg_func)() - - expected = df - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( "idx", [Index(["a", "a"]), MultiIndex.from_tuples((("a", "a"), ("a", "a")))] )