From 2554921a4fe6e12f4a2773ad7c25ae522077ee73 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Tue, 6 Apr 2021 15:02:50 -0400 Subject: [PATCH 1/3] TSTS: Consolidate groupby any, all --- .../tests/groupby/aggregate/test_aggregate.py | 16 ---- pandas/tests/groupby/test_any_all.py | 92 +++++++++++++++++++ pandas/tests/groupby/test_function.py | 43 --------- pandas/tests/groupby/test_groupby.py | 11 --- 4 files changed, 92 insertions(+), 70 deletions(-) create mode 100644 pandas/tests/groupby/test_any_all.py diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index fc0b4d86e81bf..145f2643465ef 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -402,22 +402,6 @@ def test_multi_function_flexible_mix(df): grouped.aggregate(d) -def test_groupby_agg_coercing_bools(): - # issue 14873 - dat = DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3], "c": [None, None, 1, 1]}) - gp = dat.groupby("a") - - index = Index([1, 2], name="a") - - result = gp["b"].aggregate(lambda x: (x != 0).all()) - expected = Series([False, True], index=index, name="b") - tm.assert_series_equal(result, expected) - - result = gp["c"].aggregate(lambda x: x.isnull().all()) - expected = Series([True, False], index=index, name="c") - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize( "op", [ diff --git a/pandas/tests/groupby/test_any_all.py b/pandas/tests/groupby/test_any_all.py new file mode 100644 index 0000000000000..579909281f18e --- /dev/null +++ b/pandas/tests/groupby/test_any_all.py @@ -0,0 +1,92 @@ +import builtins +from io import StringIO + +import numpy as np +import pytest + +from pandas.errors import UnsupportedFunctionCall + +import pandas as pd +from pandas import ( + DataFrame, + Index, + MultiIndex, + Series, + Timestamp, + date_range, + isna, +) +import pandas._testing as tm +import pandas.core.nanops as nanops +from pandas.util import _test_decorators as td + + +@pytest.mark.parametrize("agg_func", ["any", "all"]) +@pytest.mark.parametrize("skipna", [True, False]) +@pytest.mark.parametrize( + "vals", + [ + ["foo", "bar", "baz"], + ["foo", "", ""], + ["", "", ""], + [1, 2, 3], + [1, 0, 0], + [0, 0, 0], + [1.0, 2.0, 3.0], + [1.0, 0.0, 0.0], + [0.0, 0.0, 0.0], + [True, True, True], + [True, False, False], + [False, False, False], + [np.nan, np.nan, np.nan], + ], +) +def test_groupby_bool_aggs(agg_func, skipna, vals): + df = DataFrame({"key": ["a"] * 3 + ["b"] * 3, "val": vals * 2}) + + # Figure out expectation using Python builtin + exp = getattr(builtins, agg_func)(vals) + + # edge case for missing data with skipna and 'any' + if skipna and all(isna(vals)) and agg_func == "any": + exp = False + + exp_df = DataFrame([exp] * 2, columns=["val"], index=Index(["a", "b"], name="key")) + result = getattr(df.groupby("key"), agg_func)(skipna=skipna) + tm.assert_frame_equal(result, exp_df) + + +def test_any(gb): + expected = DataFrame( + [[True, True], [False, True]], columns=["B", "C"], index=[1, 3] + ) + expected.index.name = "A" + result = gb.any() + tm.assert_frame_equal(result, expected) + + +def test_groupby_agg_coercing_bools(): + # issue 14873 + dat = DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3], "c": [None, None, 1, 1]}) + gp = dat.groupby("a") + + index = Index([1, 2], name="a") + + result = gp["b"].aggregate(lambda x: (x != 0).all()) + expected = Series([False, True], index=index, name="b") + tm.assert_series_equal(result, expected) + + result = gp["c"].aggregate(lambda x: x.isnull().all()) + expected = Series([True, False], index=index, name="c") + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) +def test_bool_aggs_dup_column_labels(bool_agg_func): + # 21668 + df = DataFrame([[True, True]], columns=["a", "a"]) + grp_by = df.groupby([0]) + result = getattr(grp_by, bool_agg_func)() + + expected = df + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 515774eae009b..79173ac9bd2cb 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -41,41 +41,6 @@ def numpy_dtypes_for_minmax(request): return (dtype, min_val, max_val) -@pytest.mark.parametrize("agg_func", ["any", "all"]) -@pytest.mark.parametrize("skipna", [True, False]) -@pytest.mark.parametrize( - "vals", - [ - ["foo", "bar", "baz"], - ["foo", "", ""], - ["", "", ""], - [1, 2, 3], - [1, 0, 0], - [0, 0, 0], - [1.0, 2.0, 3.0], - [1.0, 0.0, 0.0], - [0.0, 0.0, 0.0], - [True, True, True], - [True, False, False], - [False, False, False], - [np.nan, np.nan, np.nan], - ], -) -def test_groupby_bool_aggs(agg_func, skipna, vals): - df = DataFrame({"key": ["a"] * 3 + ["b"] * 3, "val": vals * 2}) - - # Figure out expectation using Python builtin - exp = getattr(builtins, agg_func)(vals) - - # edge case for missing data with skipna and 'any' - if skipna and all(isna(vals)) and agg_func == "any": - exp = False - - exp_df = DataFrame([exp] * 2, columns=["val"], index=Index(["a", "b"], name="key")) - result = getattr(df.groupby("key"), agg_func)(skipna=skipna) - tm.assert_frame_equal(result, exp_df) - - def test_max_min_non_numeric(): # #2700 aa = DataFrame({"nn": [11, 11, 22, 22], "ii": [1, 2, 3, 4], "ss": 4 * ["mama"]}) @@ -344,14 +309,6 @@ def test_idxmin(self, gb): result = gb.idxmin() tm.assert_frame_equal(result, expected) - def test_any(self, gb): - expected = DataFrame( - [[True, True], [False, True]], columns=["B", "C"], index=[1, 3] - ) - expected.index.name = "A" - result = gb.any() - tm.assert_frame_equal(result, expected) - def test_mad(self, gb, gni): # mad expected = DataFrame([[0], [np.nan]], columns=["B"], index=[1, 3]) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index de508b8cd78ec..6c51e32fa9a78 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -1978,17 +1978,6 @@ def test_groupby_duplicate_index(): tm.assert_series_equal(result, expected) -@pytest.mark.parametrize("bool_agg_func", ["any", "all"]) -def test_bool_aggs_dup_column_labels(bool_agg_func): - # 21668 - df = DataFrame([[True, True]], columns=["a", "a"]) - grp_by = df.groupby([0]) - result = getattr(grp_by, bool_agg_func)() - - expected = df - tm.assert_frame_equal(result, expected) - - @pytest.mark.parametrize( "idx", [Index(["a", "a"]), MultiIndex.from_tuples((("a", "a"), ("a", "a")))] ) From 9a8f9c9fcb1aa1205812f830a67094c5c25085a6 Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Tue, 6 Apr 2021 15:13:02 -0400 Subject: [PATCH 2/3] Fixture fixup --- pandas/tests/groupby/test_any_all.py | 17 ++++++----------- pandas/tests/groupby/test_function.py | 1 - 2 files changed, 6 insertions(+), 12 deletions(-) diff --git a/pandas/tests/groupby/test_any_all.py b/pandas/tests/groupby/test_any_all.py index 579909281f18e..964ea0fc9fae9 100644 --- a/pandas/tests/groupby/test_any_all.py +++ b/pandas/tests/groupby/test_any_all.py @@ -1,24 +1,15 @@ import builtins -from io import StringIO import numpy as np import pytest -from pandas.errors import UnsupportedFunctionCall - -import pandas as pd from pandas import ( DataFrame, Index, - MultiIndex, Series, - Timestamp, - date_range, isna, ) import pandas._testing as tm -import pandas.core.nanops as nanops -from pandas.util import _test_decorators as td @pytest.mark.parametrize("agg_func", ["any", "all"]) @@ -56,12 +47,16 @@ def test_groupby_bool_aggs(agg_func, skipna, vals): tm.assert_frame_equal(result, exp_df) -def test_any(gb): +def test_any(): + df = DataFrame( + [[1, 2, "foo"], [1, np.nan, "bar"], [3, np.nan, "baz"]], + columns=["A", "B", "C"], + ) expected = DataFrame( [[True, True], [False, True]], columns=["B", "C"], index=[1, 3] ) expected.index.name = "A" - result = gb.any() + result = df.groupby("A").any() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 79173ac9bd2cb..843d438018a32 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -14,7 +14,6 @@ Series, Timestamp, date_range, - isna, ) import pandas._testing as tm import pandas.core.nanops as nanops From 5ca9c4b7fdbe009f91dc2575be15c93b15b1729e Mon Sep 17 00:00:00 2001 From: Matthew Zeitlin Date: Tue, 6 Apr 2021 15:19:06 -0400 Subject: [PATCH 3/3] Unmove test --- .../tests/groupby/aggregate/test_aggregate.py | 16 ++++++++++++++++ pandas/tests/groupby/test_any_all.py | 17 ----------------- 2 files changed, 16 insertions(+), 17 deletions(-) diff --git a/pandas/tests/groupby/aggregate/test_aggregate.py b/pandas/tests/groupby/aggregate/test_aggregate.py index 145f2643465ef..fc0b4d86e81bf 100644 --- a/pandas/tests/groupby/aggregate/test_aggregate.py +++ b/pandas/tests/groupby/aggregate/test_aggregate.py @@ -402,6 +402,22 @@ def test_multi_function_flexible_mix(df): grouped.aggregate(d) +def test_groupby_agg_coercing_bools(): + # issue 14873 + dat = DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3], "c": [None, None, 1, 1]}) + gp = dat.groupby("a") + + index = Index([1, 2], name="a") + + result = gp["b"].aggregate(lambda x: (x != 0).all()) + expected = Series([False, True], index=index, name="b") + tm.assert_series_equal(result, expected) + + result = gp["c"].aggregate(lambda x: x.isnull().all()) + expected = Series([True, False], index=index, name="c") + tm.assert_series_equal(result, expected) + + @pytest.mark.parametrize( "op", [ diff --git a/pandas/tests/groupby/test_any_all.py b/pandas/tests/groupby/test_any_all.py index 964ea0fc9fae9..4123fb95002dd 100644 --- a/pandas/tests/groupby/test_any_all.py +++ b/pandas/tests/groupby/test_any_all.py @@ -6,7 +6,6 @@ from pandas import ( DataFrame, Index, - Series, isna, ) import pandas._testing as tm @@ -60,22 +59,6 @@ def test_any(): tm.assert_frame_equal(result, expected) -def test_groupby_agg_coercing_bools(): - # issue 14873 - dat = DataFrame({"a": [1, 1, 2, 2], "b": [0, 1, 2, 3], "c": [None, None, 1, 1]}) - gp = dat.groupby("a") - - index = Index([1, 2], name="a") - - result = gp["b"].aggregate(lambda x: (x != 0).all()) - expected = Series([False, True], index=index, name="b") - tm.assert_series_equal(result, expected) - - result = gp["c"].aggregate(lambda x: x.isnull().all()) - expected = Series([True, False], index=index, name="c") - tm.assert_series_equal(result, expected) - - @pytest.mark.parametrize("bool_agg_func", ["any", "all"]) def test_bool_aggs_dup_column_labels(bool_agg_func): # 21668