From 6095c991cfbdeda99d11514cdc1faa891f495772 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Sat, 29 Feb 2020 14:25:09 +0200 Subject: [PATCH 1/3] TST: Removed import of itertools --- pandas/tests/groupby/test_function.py | 55 ++++++++++++++++----------- 1 file changed, 32 insertions(+), 23 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index c402ca194648f..5cf56a0f343b6 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1,7 +1,6 @@ import builtins -import datetime as dt +import datetime from io import StringIO -from itertools import product from string import ascii_lowercase import numpy as np @@ -438,7 +437,7 @@ def test_groupby_non_arithmetic_agg_types(dtype, method, data): Timestamp("2011-01-15 12:50:28.502376"), Timestamp("2011-01-20 12:50:28.593448"), ), - (24650000000000001, 24650000000000002), + (24_650_000_000_000_001, 24_650_000_000_000_002), ], ) def test_groupby_non_arithmetic_agg_int_like_precision(i): @@ -1133,19 +1132,31 @@ def test_nunique_with_timegrouper(): ), ( ["x", "x", "x"], - [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + [datetime.date(2019, 1, 1), NaT, datetime.date(2019, 1, 1)], True, Series([1], index=pd.Index(["x"], name="key"), name="data"), ), ( ["x", "x", "x", "y", "y"], - [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + [ + datetime.date(2019, 1, 1), + NaT, + datetime.date(2019, 1, 1), + NaT, + datetime.date(2019, 1, 1), + ], False, Series([2, 2], index=pd.Index(["x", "y"], name="key"), name="data"), ), ( ["x", "x", "x", "x", "y"], - [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], + [ + datetime.date(2019, 1, 1), + NaT, + datetime.date(2019, 1, 1), + NaT, + datetime.date(2019, 1, 1), + ], False, Series([2, 1], index=pd.Index(["x", "y"], name="key"), name="data"), ), @@ -1296,36 +1307,32 @@ def __eq__(self, other): # -------------------------------- -def 
test_size(df): - grouped = df.groupby(["A", "B"]) +@pytest.mark.parametrize("group_key", ["A", "B", ["A", "B"]]) +def test_size(df, group_key): + grouped = df.groupby(group_key) result = grouped.size() for key, group in grouped: assert result[key] == len(group) - grouped = df.groupby("A") - result = grouped.size() - for key, group in grouped: - assert result[key] == len(group) - grouped = df.groupby("B") - result = grouped.size() - for key, group in grouped: - assert result[key] == len(group) +@pytest.mark.parametrize("key", ["A", "B", ["A", "B"]]) +@pytest.mark.parametrize("sort", [True, False]) +def test_size_sort(df, sort, key): + df = DataFrame(np.random.choice(20, (1000, 3)), columns=list("ABC")) + left = df.groupby(key, sort=sort).size() + right = df.groupby(key, sort=sort)["C"].apply(lambda a: a.shape[0]) + tm.assert_series_equal(left, right, check_names=False) - df = DataFrame(np.random.choice(20, (1000, 3)), columns=list("abc")) - for sort, key in product((False, True), ("a", "b", ["a", "b"])): - left = df.groupby(key, sort=sort).size() - right = df.groupby(key, sort=sort)["c"].apply(lambda a: a.shape[0]) - tm.assert_series_equal(left, right, check_names=False) - # GH11699 +def test_size_series_dataframe(): + # https://github.com/pandas-dev/pandas/issues/11699 df = DataFrame(columns=["A", "B"]) out = Series(dtype="int64", index=Index([], name="A")) tm.assert_series_equal(df.groupby("A").size(), out) def test_size_groupby_all_null(): - # GH23050 + # https://github.com/pandas-dev/pandas/issues/23050 # Assert no 'Value Error : Length of passed values is 2, index implies 0' df = DataFrame({"A": [None, None]}) # all-null groups result = df.groupby("A").size() @@ -1335,6 +1342,8 @@ def test_size_groupby_all_null(): # quantile # -------------------------------- + + @pytest.mark.parametrize( "interpolation", ["linear", "lower", "higher", "nearest", "midpoint"] ) From 7fe8783792db212e4580680bc5bfec6a9d9cb040 Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: 
Thu, 5 Mar 2020 12:21:44 +0200 Subject: [PATCH 2/3] Removed unrelated changes --- pandas/tests/groupby/test_function.py | 22 +++++----------------- 1 file changed, 5 insertions(+), 17 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 5cf56a0f343b6..2540af0d0630a 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1,5 +1,5 @@ import builtins -import datetime +import datetime as dt from io import StringIO from string import ascii_lowercase @@ -437,7 +437,7 @@ def test_groupby_non_arithmetic_agg_types(dtype, method, data): Timestamp("2011-01-15 12:50:28.502376"), Timestamp("2011-01-20 12:50:28.593448"), ), - (24_650_000_000_000_001, 24_650_000_000_000_002), + (24650000000000001, 24650000000000002), ], ) def test_groupby_non_arithmetic_agg_int_like_precision(i): @@ -1132,31 +1132,19 @@ def test_nunique_with_timegrouper(): ), ( ["x", "x", "x"], - [datetime.date(2019, 1, 1), NaT, datetime.date(2019, 1, 1)], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], True, Series([1], index=pd.Index(["x"], name="key"), name="data"), ), ( ["x", "x", "x", "y", "y"], - [ - datetime.date(2019, 1, 1), - NaT, - datetime.date(2019, 1, 1), - NaT, - datetime.date(2019, 1, 1), - ], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], False, Series([2, 2], index=pd.Index(["x", "y"], name="key"), name="data"), ), ( ["x", "x", "x", "x", "y"], - [ - datetime.date(2019, 1, 1), - NaT, - datetime.date(2019, 1, 1), - NaT, - datetime.date(2019, 1, 1), - ], + [dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1), NaT, dt.date(2019, 1, 1)], False, Series([2, 1], index=pd.Index(["x", "y"], name="key"), name="data"), ), From fdf23ee02725891952b6a252b0e8b18b6e61536f Mon Sep 17 00:00:00 2001 From: MomIsBestFriend <> Date: Thu, 5 Mar 2020 12:28:56 +0200 Subject: [PATCH 3/3] Using "by" variable in both test cases REF: 
https://github.com/pandas-dev/pandas/pull/32364#discussion_r386040458 https://github.com/pandas-dev/pandas/pull/32364#discussion_r386040656 --- pandas/tests/groupby/test_function.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/tests/groupby/test_function.py b/pandas/tests/groupby/test_function.py index 2540af0d0630a..83080aa98648f 100644 --- a/pandas/tests/groupby/test_function.py +++ b/pandas/tests/groupby/test_function.py @@ -1295,20 +1295,20 @@ def __eq__(self, other): # -------------------------------- -@pytest.mark.parametrize("group_key", ["A", "B", ["A", "B"]]) -def test_size(df, group_key): - grouped = df.groupby(group_key) +@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) +def test_size(df, by): + grouped = df.groupby(by=by) result = grouped.size() for key, group in grouped: assert result[key] == len(group) -@pytest.mark.parametrize("key", ["A", "B", ["A", "B"]]) +@pytest.mark.parametrize("by", ["A", "B", ["A", "B"]]) @pytest.mark.parametrize("sort", [True, False]) -def test_size_sort(df, sort, key): +def test_size_sort(df, sort, by): df = DataFrame(np.random.choice(20, (1000, 3)), columns=list("ABC")) - left = df.groupby(key, sort=sort).size() - right = df.groupby(key, sort=sort)["C"].apply(lambda a: a.shape[0]) + left = df.groupby(by=by, sort=sort).size() + right = df.groupby(by=by, sort=sort)["C"].apply(lambda a: a.shape[0]) tm.assert_series_equal(left, right, check_names=False)