From 31ed6d20620f23a5129edb010147aba80d9e8b87 Mon Sep 17 00:00:00 2001 From: Rohith295 Date: Sat, 20 Jun 2020 15:03:15 +0200 Subject: [PATCH 1/4] :white_check_mark: --- pandas/tests/groupby/test_apply.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 8468a21904bf8..482bb14cc8d03 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -974,3 +974,21 @@ def test_apply_function_with_indexing_return_column(): result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) expected = DataFrame({"foo1": ["one", "three", "two"], "foo2": [3.0, 4.0, 4.0]}) tm.assert_frame_equal(result, expected) + + +def test_apply_function_called_count(capsys): + # GH: 31111 + # groupby-apply need to execute len(set(group_by_columns)) times + # `https://github.com/pandas-dev/pandas/issues/31111` + + + function_called_count = 2 # Number of times `apply` should call a function for the current test + + df = pd.DataFrame({"group_by_column": [0, 0, 0, 0, 1, 1, 1, 1], + "test_column": ["0", "2", "4", "6", "8", "10", "12", "14"]}, + index=["0", "2", "4", "6", "8", "10", "12", "14"]) + + df.groupby('group_by_column').apply(lambda df:print("function_called")) + + # If `groupby` behaves unexpectedly, this test will break + assert capsys.readouterr().out.count("function_called") == function_called_count \ No newline at end of file From 0d9ce0368a9e50721a97b0d5e743e7165cb92dc3 Mon Sep 17 00:00:00 2001 From: Rohith295 Date: Sat, 20 Jun 2020 15:28:50 +0200 Subject: [PATCH 2/4] :white_check_mark: --- pandas/tests/groupby/test_apply.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index b28153cd2568b..92f99a2f141d3 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -979,16 +979,15 @@ def test_apply_function_with_indexing_return_column(): def test_apply_function_called_count(capsys): # GH: 31111 # groupby-apply need to execute len(set(group_by_columns)) times - # `https://github.com/pandas-dev/pandas/issues/31111` - - function_called_count = 2 # Number of times `apply` should call a function for the current test - + expected = 2 # Number of times `apply` should call a function for the current test + df = pd.DataFrame({"group_by_column": [0, 0, 0, 0, 1, 1, 1, 1], - "test_column": ["0", "2", "4", "6", "8", "10", "12", "14"]}, - index=["0", "2", "4", "6", "8", "10", "12", "14"]) + "test_column": ["0", "2", "4", "6", "8", "10", "12", "14"]}, + index=["0", "2", "4", "6", "8", "10", "12", "14"]) - df.groupby('group_by_column').apply(lambda df:print("function_called")) + df.groupby('group_by_column').apply(lambda df: print("function_called")) + result = capsys.readouterr().out.count("function_called") # If `groupby` behaves unexpectedly, this test will break - assert capsys.readouterr().out.count("function_called") == function_called_count \ No newline at end of file + assert result == expected \ No newline at end of file From 11755d26435cc06d33d365fa70598e5ba577fb29 Mon Sep 17 00:00:00 2001 From: Rohith295 Date: Sat, 20 Jun 2020 15:33:55 +0200 Subject: [PATCH 3/4] reformatted accordingly, for linting issues --- pandas/tests/groupby/test_apply.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 92f99a2f141d3..3c8f8b649e60e 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -982,12 +982,16 @@ def test_apply_function_called_count(capsys): expected = 2 # Number of times `apply` should call a function for the current test - df = pd.DataFrame({"group_by_column": [0, 0, 0, 0, 1, 1, 1, 1], - "test_column": ["0", "2", "4", "6", "8", "10", "12", "14"]}, - index=["0", "2", "4", "6", "8", "10", "12", "14"]) + df = pd.DataFrame( + { + "group_by_column": [0, 0, 0, 0, 1, 1, 1, 1], + "test_column": ["0", "2", "4", "6", "8", "10", "12", "14"], + }, + index=["0", "2", "4", "6", "8", "10", "12", "14"], + ) - df.groupby('group_by_column').apply(lambda df: print("function_called")) + df.groupby("group_by_column").apply(lambda df: print("function_called")) result = capsys.readouterr().out.count("function_called") # If `groupby` behaves unexpectedly, this test will break - assert result == expected \ No newline at end of file + assert result == expected From c4b6c59644d96abe4a9b65da0428da7a6fd8dcfe Mon Sep 17 00:00:00 2001 From: Rohith295 Date: Sat, 20 Jun 2020 16:57:13 +0200 Subject: [PATCH 4/4] Fixed as per the review comments --- pandas/tests/groupby/test_apply.py | 42 +++++++++++++++--------------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 3c8f8b649e60e..1945647ced08f 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -190,6 +190,27 @@ def f_constant_df(group): assert names == group_names +def test_group_apply_once_per_group2(capsys): + # GH: 31111 + # groupby-apply need to execute len(set(group_by_columns)) times + + expected = 2 # Number of times `apply` should call a function for the current test + + df = pd.DataFrame( + { + "group_by_column": [0, 0, 0, 0, 1, 1, 1, 1], + "test_column": ["0", "2", "4", "6", "8", "10", "12", "14"], + }, + index=["0", "2", "4", "6", "8", "10", "12", "14"], + ) + + df.groupby("group_by_column").apply(lambda df: print("function_called")) + + result = capsys.readouterr().out.count("function_called") + # If `groupby` behaves unexpectedly, this test will break + assert result == expected + + def test_apply_fast_slow_identical(): # GH 31613 @@ -974,24 +995,3 @@ def test_apply_function_with_indexing_return_column(): result = df.groupby("foo1", as_index=False).apply(lambda x: x.mean()) expected = DataFrame({"foo1": ["one", "three", "two"], "foo2": [3.0, 4.0, 4.0]}) tm.assert_frame_equal(result, expected) - - -def test_apply_function_called_count(capsys): - # GH: 31111 - # groupby-apply need to execute len(set(group_by_columns)) times - - expected = 2 # Number of times `apply` should call a function for the current test - - df = pd.DataFrame( - { - "group_by_column": [0, 0, 0, 0, 1, 1, 1, 1], - "test_column": ["0", "2", "4", "6", "8", "10", "12", "14"], - }, - index=["0", "2", "4", "6", "8", "10", "12", "14"], - ) - - df.groupby("group_by_column").apply(lambda df: print("function_called")) - - result = capsys.readouterr().out.count("function_called") - # If `groupby` behaves unexpectedly, this test will break - assert result == expected