From 265f086fab4ae76befd5fa07dcb41a6dece7a6d6 Mon Sep 17 00:00:00 2001 From: zym1010 Date: Fri, 9 Mar 2018 10:50:26 -0500 Subject: [PATCH 1/3] add test cases for groupby apply trivial cases --- pandas/tests/groupby/test_groupby.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 0561b3a1d8592..564f1201ff4ac 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -233,6 +233,31 @@ def test_apply_issues(self): lambda x: x['time'][x['value'].idxmax()]) assert_series_equal(result, expected) + def test_apply_trivial(self): + # GH 20066 + def gen_one_df(start=0): + df_this = pd.DataFrame({'key' : ['a', 'a', 'b', 'b', 'a'], + 'data' : [1.0,2.0,3.0,4.0,5.0] }, columns=['key', 'data'])[start:] + return df_this + + expected = pd.concat([gen_one_df(1), gen_one_df(1)], axis=1, keys=['float64', 'object']) + result = df.groupby([str(x) for x in df.dtypes], axis=1).apply(lambda x: gen_one_df(1)) + + assert_frame_equal(result, expected) + + @pytest.mark.xfail(reason="didn't work yet") + def test_apply_trivial_fail(self): + # GH 20066 + def gen_one_df(start=0): + df_this = pd.DataFrame({'key' : ['a', 'a', 'b', 'b', 'a'], + 'data' : [1.0,2.0,3.0,4.0,5.0] }, columns=['key', 'data'])[start:] + return df_this + + expected = pd.concat([gen_one_df(), gen_one_df()], axis=1, keys=['float64', 'object']) + result = df.groupby([str(x) for x in df.dtypes], axis=1).apply(lambda x: gen_one_df()) + + assert_frame_equal(result, expected) + def test_time_field_bug(self): # Test a fix for the following error related to GH issue 11324 When # non-key fields in a group-by dataframe contained time-based fields From d255757c55ade924fa0dcd0f1ff2823715c9f51f Mon Sep 17 00:00:00 2001 From: zym1010 Date: Fri, 9 Mar 2018 11:31:26 -0500 Subject: [PATCH 2/3] PEP8 --- pandas/tests/groupby/test_groupby.py | 24 ++++++++++++++++-------- 1 file changed, 16 
insertions(+), 8 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 564f1201ff4ac..0e5ab374a8a49 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -236,12 +236,16 @@ def test_apply_issues(self): def test_apply_trivial(self): # GH 20066 def gen_one_df(start=0): - df_this = pd.DataFrame({'key' : ['a', 'a', 'b', 'b', 'a'], - 'data' : [1.0,2.0,3.0,4.0,5.0] }, columns=['key', 'data'])[start:] + df_this = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'], + 'data': [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=['key', 'data'])[start:] return df_this - expected = pd.concat([gen_one_df(1), gen_one_df(1)], axis=1, keys=['float64', 'object']) - result = df.groupby([str(x) for x in df.dtypes], axis=1).apply(lambda x: gen_one_df(1)) + df = gen_one_df() + expected = pd.concat([gen_one_df(1), gen_one_df(1)], + axis=1, keys=['float64', 'object']) + result = df.groupby([str(x) for x in df.dtypes], + axis=1).apply(lambda x: gen_one_df(1)) assert_frame_equal(result, expected) @@ -249,12 +253,16 @@ def gen_one_df(start=0): def test_apply_trivial_fail(self): # GH 20066 def gen_one_df(start=0): - df_this = pd.DataFrame({'key' : ['a', 'a', 'b', 'b', 'a'], - 'data' : [1.0,2.0,3.0,4.0,5.0] }, columns=['key', 'data'])[start:] + df_this = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'], + 'data': [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=['key', 'data'])[start:] return df_this - expected = pd.concat([gen_one_df(), gen_one_df()], axis=1, keys=['float64', 'object']) - result = df.groupby([str(x) for x in df.dtypes], axis=1).apply(lambda x: gen_one_df()) + df = gen_one_df() + expected = pd.concat([gen_one_df(), gen_one_df()], + axis=1, keys=['float64', 'object']) + result = df.groupby([str(x) for x in df.dtypes], + axis=1).apply(lambda x: gen_one_df()) assert_frame_equal(result, expected) From ac6d5aec2eff44f896deab8b10ca60ffcca30e28 Mon Sep 17 00:00:00 2001 From: zym1010 Date: Fri, 9 Mar 2018 20:40:21 -0500 
Subject: [PATCH 3/3] add some comments and improve coding style --- pandas/tests/groupby/test_groupby.py | 35 +++++++++++++--------------- 1 file changed, 16 insertions(+), 19 deletions(-) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 0e5ab374a8a49..6756d25ad1707 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -235,34 +235,31 @@ def test_apply_issues(self): def test_apply_trivial(self): # GH 20066 - def gen_one_df(start=0): - df_this = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'], - 'data': [1.0, 2.0, 3.0, 4.0, 5.0]}, - columns=['key', 'data'])[start:] - return df_this - - df = gen_one_df() - expected = pd.concat([gen_one_df(1), gen_one_df(1)], + # trivial apply: ignore input and return a constant dataframe. + df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'], + 'data': [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=['key', 'data']) + expected = pd.concat([df.iloc[1:], df.iloc[1:]], axis=1, keys=['float64', 'object']) result = df.groupby([str(x) for x in df.dtypes], - axis=1).apply(lambda x: gen_one_df(1)) + axis=1).apply(lambda x: df.iloc[1:]) assert_frame_equal(result, expected) - @pytest.mark.xfail(reason="didn't work yet") + @pytest.mark.xfail(reason=("GH 20066; function passed into apply " + "returns a DataFrame with the same index " + "as the one to create GroupBy object.")) def test_apply_trivial_fail(self): # GH 20066 - def gen_one_df(start=0): - df_this = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'], - 'data': [1.0, 2.0, 3.0, 4.0, 5.0]}, - columns=['key', 'data'])[start:] - return df_this - - df = gen_one_df() - expected = pd.concat([gen_one_df(), gen_one_df()], + # trivial apply fails if the constant dataframe has the same index + # as the one used to create the GroupBy object. 
+ df = pd.DataFrame({'key': ['a', 'a', 'b', 'b', 'a'], + 'data': [1.0, 2.0, 3.0, 4.0, 5.0]}, + columns=['key', 'data']) + expected = pd.concat([df, df], axis=1, keys=['float64', 'object']) result = df.groupby([str(x) for x in df.dtypes], - axis=1).apply(lambda x: gen_one_df()) + axis=1).apply(lambda x: df) assert_frame_equal(result, expected)