From 70a45d75b310f8e975390a21e8efeaf4cf736a9f Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Sat, 16 Dec 2017 19:13:11 -0600 Subject: [PATCH 1/7] pivot_table strings as aggfunc --- doc/source/whatsnew/v0.23.0 | 3 ++- pandas/core/reshape/pivot.py | 4 +++- pandas/tests/reshape/test_pivot.py | 14 ++++++++++++++ 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0 b/doc/source/whatsnew/v0.23.0 index 40e1e2011479c..d05f0069a7b3e 100644 --- a/doc/source/whatsnew/v0.23.0 +++ b/doc/source/whatsnew/v0.23.0 @@ -341,7 +341,8 @@ Reshaping - Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`) - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) - Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) -- +- Bug in :func:`Dataframe.pivot_table` which fails when you pass a string for aggfunc arg (:issue:`18713`) +- Numeric ^^^^^^^ diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index a7695bd6f732f..86e3d5bb347c0 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -38,7 +38,9 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', fill_value=fill_value, aggfunc=func, margins=margins, margins_name=margins_name) pieces.append(table) - keys.append(func.__name__) + keys.append(getattr(func, '__name__', func)) + #keys.append(func.__name__) + return concat(pieces, keys=keys, axis=1) keys = index + columns diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index c8b7ae044b71c..73b172d26cc4b 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1109,6 +1109,20 @@ def test_pivot_margins_name_unicode(self): expected = pd.DataFrame(index=index) tm.assert_frame_equal(table, expected) + def test_pivot_func_strings(self): + # GH #18713 + f = lambda func: pivot_table(self.data, values=['D', 'E'], + index=['A', 'B'], columns='C', + aggfunc=func) + result = f('sum') + expected = f(np.sum) + tm.assert_frame_equal(result, expected) + result = f(['mean', 'std']) + means = f(np.mean) + stds = f(np.std) + expected = concat([means, stds], keys=['mean', 'std'], axis=1) + tm.assert_frame_equal(result, expected) + class TestCrosstab(object): From 5b68ae1423921a35c78b1247e95f53a08b7bdc82 Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Sat, 16 Dec 2017 19:19:28 -0600 Subject: [PATCH 2/7] fixed pep8 issues --- pandas/core/reshape/pivot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 86e3d5bb347c0..77babf718d78c 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -39,7 +39,6 @@ def pivot_table(data, values=None, index=None, columns=None, aggfunc='mean', margins=margins, margins_name=margins_name) pieces.append(table) keys.append(getattr(func, '__name__', func)) - #keys.append(func.__name__) return concat(pieces, keys=keys, axis=1) From c4d4731929fab669ee40c367a5357f4a64a2c478 Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Sun, 17 Dec 2017 20:45:53 -0600 Subject: [PATCH 3/7] fixed whatsnew --- doc/source/whatsnew/v0.23.0 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0 b/doc/source/whatsnew/v0.23.0 index d05f0069a7b3e..6efd26e5fb1ae 100644 --- a/doc/source/whatsnew/v0.23.0 +++ b/doc/source/whatsnew/v0.23.0 @@ -341,7 +341,7 @@ Reshaping - Bug in :func:`DataFrame.stack` which fails trying to sort mixed type levels under Python 3 (:issue:`18310`) - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) - Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) -- Bug in :func:`Dataframe.pivot_table` which fails when you pass a string for aggfunc arg (:issue:`18713`) +- Bug in :func:`Dataframe.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) - Numeric From 54b80afea18713581c3a027fe8edc96c992f5a82 Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Mon, 18 Dec 2017 17:43:39 -0600 Subject: [PATCH 4/7] updated tests --- pandas/tests/reshape/test_pivot.py | 48 +++++++++++++++++++++++------- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 73b172d26cc4b..06634578e65f7 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1109,18 +1109,46 @@ def test_pivot_margins_name_unicode(self): expected = pd.DataFrame(index=index) tm.assert_frame_equal(table, expected) - def test_pivot_func_strings(self): + def test_pivot_string_as_func(self): # GH #18713 - f = lambda func: pivot_table(self.data, values=['D', 'E'], - index=['A', 'B'], columns='C', - aggfunc=func) - result = f('sum') - expected = f(np.sum) + data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', + 'bar', 'bar', 'foo', 'foo', 'foo'], + 'B': ['one', 'one', 'one', 'two', 'one', 'one', + 'one', 'two', 'two', 'two', 'one'], + 'C': range(11)}) + + result = pivot_table(data, index='A', columns='B', aggfunc='sum') + mi = MultiIndex(levels=[['C'], ['one', 'two']], + labels=[[0, 0], [0, 1]], names=[None, 'B']) + expected = DataFrame({('C', 'one'): {'bar': 15, 'foo': 13}, + ('C', 'two'): {'bar': 7, 'foo': 20}}, + columns=mi).rename_axis('A') tm.assert_frame_equal(result, expected) - result = f(['mean', 'std']) - means = f(np.mean) - stds = f(np.std) - expected = concat([means, stds], keys=['mean', 'std'], axis=1) + + result = pivot_table(data, index='A', columns='B', + aggfunc=['sum', 'mean']) + mi = MultiIndex(levels=[['sum', 'mean'], ['C'], ['one', 'two']], + labels=[[0, 0, 1, 1], [0, 0, 0, 0], [0, 1, 0, 1]], + names=[None, None, 'B']) + expected = DataFrame({('mean', 'C', 'one'): {'bar': 5.0, 'foo': 3.25}, + ('mean', 'C', 'two'): {'bar': 7.0, + 'foo': 6.666666666666667}, + ('sum', 'C', 'one'): {'bar': 15, 'foo': 13}, + ('sum', 'C', 'two'): {'bar': 7, 'foo': 20}}, + columns=mi).rename_axis('A') + tm.assert_frame_equal(result, expected) + + funcs = [('sum', np.sum), ('mean', np.mean), ('std', np.std), + (['sum', 'mean'], [np.sum, np.mean]), + (['sum', 'std'], [np.sum, np.std]), + (['std', 'mean'], [np.std, np.mean])] + + @pytest.mark.parametrize("f, f_numpy", funcs) + def test_pivot_string_func_vs_func(self, f, f_numpy): + # GH #18713 + result = pivot_table(self.data, index='A', columns='B', aggfunc=f) + expected = pivot_table(self.data, index='A', columns='B', + aggfunc=f_numpy) tm.assert_frame_equal(result, expected) From 00f2b442bfc5f467af1038ed2e65bdbffca34a62 Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Mon, 18 Dec 2017 18:19:18 -0600 Subject: [PATCH 5/7] added comments to test funcs --- pandas/tests/reshape/test_pivot.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 06634578e65f7..8c5e5f5fbdff1 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1111,6 +1111,7 @@ def test_pivot_margins_name_unicode(self): def test_pivot_string_as_func(self): # GH #18713 + # for correctness purposes data = DataFrame({'A': ['foo', 'foo', 'foo', 'foo', 'bar', 'bar', 'bar', 'bar', 'foo', 'foo', 'foo'], 'B': ['one', 'one', 'one', 'two', 'one', 'one', @@ -1146,6 +1147,7 @@ def test_pivot_string_as_func(self): @pytest.mark.parametrize("f, f_numpy", funcs) def test_pivot_string_func_vs_func(self, f, f_numpy): # GH #18713 + # for consistency purposes result = pivot_table(self.data, index='A', columns='B', aggfunc=f) expected = pivot_table(self.data, index='A', columns='B', aggfunc=f_numpy) From 0decc05ce65bd9e2ae53b3145951b4d842792919 Mon Sep 17 00:00:00 2001 From: Bob Haffner Date: Thu, 21 Dec 2017 10:03:20 -0600 Subject: [PATCH 6/7] removed list usage in test parametrize --- pandas/tests/reshape/test_pivot.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 8c5e5f5fbdff1..6b44a339fad73 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -1139,12 +1139,13 @@ def test_pivot_string_as_func(self): columns=mi).rename_axis('A') tm.assert_frame_equal(result, expected) - funcs = [('sum', np.sum), ('mean', np.mean), ('std', np.std), - (['sum', 'mean'], [np.sum, np.mean]), - (['sum', 'std'], [np.sum, np.std]), - (['std', 'mean'], [np.std, np.mean])] - - @pytest.mark.parametrize("f, f_numpy", funcs) + @pytest.mark.parametrize('f, f_numpy', + [('sum', np.sum), + ('mean', np.mean), + ('std', np.std), + (['sum', 'mean'], [np.sum, np.mean]), + (['sum', 'std'], [np.sum, np.std]), + (['std', 'mean'], [np.std, np.mean])]) def test_pivot_string_func_vs_func(self, f, f_numpy): # GH #18713 # for consistency purposes From e24d2325d3f7b17af6a6e8e451d2acd31ee91dbd Mon Sep 17 00:00:00 2001 From: Jeff Reback Date: Sat, 23 Dec 2017 15:42:38 -0500 Subject: [PATCH 7/7] doc edit --- doc/source/whatsnew/v0.23.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index e448d553a8cef..3f300deddebeb 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -345,7 +345,7 @@ Reshaping - Fixed construction of a :class:`Series` from a ``dict`` containing ``NaN`` as key (:issue:`18480`) - Bug in :func:`Series.rank` where ``Series`` containing ``NaT`` modifies the ``Series`` inplace (:issue:`18521`) - Bug in :func:`Dataframe.pivot_table` which fails when the ``aggfunc`` arg is of type string. The behavior is now consistent with other methods like ``agg`` and ``apply`` (:issue:`18713`) -- + Numeric ^^^^^^^