From b74ee0f8342c8832b5df80a264dfe872fdbe6271 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Mon, 4 Dec 2017 23:11:18 +0200 Subject: [PATCH 01/18] add pivot with multi-values --- pandas/core/reshape/reshape.py | 16 +++++++++------- pandas/tests/reshape/test_pivot.py | 23 +++++++++++++++++++++++ 2 files changed, 32 insertions(+), 7 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 5bb86885c0875..ab4bc561b5dc0 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -374,15 +374,17 @@ def pivot(self, index=None, columns=None, values=None): cols = [columns] if index is None else [index, columns] append = index is None indexed = self.set_index(cols, append=append) - return indexed.unstack(columns) else: - if index is None: - index = self.index + index = self.index if index is None else self[index] + index = MultiIndex.from_arrays([index, self[columns]]) + if isinstance(values, list): + indexed = DataFrame(self[values].values, + index=index, + columns=values) else: - index = self[index] - indexed = Series(self[values].values, - index=MultiIndex.from_arrays([index, self[columns]])) - return indexed.unstack(columns) + indexed = Series(self[values].values, + index=index) + return indexed.unstack(columns) def pivot_simple(index, columns, values): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 857116c8f8f78..fdb02d4042f10 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -353,6 +353,29 @@ def test_pivot_periods(self): pv = df.pivot(index='p1', columns='p2', values='data1') tm.assert_frame_equal(pv, expected) + def test_pivot_with_multi_values(self): + df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], + 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], + 'baz': [1, 2, 3, 4, 5, 6], + 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) + + results = df.pivot(index='zoo', columns='foo', values=['bar', 'baz']) + + data = [[None, 'A', None, 4], + [None, 'C', None, 6], + [None, 'B', None, 5], + ['A', None, 1, None], + ['B', None, 2, None], + ['C', None, 3, None]] + index = Index(data=['q', 't', 'w', 'x', 'y', 'z'], name='zoo') + columns = MultiIndex(levels=[['bar', 'baz'], ['one', 'two']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[None, 'foo']) + expected = DataFrame(data=data, index=index, + columns=columns, dtype='object') + + tm.assert_frame_equal(results, expected) + def test_margins(self): def _check_output(result, values_col, index=['A', 'B'], columns=['C'], From a36f9e0eb4d53cbd4e6cef2cc01cfef08337450b Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Mon, 4 Dec 2017 23:16:44 +0200 Subject: [PATCH 02/18] update whatsnew --- doc/source/enhancingperf.rst | 3 +-- doc/source/install.rst | 2 +- doc/source/whatsnew/v0.22.0.txt | 11 +++++------ 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/doc/source/enhancingperf.rst b/doc/source/enhancingperf.rst index 264bd1de1fc77..cbe945e0cf2cf 100644 --- a/doc/source/enhancingperf.rst +++ b/doc/source/enhancingperf.rst @@ -94,8 +94,7 @@ hence we'll concentrate our efforts cythonizing these two functions. Plain cython ~~~~~~~~~~~~ -First we're going to need to import the cython magic function to ipython (for -cython versions < 0.21 you can use ``%load_ext cythonmagic``): +First we're going to need to import the cython magic function to ipython: .. ipython:: python :okwarning: diff --git a/doc/source/install.rst b/doc/source/install.rst index ae89c64b6e91e..aeb1abbadabb3 100644 --- a/doc/source/install.rst +++ b/doc/source/install.rst @@ -228,7 +228,7 @@ Optional Dependencies ~~~~~~~~~~~~~~~~~~~~~ * `Cython `__: Only necessary to build development - version. Version 0.23 or higher. + version. Version 0.24 or higher. * `SciPy `__: miscellaneous statistical functions, Version 0.14.0 or higher * `xarray `__: pandas like handling for > 2 dims, needed for converting Panels to xarray objects. Version 0.7.0 or higher is recommended. * `PyTables `__: necessary for HDF5-based storage. Version 3.0.0 or higher required, Version 3.2.1 or higher highly recommended. diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 5e605ecb7d8d5..300734d90934c 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -77,16 +77,13 @@ Other Enhancements - :func:`Series.fillna` now accepts a Series or a dict as a ``value`` for a categorical dtype (:issue:`17033`) - :func:`pandas.read_clipboard` updated to use qtpy, falling back to PyQt5 and then PyQt4, adding compatibility with Python3 and multiple python-qt bindings (:issue:`17722`) - Improved wording of ``ValueError`` raised in :func:`read_csv` when the ``usecols`` argument cannot match all columns. (:issue:`17301`) +- :func:`DataFrame.pivot` now accepts a list of values (:issue:`17160`). .. _whatsnew_0220.api_breaking: Backwards incompatible API changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) -- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) -- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) - .. _whatsnew_0220.api_breaking.deps: Dependencies have increased minimum versions @@ -104,8 +101,6 @@ If installed, we now require: +-----------------+-----------------+----------+ - - .. _whatsnew_0220.api: Other API Changes @@ -129,6 +124,10 @@ Other API Changes - :func:`DataFrame.from_items` provides a more informative error message when passed scalar values (:issue:`17312`) - When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`) - Building from source now explicity requires ``setuptools`` in ``setup.py`` (:issue:`18113`) +- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) +- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) +- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) +- Building pandas for development now requires ``cython >= 0.24`` (:issue:`18613`) .. _whatsnew_0220.deprecations: From 5f94728dc421d992e44e990b2c8d6a3a3e8f6d6e Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Tue, 5 Dec 2017 03:55:52 +0200 Subject: [PATCH 03/18] fix review comments --- pandas/core/reshape/reshape.py | 3 ++- pandas/tests/reshape/test_pivot.py | 30 ++++++++++++++++++++---------- 2 files changed, 22 insertions(+), 11 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index ab4bc561b5dc0..57041e56c1d47 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -377,7 +377,8 @@ def pivot(self, index=None, columns=None, values=None): else: index = self.index if index is None else self[index] index = MultiIndex.from_arrays([index, self[columns]]) - if isinstance(values, list): + if is_list_like(values): + # use DF in case of list like (Iterable) values (e.g: lists, tuples, sets, NumPy arrays, and Pandas Series) indexed = DataFrame(self[values].values, index=index, columns=values) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index fdb02d4042f10..a2fc6a549d95f 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -353,20 +353,25 @@ def test_pivot_periods(self): pv = df.pivot(index='p1', columns='p2', values='data1') tm.assert_frame_equal(pv, expected) - def test_pivot_with_multi_values(self): + def test_pivot_with_list_like_values(self): + # issue #17160: Make `DataFrame.pivot` accepts a list of column names as values df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], 'baz': [1, 2, 3, 4, 5, 6], 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) - results = df.pivot(index='zoo', columns='foo', values=['bar', 'baz']) - - data = [[None, 'A', None, 4], - [None, 'C', None, 6], - [None, 'B', None, 5], - ['A', None, 1, None], - ['B', None, 2, None], - ['C', None, 3, None]] + result_list = df.pivot(index='zoo', columns='foo', values=['bar', 'baz']) + result_tuple = df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) + result_array = df.pivot(index='zoo', columns='foo', values=np.array(['bar', 'baz'])) + result_series = df.pivot(index='zoo', columns='foo', values=pd.Series(['bar', 'baz'])) + result_index = df.pivot(index='zoo', columns='foo', values=pd.Index(['bar', 'baz'])) + + data = [[np.nan, 'A', np.nan, 4], + [np.nan, 'C', np.nan, 6], + [np.nan, 'B', np.nan, 5], + ['A', np.nan, 1, np.nan], + ['B', np.nan, 2, np.nan], + ['C', np.nan, 3, np.nan]] index = Index(data=['q', 't', 'w', 'x', 'y', 'z'], name='zoo') columns = MultiIndex(levels=[['bar', 'baz'], ['one', 'two']], labels=[[0, 0, 1, 1], [0, 1, 0, 1]], @@ -374,7 +379,12 @@ def test_pivot_with_multi_values(self): expected = DataFrame(data=data, index=index, columns=columns, dtype='object') - tm.assert_frame_equal(results, expected) + tm.assert_frame_equal(result_list, expected) + tm.assert_frame_equal(result_tuple, expected) + tm.assert_frame_equal(result_array, expected) + tm.assert_frame_equal(result_series, expected) + tm.assert_frame_equal(result_index, expected) + def test_margins(self): def _check_output(result, values_col, index=['A', 'B'], From 3008d8eb43d4f5b444c8093d92fbc518a49fa238 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Tue, 5 Dec 2017 04:01:44 +0200 Subject: [PATCH 04/18] PEP8 fixes --- pandas/core/reshape/reshape.py | 2 +- pandas/tests/reshape/test_pivot.py | 18 +++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 57041e56c1d47..f596fe53e96bc 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -378,7 +378,7 @@ def pivot(self, index=None, columns=None, values=None): index = self.index if index is None else self[index] index = MultiIndex.from_arrays([index, self[columns]]) if is_list_like(values): - # use DF in case of list like (Iterable) values (e.g: lists, tuples, sets, NumPy arrays, and Pandas Series) + # use DF in case of Iterable values (e.g: list, tuple, np.array) indexed = DataFrame(self[values].values, index=index, columns=values) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index a2fc6a549d95f..0669ac62bc5fa 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -354,17 +354,22 @@ def test_pivot_periods(self): tm.assert_frame_equal(pv, expected) def test_pivot_with_list_like_values(self): - # issue #17160: Make `DataFrame.pivot` accepts a list of column names as values + # issue #17160 df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], 'baz': [1, 2, 3, 4, 5, 6], 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) - result_list = df.pivot(index='zoo', columns='foo', values=['bar', 'baz']) - result_tuple = df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) - result_array = df.pivot(index='zoo', columns='foo', values=np.array(['bar', 'baz'])) - result_series = df.pivot(index='zoo', columns='foo', values=pd.Series(['bar', 'baz'])) - result_index = df.pivot(index='zoo', columns='foo', values=pd.Index(['bar', 'baz'])) + result_list = df.pivot(index='zoo', columns='foo', + values=['bar', 'baz']) + result_tuple = df.pivot(index='zoo', columns='foo', + values=('bar', 'baz')) + result_array = df.pivot(index='zoo', columns='foo', + values=np.array(['bar', 'baz'])) + result_series = df.pivot(index='zoo', columns='foo', + values=pd.Series(['bar', 'baz'])) + result_index = df.pivot(index='zoo', columns='foo', + values=pd.Index(['bar', 'baz'])) data = [[np.nan, 'A', np.nan, 4], [np.nan, 'C', np.nan, 6], @@ -385,7 +390,6 @@ def test_pivot_with_list_like_values(self): tm.assert_frame_equal(result_series, expected) tm.assert_frame_equal(result_index, expected) - def test_margins(self): def _check_output(result, values_col, index=['A', 'B'], columns=['C'], From 539ffdc8e236e5564a187bd2e4e5e6f0f1540be0 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Fri, 8 Dec 2017 04:08:53 +0200 Subject: [PATCH 05/18] merge master --- doc/source/whatsnew/v0.22.0.txt | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 1bb371760ab31..c353376d8608c 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -159,12 +159,9 @@ If installed, we now require: +-----------------+-----------------+----------+ -<<<<<<< HEAD -======= - Building pandas for development now requires ``cython >= 0.24`` (:issue:`18613`) - Building from source now explicity requires ``setuptools`` in ``setup.py`` (:issue:`18113`) ->>>>>>> upstream/master .. _whatsnew_0220.api: Other API Changes @@ -187,17 +184,9 @@ Other API Changes - Restricted ``DateOffset`` keyword arguments. Previously, ``DateOffset`` subclasses allowed arbitrary keyword arguments which could lead to unexpected behavior. Now, only valid arguments will be accepted. (:issue:`17176`, :issue:`18226`). - :func:`DataFrame.from_items` provides a more informative error message when passed scalar values (:issue:`17312`) - When created with duplicate labels, ``MultiIndex`` now raises a ``ValueError``. (:issue:`17464`) -<<<<<<< HEAD -- Building from source now explicity requires ``setuptools`` in ``setup.py`` (:issue:`18113`) -- :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) -- :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) -- The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) -- Building pandas for development now requires ``cython >= 0.24`` (:issue:`18613`) -======= - :func:`Series.fillna` now raises a ``TypeError`` instead of a ``ValueError`` when passed a list, tuple or DataFrame as a ``value`` (:issue:`18293`) - :func:`pandas.DataFrame.merge` no longer casts a ``float`` column to ``object`` when merging on ``int`` and ``float`` columns (:issue:`16572`) - The default NA value for :class:`UInt64Index` has changed from 0 to ``NaN``, which impacts methods that mask with NA, such as ``UInt64Index.where()`` (:issue:`18398`) ->>>>>>> upstream/master .. _whatsnew_0220.deprecations: @@ -336,4 +325,4 @@ Other ^^^^^ - Improved error message when attempting to use a Python keyword as an identifier in a ``numexpr`` backed query (:issue:`18221`) -- +- \ No newline at end of file From 6646798fec63af81fc230f62f80a4134294011c0 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Sat, 16 Dec 2017 19:21:45 +0200 Subject: [PATCH 06/18] remove tuple from test --- pandas/tests/reshape/test_pivot.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index be1cdfff304cf..0e060660b4d85 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -373,6 +373,7 @@ def test_pivot_periods(self): def test_pivot_with_list_like_values(self): # issue #17160 + from collections import namedtuple df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], 'baz': [1, 2, 3, 4, 5, 6], @@ -380,8 +381,6 @@ def test_pivot_with_list_like_values(self): result_list = df.pivot(index='zoo', columns='foo', values=['bar', 'baz']) - result_tuple = df.pivot(index='zoo', columns='foo', - values=('bar', 'baz')) result_array = df.pivot(index='zoo', columns='foo', values=np.array(['bar', 'baz'])) result_series = df.pivot(index='zoo', columns='foo', @@ -403,7 +402,6 @@ def test_pivot_with_list_like_values(self): columns=columns, dtype='object') tm.assert_frame_equal(result_list, expected) - tm.assert_frame_equal(result_tuple, expected) tm.assert_frame_equal(result_array, expected) tm.assert_frame_equal(result_series, expected) tm.assert_frame_equal(result_index, expected) From ea77a97106ed45bacfce222cccc0a119b24cbbb8 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Sat, 16 Dec 2017 20:05:43 +0200 Subject: [PATCH 07/18] update pivot docstring --- pandas/core/frame.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 65934494b321b..912a90b92dde4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4344,8 +4344,8 @@ def pivot(self, index=None, columns=None, values=None): existing index. columns : string or object Column name to use to make new frame's columns - values : string or object, optional - Column name to use for populating new frame's values. If not + values : string, object or a list of the previous, optional + Column name(s) to use for populating new frame's values. If not specified, all remaining columns will be used and the result will have hierarchically indexed columns @@ -4370,7 +4370,8 @@ def pivot(self, index=None, columns=None, values=None): >>> df = pd.DataFrame({'foo': ['one','one','one','two','two','two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], - 'baz': [1, 2, 3, 4, 5, 6]}) + 'baz': [1, 2, 3, 4, 5, 6], + 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) >>> df foo bar baz 0 one A 1 @@ -4390,6 +4391,19 @@ def pivot(self, index=None, columns=None, values=None): one 1 2 3 two 4 5 6 + >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']) + A B C A B C + one 1 2 3 x y z + two 4 5 6 q w t + + >>> df.pivot(index='zoo', columns='foo', values=['bar', 'baz']) + one two one two + q None A None 4 + t None C None 6 + w None B None 5 + x A None 1 None + y B None 2 None + z C None 3 None """ from pandas.core.reshape.reshape import pivot From 1d6bf58f56f039f711a59999e8125b07d34de1e9 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf ElDen Date: Sat, 16 Dec 2017 21:23:48 +0200 Subject: [PATCH 08/18] remove unused import --- pandas/tests/reshape/test_pivot.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 0e060660b4d85..2ff43352fbb66 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -373,7 +373,6 @@ def test_pivot_periods(self): def test_pivot_with_list_like_values(self): # issue #17160 - from collections import namedtuple df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], 'baz': [1, 2, 3, 4, 5, 6], From df2f0b0fe55b68e5d72346c2e14f8ac79f9b173a Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Tue, 2 Jan 2018 17:58:08 +0200 Subject: [PATCH 09/18] Push requested changes --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/frame.py | 30 +++++++++++++----------------- pandas/core/reshape/reshape.py | 2 +- pandas/tests/reshape/test_pivot.py | 21 +++++++-------------- 4 files changed, 22 insertions(+), 32 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index bd3bee507baa3..06209a5c6c421 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -145,6 +145,7 @@ Other Enhancements - ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method. Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). - :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). +- :func:`DataFrame.pivot` now accepts a list for the ``values=`` kwarg (:issue:`17160`). .. _whatsnew_0230.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c7264f784c113..b6ef0eea9e715 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4355,7 +4355,7 @@ def pivot(self, index=None, columns=None, values=None): existing index. columns : string or object Column name to use to make new frame's columns - values : string, object or a list of the previous, optional + values : string, object or (0.23.0) a list of the previous, optional Column name(s) to use for populating new frame's values. If not specified, all remaining columns will be used and the result will have hierarchically indexed columns @@ -4384,13 +4384,13 @@ def pivot(self, index=None, columns=None, values=None): 'baz': [1, 2, 3, 4, 5, 6], 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) >>> df - foo bar baz - 0 one A 1 - 1 one B 2 - 2 one C 3 - 3 two A 4 - 4 two B 5 - 5 two C 6 + foo bar baz zoo + 0 one A 1 x + 1 one B 2 y + 2 one C 3 z + 3 two A 4 q + 4 two B 5 w + 5 two C 6 t >>> df.pivot(index='foo', columns='bar', values='baz') A B C @@ -4402,20 +4402,16 @@ def pivot(self, index=None, columns=None, values=None): one 1 2 3 two 4 5 6 + >>> df.pivot(index='foo', columns='bar', values=['baz']) + A B C + one 1 2 3 + two 4 5 6 + >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']) A B C A B C one 1 2 3 x y z two 4 5 6 q w t - >>> df.pivot(index='zoo', columns='foo', values=['bar', 'baz']) - one two one two - q None A None 4 - t None C None 6 - w None B None 5 - x A None 1 None - y B None 2 None - z C None 3 None - """ from pandas.core.reshape.reshape import pivot return pivot(self, index=index, columns=columns, values=values) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index a01bdb2128bd7..b3c4513263b08 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -372,7 +372,7 @@ def pivot(self, index=None, columns=None, values=None): index = self.index if index is None else self[index] index = MultiIndex.from_arrays([index, self[columns]]) if is_list_like(values): - # use DF in case of Iterable values (e.g: list, tuple, np.array) + # use DF in case of Iterable values (e.g: list, Series, np.array) indexed = DataFrame(self[values].values, index=index, columns=values) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 60f129c95c4c9..25728b2144249 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -371,21 +371,18 @@ def test_pivot_periods(self): pv = df.pivot(index='p1', columns='p2', values='data1') tm.assert_frame_equal(pv, expected) - def test_pivot_with_list_like_values(self): + @pytest.mark.parametrize('values', [ + ['bar', 'baz'], np.array(['bar', 'baz']), + pd.Series(['bar', 'baz']), pd.Index(['bar', 'baz']) + ]) + def test_pivot_with_list_like_values(self, values): # issue #17160 df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], 'baz': [1, 2, 3, 4, 5, 6], 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) - result_list = df.pivot(index='zoo', columns='foo', - values=['bar', 'baz']) - result_array = df.pivot(index='zoo', columns='foo', - values=np.array(['bar', 'baz'])) - result_series = df.pivot(index='zoo', columns='foo', - values=pd.Series(['bar', 'baz'])) - result_index = df.pivot(index='zoo', columns='foo', - values=pd.Index(['bar', 'baz'])) + result = df.pivot(index='zoo', columns='foo', values=values) data = [[np.nan, 'A', np.nan, 4], [np.nan, 'C', np.nan, 6], @@ -399,11 +396,7 @@ def test_pivot_with_list_like_values(self): names=[None, 'foo']) expected = DataFrame(data=data, index=index, columns=columns, dtype='object') - - tm.assert_frame_equal(result_list, expected) - tm.assert_frame_equal(result_array, expected) - tm.assert_frame_equal(result_series, expected) - tm.assert_frame_equal(result_index, expected) + tm.assert_frame_equal(result, expected) def test_margins(self): def _check_output(result, values_col, index=['A', 'B'], From 8f8b45f0eee72afb64a6423d6ebdec774bb30c34 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Thu, 4 Jan 2018 17:10:31 +0200 Subject: [PATCH 10/18] Revert whatsnew v0.22.0 --- doc/source/whatsnew/v0.22.0.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.22.0.txt b/doc/source/whatsnew/v0.22.0.txt index 90d37999aab84..d165339cb0de9 100644 --- a/doc/source/whatsnew/v0.22.0.txt +++ b/doc/source/whatsnew/v0.22.0.txt @@ -240,4 +240,4 @@ With conda, use Note that the inconsistency in the return value for all-*NA* series is still there for pandas 0.20.3 and earlier. Avoiding pandas 0.21 will only help with -the empty case. \ No newline at end of file +the empty case. From 99abef469909d914f0f8f1a9875fdb15a114bbd1 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Thu, 4 Jan 2018 18:01:12 +0200 Subject: [PATCH 11/18] Add two more tests --- pandas/core/frame.py | 9 ++---- pandas/core/reshape/reshape.py | 2 +- pandas/tests/reshape/test_pivot.py | 44 ++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 8 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b6ef0eea9e715..963aaf99603ac 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4355,7 +4355,7 @@ def pivot(self, index=None, columns=None, values=None): existing index. columns : string or object Column name to use to make new frame's columns - values : string, object or (0.23.0) a list of the previous, optional + values : string, object or a list (0.23.0) of the previous, optional Column name(s) to use for populating new frame's values. If not specified, all remaining columns will be used and the result will have hierarchically indexed columns @@ -4392,17 +4392,12 @@ def pivot(self, index=None, columns=None, values=None): 4 two B 5 w 5 two C 6 t - >>> df.pivot(index='foo', columns='bar', values='baz') - A B C - one 1 2 3 - two 4 5 6 - >>> df.pivot(index='foo', columns='bar')['baz'] A B C one 1 2 3 two 4 5 6 - >>> df.pivot(index='foo', columns='bar', values=['baz']) + >>> df.pivot(index='foo', columns='bar', values='baz') A B C one 1 2 3 two 4 5 6 diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index b3c4513263b08..1873255893c3b 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -371,7 +371,7 @@ def pivot(self, index=None, columns=None, values=None): else: index = self.index if index is None else self[index] index = MultiIndex.from_arrays([index, self[columns]]) - if is_list_like(values): + if is_list_like(values) and not isinstance(values, tuple): # use DF in case of Iterable values (e.g: list, Series, np.array) indexed = DataFrame(self[values].values, index=index, diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 25728b2144249..beb30cfa061bb 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -398,6 +398,50 @@ def test_pivot_with_list_like_values(self, values): columns=columns, dtype='object') tm.assert_frame_equal(result, expected) + def test_pivot_with_multiindex(self): + # issue #17160 + index = pd.Index(data=[0, 1, 2, 3, 4, 5]) + data = [['one', 'A', 1, 'x'], + ['one', 'B', 2, 'y'], + ['one', 'C', 3, 'z'], + ['two', 'A', 4, 'q'], + ['two', 'B', 5, 'w'], + ['two', 'C', 6, 't']] + columns = pd.MultiIndex(levels=[['bar', 'baz'], ['first', 'second']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) + df = pd.DataFrame(data=data, index=index, columns=columns, dtype='object') + result = df.pivot(index=('bar', 'first'), columns=('bar', 'second'), values=('baz', 'first')) + + data = {'A': pd.Series([1, 4], index=['one', 'two']), + 'B': pd.Series([2, 5], index=['one', 'two']), + 'C': pd.Series([3, 6], index=['one', 'two'])} + expected = pd.DataFrame(data) + tm.assert_frame_equal(result, expected) + + @pytest.mark.xfail(reason='tuple is seen as a single column name') + def test_pivot_with_tuple_of_values(self): + # issue #17160 + df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], + 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], + 'baz': [1, 2, 3, 4, 5, 6], + 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) + + result = df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) + + data = [[np.nan, 'A', np.nan, 4], + [np.nan, 'C', np.nan, 6], + [np.nan, 'B', np.nan, 5], + ['A', np.nan, 1, np.nan], + ['B', np.nan, 2, np.nan], + ['C', np.nan, 3, np.nan]] + index = Index(data=['q', 't', 'w', 'x', 'y', 'z'], name='zoo') + columns = MultiIndex(levels=[['bar', 'baz'], ['one', 'two']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[None, 'foo']) + expected = DataFrame(data=data, index=index, + columns=columns, dtype='object') + tm.assert_frame_equal(result, expected) + def test_margins(self): def _check_output(result, values_col, index=['A', 'B'], columns=['C'], From 41ad9c0f105681ef0e7738185ff6abce072d53fc Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Thu, 4 Jan 2018 18:11:06 +0200 Subject: [PATCH 12/18] PEP8 --- pandas/tests/reshape/test_pivot.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index beb30cfa061bb..dc26f0889c98c 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -400,22 +400,23 @@ def test_pivot_with_list_like_values(self, values): def test_pivot_with_multiindex(self): # issue #17160 - index = pd.Index(data=[0, 1, 2, 3, 4, 5]) + index = Index(data=[0, 1, 2, 3, 4, 5]) data = [['one', 'A', 1, 'x'], ['one', 'B', 2, 'y'], ['one', 'C', 3, 'z'], ['two', 'A', 4, 'q'], ['two', 'B', 5, 'w'], ['two', 'C', 6, 't']] - columns = pd.MultiIndex(levels=[['bar', 'baz'], ['first', 'second']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) - df = pd.DataFrame(data=data, index=index, columns=columns, dtype='object') - result = df.pivot(index=('bar', 'first'), columns=('bar', 'second'), values=('baz', 'first')) - - data = {'A': pd.Series([1, 4], index=['one', 'two']), - 'B': pd.Series([2, 5], index=['one', 'two']), - 'C': pd.Series([3, 6], index=['one', 'two'])} - expected = pd.DataFrame(data) + columns = MultiIndex(levels=[['bar', 'baz'], ['first', 'second']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) + df = DataFrame(data=data, index=index, columns=columns, dtype='object') + result = df.pivot(index=('bar', 'first'), columns=('bar', 'second'), + values=('baz', 'first')) + + data = {'A': Series([1, 4], index=['one', 'two']), + 'B': Series([2, 5], index=['one', 'two']), + 'C': Series([3, 6], index=['one', 'two'])} + expected = DataFrame(data) tm.assert_frame_equal(result, expected) @pytest.mark.xfail(reason='tuple is seen as a single column name') From 516690c0af66eb8d945adbbc116ee16910fc65a7 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf ElDen Date: Thu, 1 Mar 2018 21:28:49 +0200 Subject: [PATCH 13/18] Use pytest raises instead of xfail --- pandas/tests/reshape/test_pivot.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index dc26f0889c98c..1e10251196e3a 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -419,15 +419,15 @@ def test_pivot_with_multiindex(self): expected = DataFrame(data) tm.assert_frame_equal(result, expected) - @pytest.mark.xfail(reason='tuple is seen as a single column name') def test_pivot_with_tuple_of_values(self): # issue #17160 df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], 'baz': [1, 2, 3, 4, 5, 6], 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) - - result = df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) + with pytest.raises(KeyError): + # tuple is seen as a single column name + result = df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) data = [[np.nan, 'A', np.nan, 4], [np.nan, 'C', np.nan, 6], From e30fd1c915f3715c3ad450fcc40c9a79efa2ca24 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf ElDen Date: Thu, 1 Mar 2018 22:22:53 +0200 Subject: [PATCH 14/18] Remove unnecessary code --- pandas/tests/reshape/test_pivot.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 1e10251196e3a..4d2f2778b28c5 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -429,20 +429,6 @@ def test_pivot_with_tuple_of_values(self): # tuple is seen as a single column name result = df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) - data = [[np.nan, 'A', np.nan, 4], - [np.nan, 'C', np.nan, 6], - [np.nan, 'B', np.nan, 5], - ['A', np.nan, 1, np.nan], - ['B', np.nan, 2, np.nan], - ['C', np.nan, 3, np.nan]] - index = Index(data=['q', 't', 'w', 'x', 'y', 'z'], name='zoo') - columns = MultiIndex(levels=[['bar', 'baz'], ['one', 'two']], - labels=[[0, 0, 1, 1], [0, 1, 0, 1]], - names=[None, 'foo']) - expected = DataFrame(data=data, index=index, - columns=columns, dtype='object') - tm.assert_frame_equal(result, expected) - def test_margins(self): def _check_output(result, values_col, index=['A', 'B'], columns=['C'], From eb9d85fcd6b97dc2f35073804052b79c7fa1d722 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Fri, 2 Mar 2018 02:27:52 +0200 Subject: [PATCH 15/18] Fix review comments --- pandas/core/reshape/reshape.py | 1 + pandas/tests/reshape/test_pivot.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 8008e00e09860..389f1af48434a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -400,6 +400,7 @@ def pivot(self, index=None, columns=None, values=None): index = MultiIndex.from_arrays([index, self[columns]]) if is_list_like(values) and not isinstance(values, tuple): + # Exclude tuple because it is seen as a single column name indexed = self._constructor(self[values].values, index=index, columns=values) else: diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 4d2f2778b28c5..9585945423a2a 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -427,7 +427,7 @@ def test_pivot_with_tuple_of_values(self): 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) with pytest.raises(KeyError): # tuple is seen as a single column name - result = df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) + df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) def test_margins(self): def _check_output(result, values_col, index=['A', 'B'], From 8ea45f82eee57e8ac3357880d258bdf987fb32a0 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Sun, 18 Mar 2018 15:01:21 +0200 Subject: [PATCH 16/18] xfail multiindex test --- pandas/tests/reshape/test_pivot.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 9585945423a2a..e33feccb5e423 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -398,6 +398,8 @@ def test_pivot_with_list_like_values(self, values): columns=columns, dtype='object') tm.assert_frame_equal(result, expected) + @pytest.mark.xfail(reason='MultiIndexed unstack with tuple names fails' + 'with KeyError #19966') def test_pivot_with_multiindex(self): # issue #17160 index = Index(data=[0, 1, 2, 3, 4, 5]) From 3825c9a7bc24a39e99d13f4fc6e9faa7e64f52e4 Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Sun, 18 Mar 2018 15:27:57 +0200 Subject: [PATCH 17/18] Add additional test --- pandas/tests/reshape/test_pivot.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index e33feccb5e423..92bedbabdf2f1 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -371,11 +371,34 @@ def test_pivot_periods(self): pv = df.pivot(index='p1', columns='p2', values='data1') tm.assert_frame_equal(pv, expected) + @pytest.mark.parametrize('values', [ + ['baz', 'zoo'], np.array(['baz', 'zoo']), + pd.Series(['baz', 'zoo']), pd.Index(['baz', 'zoo']) + ]) + def test_pivot_with_list_like_values(self, values): + # issue #17160 + df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], + 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], + 'baz': [1, 2, 3, 4, 5, 6], + 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) + + result = df.pivot(index='foo', columns='bar', values=values) + + data = [[1, 2, 3, 'x', 'y', 'z'], + [4, 5, 6, 'q', 'w', 't']] + index = Index(data=['one', 'two'], name='foo') + columns = MultiIndex(levels=[['baz', 'zoo'], ['A', 'B', 'C']], + labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], + names=[None, 'bar']) + expected = DataFrame(data=data, index=index, + columns=columns, dtype='object') + tm.assert_frame_equal(result, expected) + @pytest.mark.parametrize('values', [ ['bar', 'baz'], np.array(['bar', 'baz']), pd.Series(['bar', 'baz']), pd.Index(['bar', 'baz']) ]) - def test_pivot_with_list_like_values(self, values): + def test_pivot_with_list_like_values_nans(self, values): # issue #17160 df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], From e29374164ff7d3c6e4f974c64602699808cb8c7a Mon Sep 17 00:00:00 2001 From: Ibrahim Sharaf Date: Tue, 20 Mar 2018 17:31:22 +0200 Subject: [PATCH 18/18] Review changes --- pandas/core/frame.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 7019538c894b3..f0770893140e4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4955,12 +4955,15 @@ def pivot(self, index=None, columns=None, values=None): Column to use to make new frame's index. If None, uses existing index. columns : string or object - Column name to use to make new frame's columns - values : string, object or a list (0.23.0) of the previous, optional - Column name(s) to use for populating new frame's values. If not + Column to use to make new frame's columns. + values : string, object or a list of the previous, optional + Column(s) to use for populating new frame's values. If not specified, all remaining columns will be used and the result will have hierarchically indexed columns. + .. versionchanged :: 0.23.0 + Also accept list of column names. + Returns ------- DataFrame @@ -5013,13 +5016,14 @@ def pivot(self, index=None, columns=None, values=None): two 4 5 6 >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']) - baz zoo + baz zoo bar A B C A B C foo one 1 2 3 x y z two 4 5 6 q w t A ValueError is raised if there are any duplicates. + >>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'], ... "bar": ['A', 'A', 'B', 'C'], ... "baz": [1, 2, 3, 4]}) @@ -5029,8 +5033,10 @@ def pivot(self, index=None, columns=None, values=None): 1 one A 2 2 two B 3 3 two C 4 + Notice that the first two rows are the same for our `index` and `columns` arguments. + >>> df.pivot(index='foo', columns='bar', values='baz') Traceback (most recent call last): ...