diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index 151ab8456c1d7..18013666d0b82 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -326,6 +326,7 @@ Other Enhancements - ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method. Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`). - :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`). +- :func:`DataFrame.pivot` now accepts a list for the ``values=`` kwarg (:issue:`17160`). - Added :func:`pandas.api.extensions.register_dataframe_accessor`, :func:`pandas.api.extensions.register_series_accessor`, and :func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas diff --git a/pandas/core/frame.py b/pandas/core/frame.py index efb002474f876..f0770893140e4 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4956,11 +4956,14 @@ def pivot(self, index=None, columns=None, values=None): existing index. columns : string or object Column to use to make new frame's columns. - values : string or object, optional - Column to use for populating new frame's values. If not + values : string, object or a list of the previous, optional + Column(s) to use for populating new frame's values. If not specified, all remaining columns will be used and the result will have hierarchically indexed columns. + .. versionchanged :: 0.23.0 + Also accept list of column names. + Returns ------- DataFrame @@ -4989,15 +4992,16 @@ def pivot(self, index=None, columns=None, values=None): >>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', ... 'two'], ... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], - ... 'baz': [1, 2, 3, 4, 5, 6]}) + ... 'baz': [1, 2, 3, 4, 5, 6], + ... 
'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) >>> df - foo bar baz - 0 one A 1 - 1 one B 2 - 2 one C 3 - 3 two A 4 - 4 two B 5 - 5 two C 6 + foo bar baz zoo + 0 one A 1 x + 1 one B 2 y + 2 one C 3 z + 3 two A 4 q + 4 two B 5 w + 5 two C 6 t >>> df.pivot(index='foo', columns='bar', values='baz') bar A B C @@ -5011,6 +5015,13 @@ def pivot(self, index=None, columns=None, values=None): one 1 2 3 two 4 5 6 + >>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo']) + baz zoo + bar A B C A B C + foo + one 1 2 3 x y z + two 4 5 6 q w t + A ValueError is raised if there are any duplicates. >>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'], diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index 3ef152d091b24..389f1af48434a 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -392,16 +392,21 @@ def pivot(self, index=None, columns=None, values=None): cols = [columns] if index is None else [index, columns] append = index is None indexed = self.set_index(cols, append=append) - return indexed.unstack(columns) else: if index is None: index = self.index else: index = self[index] - indexed = self._constructor_sliced( - self[values].values, - index=MultiIndex.from_arrays([index, self[columns]])) - return indexed.unstack(columns) + index = MultiIndex.from_arrays([index, self[columns]]) + + if is_list_like(values) and not isinstance(values, tuple): + # Exclude tuple because it is seen as a single column name + indexed = self._constructor(self[values].values, index=index, + columns=values) + else: + indexed = self._constructor_sliced(self[values].values, + index=index) + return indexed.unstack(columns) def pivot_simple(index, columns, values): diff --git a/pandas/tests/reshape/test_pivot.py b/pandas/tests/reshape/test_pivot.py index 786c57a4a82df..92bedbabdf2f1 100644 --- a/pandas/tests/reshape/test_pivot.py +++ b/pandas/tests/reshape/test_pivot.py @@ -371,6 +371,89 @@ def test_pivot_periods(self): pv = 
df.pivot(index='p1', columns='p2', values='data1') tm.assert_frame_equal(pv, expected) + @pytest.mark.parametrize('values', [ + ['baz', 'zoo'], np.array(['baz', 'zoo']), + pd.Series(['baz', 'zoo']), pd.Index(['baz', 'zoo']) + ]) + def test_pivot_with_list_like_values(self, values): + # issue #17160 + df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], + 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], + 'baz': [1, 2, 3, 4, 5, 6], + 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) + + result = df.pivot(index='foo', columns='bar', values=values) + + data = [[1, 2, 3, 'x', 'y', 'z'], + [4, 5, 6, 'q', 'w', 't']] + index = Index(data=['one', 'two'], name='foo') + columns = MultiIndex(levels=[['baz', 'zoo'], ['A', 'B', 'C']], + labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], + names=[None, 'bar']) + expected = DataFrame(data=data, index=index, + columns=columns, dtype='object') + tm.assert_frame_equal(result, expected) + + @pytest.mark.parametrize('values', [ + ['bar', 'baz'], np.array(['bar', 'baz']), + pd.Series(['bar', 'baz']), pd.Index(['bar', 'baz']) + ]) + def test_pivot_with_list_like_values_nans(self, values): + # issue #17160 + df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], + 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], + 'baz': [1, 2, 3, 4, 5, 6], + 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) + + result = df.pivot(index='zoo', columns='foo', values=values) + + data = [[np.nan, 'A', np.nan, 4], + [np.nan, 'C', np.nan, 6], + [np.nan, 'B', np.nan, 5], + ['A', np.nan, 1, np.nan], + ['B', np.nan, 2, np.nan], + ['C', np.nan, 3, np.nan]] + index = Index(data=['q', 't', 'w', 'x', 'y', 'z'], name='zoo') + columns = MultiIndex(levels=[['bar', 'baz'], ['one', 'two']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]], + names=[None, 'foo']) + expected = DataFrame(data=data, index=index, + columns=columns, dtype='object') + tm.assert_frame_equal(result, expected) + + @pytest.mark.xfail(reason='MultiIndexed unstack with tuple names fails ' + 'with KeyError #19966') + def
test_pivot_with_multiindex(self): + # issue #17160 + index = Index(data=[0, 1, 2, 3, 4, 5]) + data = [['one', 'A', 1, 'x'], + ['one', 'B', 2, 'y'], + ['one', 'C', 3, 'z'], + ['two', 'A', 4, 'q'], + ['two', 'B', 5, 'w'], + ['two', 'C', 6, 't']] + columns = MultiIndex(levels=[['bar', 'baz'], ['first', 'second']], + labels=[[0, 0, 1, 1], [0, 1, 0, 1]]) + df = DataFrame(data=data, index=index, columns=columns, dtype='object') + result = df.pivot(index=('bar', 'first'), columns=('bar', 'second'), + values=('baz', 'first')) + + data = {'A': Series([1, 4], index=['one', 'two']), + 'B': Series([2, 5], index=['one', 'two']), + 'C': Series([3, 6], index=['one', 'two'])} + expected = DataFrame(data) + tm.assert_frame_equal(result, expected) + + def test_pivot_with_tuple_of_values(self): + # issue #17160 + df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'], + 'bar': ['A', 'B', 'C', 'A', 'B', 'C'], + 'baz': [1, 2, 3, 4, 5, 6], + 'zoo': ['x', 'y', 'z', 'q', 'w', 't']}) + with pytest.raises(KeyError): + # tuple is seen as a single column name + df.pivot(index='zoo', columns='foo', values=('bar', 'baz')) + def test_margins(self): def _check_output(result, values_col, index=['A', 'B'], columns=['C'],