Skip to content

Commit eecb129

Browse files
ibrahimsharaf authored and javadnoorb committed
ENH: DataFrame.pivot accepts a list of values (pandas-dev#18636)
1 parent 40a91c5 commit eecb129

File tree

4 files changed

+115
-15
lines changed

4 files changed

+115
-15
lines changed

doc/source/whatsnew/v0.23.0.txt

+1
Original file line number | Diff line number | Diff line change
@@ -326,6 +326,7 @@ Other Enhancements
326326
- ``Resampler`` objects now have a functioning :attr:`~pandas.core.resample.Resampler.pipe` method.
327327
Previously, calls to ``pipe`` were diverted to the ``mean`` method (:issue:`17905`).
328328
- :func:`~pandas.api.types.is_scalar` now returns ``True`` for ``DateOffset`` objects (:issue:`18943`).
329+
- :func:`DataFrame.pivot` now accepts a list for the ``values=`` kwarg (:issue:`17160`).
329330
- Added :func:`pandas.api.extensions.register_dataframe_accessor`,
330331
:func:`pandas.api.extensions.register_series_accessor`, and
331332
:func:`pandas.api.extensions.register_index_accessor`, accessor for libraries downstream of pandas

pandas/core/frame.py

+21-10
Original file line number | Diff line number | Diff line change
@@ -5050,11 +5050,14 @@ def pivot(self, index=None, columns=None, values=None):
50505050
existing index.
50515051
columns : string or object
50525052
Column to use to make new frame's columns.
5053-
values : string or object, optional
5054-
Column to use for populating new frame's values. If not
5053+
values : string, object or a list of the previous, optional
5054+
Column(s) to use for populating new frame's values. If not
50555055
specified, all remaining columns will be used and the result will
50565056
have hierarchically indexed columns.
50575057
5058+
.. versionchanged:: 0.23.0
5059+
Also accept list of column names.
5060+
50585061
Returns
50595062
-------
50605063
DataFrame
@@ -5083,15 +5086,16 @@ def pivot(self, index=None, columns=None, values=None):
50835086
>>> df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two',
50845087
... 'two'],
50855088
... 'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
5086-
... 'baz': [1, 2, 3, 4, 5, 6]})
5089+
... 'baz': [1, 2, 3, 4, 5, 6],
5090+
... 'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
50875091
>>> df
5088-
foo bar baz
5089-
0 one A 1
5090-
1 one B 2
5091-
2 one C 3
5092-
3 two A 4
5093-
4 two B 5
5094-
5 two C 6
5092+
foo bar baz zoo
5093+
0 one A 1 x
5094+
1 one B 2 y
5095+
2 one C 3 z
5096+
3 two A 4 q
5097+
4 two B 5 w
5098+
5 two C 6 t
50955099
50965100
>>> df.pivot(index='foo', columns='bar', values='baz')
50975101
bar A B C
@@ -5105,6 +5109,13 @@ def pivot(self, index=None, columns=None, values=None):
51055109
one 1 2 3
51065110
two 4 5 6
51075111
5112+
>>> df.pivot(index='foo', columns='bar', values=['baz', 'zoo'])
5113+
baz zoo
5114+
bar A B C A B C
5115+
foo
5116+
one 1 2 3 x y z
5117+
two 4 5 6 q w t
5118+
51085119
A ValueError is raised if there are any duplicates.
51095120
51105121
>>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'],

pandas/core/reshape/reshape.py

+10-5
Original file line number | Diff line number | Diff line change
@@ -392,16 +392,21 @@ def pivot(self, index=None, columns=None, values=None):
392392
cols = [columns] if index is None else [index, columns]
393393
append = index is None
394394
indexed = self.set_index(cols, append=append)
395-
return indexed.unstack(columns)
396395
else:
397396
if index is None:
398397
index = self.index
399398
else:
400399
index = self[index]
401-
indexed = self._constructor_sliced(
402-
self[values].values,
403-
index=MultiIndex.from_arrays([index, self[columns]]))
404-
return indexed.unstack(columns)
400+
index = MultiIndex.from_arrays([index, self[columns]])
401+
402+
if is_list_like(values) and not isinstance(values, tuple):
403+
# Exclude tuple because it is seen as a single column name
404+
indexed = self._constructor(self[values].values, index=index,
405+
columns=values)
406+
else:
407+
indexed = self._constructor_sliced(self[values].values,
408+
index=index)
409+
return indexed.unstack(columns)
405410

406411

407412
def pivot_simple(index, columns, values):

pandas/tests/reshape/test_pivot.py

+83
Original file line number | Diff line number | Diff line change
@@ -371,6 +371,89 @@ def test_pivot_periods(self):
371371
pv = df.pivot(index='p1', columns='p2', values='data1')
372372
tm.assert_frame_equal(pv, expected)
373373

374+
@pytest.mark.parametrize('values', [
375+
['baz', 'zoo'], np.array(['baz', 'zoo']),
376+
pd.Series(['baz', 'zoo']), pd.Index(['baz', 'zoo'])
377+
])
378+
def test_pivot_with_list_like_values(self, values):
379+
# issue #17160
380+
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
381+
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
382+
'baz': [1, 2, 3, 4, 5, 6],
383+
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
384+
385+
result = df.pivot(index='foo', columns='bar', values=values)
386+
387+
data = [[1, 2, 3, 'x', 'y', 'z'],
388+
[4, 5, 6, 'q', 'w', 't']]
389+
index = Index(data=['one', 'two'], name='foo')
390+
columns = MultiIndex(levels=[['baz', 'zoo'], ['A', 'B', 'C']],
391+
labels=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]],
392+
names=[None, 'bar'])
393+
expected = DataFrame(data=data, index=index,
394+
columns=columns, dtype='object')
395+
tm.assert_frame_equal(result, expected)
396+
397+
@pytest.mark.parametrize('values', [
398+
['bar', 'baz'], np.array(['bar', 'baz']),
399+
pd.Series(['bar', 'baz']), pd.Index(['bar', 'baz'])
400+
])
401+
def test_pivot_with_list_like_values_nans(self, values):
402+
# issue #17160
403+
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
404+
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
405+
'baz': [1, 2, 3, 4, 5, 6],
406+
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
407+
408+
result = df.pivot(index='zoo', columns='foo', values=values)
409+
410+
data = [[np.nan, 'A', np.nan, 4],
411+
[np.nan, 'C', np.nan, 6],
412+
[np.nan, 'B', np.nan, 5],
413+
['A', np.nan, 1, np.nan],
414+
['B', np.nan, 2, np.nan],
415+
['C', np.nan, 3, np.nan]]
416+
index = Index(data=['q', 't', 'w', 'x', 'y', 'z'], name='zoo')
417+
columns = MultiIndex(levels=[['bar', 'baz'], ['one', 'two']],
418+
labels=[[0, 0, 1, 1], [0, 1, 0, 1]],
419+
names=[None, 'foo'])
420+
expected = DataFrame(data=data, index=index,
421+
columns=columns, dtype='object')
422+
tm.assert_frame_equal(result, expected)
423+
424+
@pytest.mark.xfail(reason='MultiIndexed unstack with tuple names fails '
425+
'with KeyError #19966')
426+
def test_pivot_with_multiindex(self):
427+
# issue #17160
428+
index = Index(data=[0, 1, 2, 3, 4, 5])
429+
data = [['one', 'A', 1, 'x'],
430+
['one', 'B', 2, 'y'],
431+
['one', 'C', 3, 'z'],
432+
['two', 'A', 4, 'q'],
433+
['two', 'B', 5, 'w'],
434+
['two', 'C', 6, 't']]
435+
columns = MultiIndex(levels=[['bar', 'baz'], ['first', 'second']],
436+
labels=[[0, 0, 1, 1], [0, 1, 0, 1]])
437+
df = DataFrame(data=data, index=index, columns=columns, dtype='object')
438+
result = df.pivot(index=('bar', 'first'), columns=('bar', 'second'),
439+
values=('baz', 'first'))
440+
441+
data = {'A': Series([1, 4], index=['one', 'two']),
442+
'B': Series([2, 5], index=['one', 'two']),
443+
'C': Series([3, 6], index=['one', 'two'])}
444+
expected = DataFrame(data)
445+
tm.assert_frame_equal(result, expected)
446+
447+
def test_pivot_with_tuple_of_values(self):
448+
# issue #17160
449+
df = pd.DataFrame({'foo': ['one', 'one', 'one', 'two', 'two', 'two'],
450+
'bar': ['A', 'B', 'C', 'A', 'B', 'C'],
451+
'baz': [1, 2, 3, 4, 5, 6],
452+
'zoo': ['x', 'y', 'z', 'q', 'w', 't']})
453+
with pytest.raises(KeyError):
454+
# tuple is seen as a single column name
455+
df.pivot(index='zoo', columns='foo', values=('bar', 'baz'))
456+
374457
def test_margins(self):
375458
def _check_output(result, values_col, index=['A', 'B'],
376459
columns=['C'],

0 commit comments

Comments
 (0)