Skip to content

Commit 11f1500

Browse files
ENH: Allow multi values for index and columns in df.pivot (#30928)
1 parent d2a14ac commit 11f1500

File tree

4 files changed

+254
-6
lines changed

4 files changed

+254
-6
lines changed

doc/source/whatsnew/v1.1.0.rst

+2
Original file line numberDiff line numberDiff line change
@@ -208,8 +208,10 @@ Reshaping
208208
- Bug in :meth:`DataFrame.pivot_table` when ``margins`` is ``True`` and only ``columns`` is defined (:issue:`31016`)
209209
- Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`)
210210
- Bug in :func:`crosstab` where, when the inputs are two Series with tuple names, the output kept a dummy MultiIndex as columns. (:issue:`18321`)
211+
- :meth:`DataFrame.pivot` can now take lists for ``index`` and ``columns`` arguments (:issue:`21425`)
211212
- Bug in :func:`concat` where the resulting indices are not copied when ``copy=True`` (:issue:`29879`)
212213

214+
213215
Sparse
214216
^^^^^^
215217

pandas/core/frame.py

+42-2
Original file line numberDiff line numberDiff line change
@@ -5889,11 +5889,19 @@ def groupby(
58895889
58905890
Parameters
58915891
----------%s
5892-
index : str or object, optional
5892+
index : str or object or a list of str, optional
58935893
Column to use to make new frame's index. If None, uses
58945894
existing index.
5895-
columns : str or object
5895+
5896+
.. versionchanged:: 1.1.0
5897+
Also accepts a list of index names.
5898+
5899+
columns : str or object or a list of str
58965900
Column to use to make new frame's columns.
5901+
5902+
.. versionchanged:: 1.1.0
5903+
Also accepts a list of column names.
5904+
58975905
values : str, object or a list of the previous, optional
58985906
Column(s) to use for populating new frame's values. If not
58995907
specified, all remaining columns will be used and the result will
@@ -5960,6 +5968,38 @@ def groupby(
59605968
one 1 2 3 x y z
59615969
two 4 5 6 q w t
59625970
5971+
You can also pass a list of column names or a list of index names.
5972+
5973+
>>> df = pd.DataFrame({
5974+
... "lev1": [1, 1, 1, 2, 2, 2],
5975+
... "lev2": [1, 1, 2, 1, 1, 2],
5976+
... "lev3": [1, 2, 1, 2, 1, 2],
5977+
... "lev4": [1, 2, 3, 4, 5, 6],
5978+
... "values": [0, 1, 2, 3, 4, 5]})
5979+
>>> df
5980+
lev1 lev2 lev3 lev4 values
5981+
0 1 1 1 1 0
5982+
1 1 1 2 2 1
5983+
2 1 2 1 3 2
5984+
3 2 1 2 4 3
5985+
4 2 1 1 5 4
5986+
5 2 2 2 6 5
5987+
5988+
>>> df.pivot(index="lev1", columns=["lev2", "lev3"],values="values")
5989+
lev2 1 2
5990+
lev3 1 2 1 2
5991+
lev1
5992+
1 0.0 1.0 2.0 NaN
5993+
2 4.0 3.0 NaN 5.0
5994+
5995+
>>> df.pivot(index=["lev1", "lev2"], columns=["lev3"],values="values")
5996+
lev3 1 2
5997+
lev1 lev2
5998+
1 1 0.0 1.0
5999+
2 2.0 NaN
6000+
2 1 4.0 3.0
6001+
2 NaN 5.0
6002+
59636003
A ValueError is raised if there are any duplicates.
59646004
59656005
>>> df = pd.DataFrame({"foo": ['one', 'one', 'two', 'two'],

pandas/core/reshape/pivot.py

+18-4
Original file line numberDiff line numberDiff line change
@@ -425,17 +425,31 @@ def _convert_by(by):
425425
def pivot(data: "DataFrame", index=None, columns=None, values=None) -> "DataFrame":
426426
if columns is None:
427427
raise TypeError("pivot() missing 1 required argument: 'columns'")
428+
columns = columns if is_list_like(columns) else [columns]
428429

429430
if values is None:
430-
cols = [columns] if index is None else [index, columns]
431+
cols: List[str] = []
432+
if index is None:
433+
pass
434+
elif is_list_like(index):
435+
cols = list(index)
436+
else:
437+
cols = [index]
438+
cols.extend(columns)
439+
431440
append = index is None
432441
indexed = data.set_index(cols, append=append)
433442
else:
434443
if index is None:
435-
index = data.index
444+
index = [Series(data.index, name=data.index.name)]
445+
elif is_list_like(index):
446+
index = [data[idx] for idx in index]
436447
else:
437-
index = data[index]
438-
index = MultiIndex.from_arrays([index, data[columns]])
448+
index = [data[index]]
449+
450+
data_columns = [data[col] for col in columns]
451+
index.extend(data_columns)
452+
index = MultiIndex.from_arrays(index)
439453

440454
if is_list_like(values) and not isinstance(values, tuple):
441455
# Exclude tuple because it is seen as a single column name
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,192 @@
1+
import numpy as np
2+
import pytest
3+
4+
import pandas as pd
5+
from pandas import Index, MultiIndex
6+
import pandas._testing as tm
7+
8+
9+
@pytest.mark.parametrize(
    "input_index, input_columns, input_values, "
    "expected_values, expected_columns, expected_index",
    [
        # One-element list as index, scalar columns, explicit values column.
        (
            ["lev4"],
            "lev3",
            "values",
            [
                [0.0, np.nan],
                [np.nan, 1.0],
                [2.0, np.nan],
                [np.nan, 3.0],
                [4.0, np.nan],
                [np.nan, 5.0],
                [6.0, np.nan],
                [np.nan, 7.0],
            ],
            Index([1, 2], name="lev3"),
            Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
        ),
        # Same index/columns with values=None: the expected columns carry
        # every remaining column ("lev1", "lev2", "values") as an outer level.
        (
            ["lev4"],
            "lev3",
            None,
            [
                [1.0, np.nan, 1.0, np.nan, 0.0, np.nan],
                [np.nan, 1.0, np.nan, 1.0, np.nan, 1.0],
                [1.0, np.nan, 2.0, np.nan, 2.0, np.nan],
                [np.nan, 1.0, np.nan, 2.0, np.nan, 3.0],
                [2.0, np.nan, 1.0, np.nan, 4.0, np.nan],
                [np.nan, 2.0, np.nan, 1.0, np.nan, 5.0],
                [2.0, np.nan, 2.0, np.nan, 6.0, np.nan],
                [np.nan, 2.0, np.nan, 2.0, np.nan, 7.0],
            ],
            MultiIndex.from_tuples(
                [
                    ("lev1", 1),
                    ("lev1", 2),
                    ("lev2", 1),
                    ("lev2", 2),
                    ("values", 1),
                    ("values", 2),
                ],
                names=[None, "lev3"],
            ),
            Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
        ),
        # Two index names, explicit values column: expected index is a
        # two-level MultiIndex.
        (
            ["lev1", "lev2"],
            "lev3",
            "values",
            [[0, 1], [2, 3], [4, 5], [6, 7]],
            Index([1, 2], name="lev3"),
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
            ),
        ),
        # Two index names with values=None: expected columns carry the
        # remaining columns ("lev4", "values") as an outer level.
        (
            ["lev1", "lev2"],
            "lev3",
            None,
            [[1, 2, 0, 1], [3, 4, 2, 3], [5, 6, 4, 5], [7, 8, 6, 7]],
            MultiIndex.from_tuples(
                [("lev4", 1), ("lev4", 2), ("values", 1), ("values", 2)],
                names=[None, "lev3"],
            ),
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
            ),
        ),
    ],
)
def test_pivot_list_like_index(
    input_index,
    input_columns,
    input_values,
    expected_values,
    expected_columns,
    expected_index,
):
    """Check ``DataFrame.pivot`` when ``index`` is passed as a list (GH 21425).

    Each parametrised case pivots the same eight-row frame and compares the
    result with a frame built from the expected values, columns and index.
    """
    # GH 21425, test when index is given a list
    raw = {
        "lev1": [1, 1, 1, 1, 2, 2, 2, 2],
        "lev2": [1, 1, 2, 2, 1, 1, 2, 2],
        "lev3": [1, 2, 1, 2, 1, 2, 1, 2],
        "lev4": [1, 2, 3, 4, 5, 6, 7, 8],
        "values": [0, 1, 2, 3, 4, 5, 6, 7],
    }
    frame = pd.DataFrame(raw)

    pivoted = frame.pivot(
        index=input_index, columns=input_columns, values=input_values
    )
    wanted = pd.DataFrame(
        expected_values, index=expected_index, columns=expected_columns
    )
    tm.assert_frame_equal(pivoted, wanted)
106+
107+
108+
@pytest.mark.parametrize(
    "input_index, input_columns, input_values, "
    "expected_values, expected_columns, expected_index",
    [
        # Scalar index, one-element list as columns.
        (
            "lev4",
            ["lev3"],
            "values",
            [
                [0.0, np.nan],
                [np.nan, 1.0],
                [2.0, np.nan],
                [np.nan, 3.0],
                [4.0, np.nan],
                [np.nan, 5.0],
                [6.0, np.nan],
                [np.nan, 7.0],
            ],
            Index([1, 2], name="lev3"),
            Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
        ),
        # Two index names, one-element list as columns.
        (
            ["lev1", "lev2"],
            ["lev3"],
            "values",
            [[0, 1], [2, 3], [4, 5], [6, 7]],
            Index([1, 2], name="lev3"),
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
            ),
        ),
        # One index name, two column names: expected columns are a
        # two-level MultiIndex.
        (
            ["lev1"],
            ["lev2", "lev3"],
            "values",
            [[0, 1, 2, 3], [4, 5, 6, 7]],
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev2", "lev3"]
            ),
            Index([1, 2], name="lev1"),
        ),
        # Two index names and two column names: both axes are MultiIndexes.
        (
            ["lev1", "lev2"],
            ["lev3", "lev4"],
            "values",
            [
                [0.0, 1.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
                [np.nan, np.nan, 2.0, 3.0, np.nan, np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan, np.nan, 4.0, 5.0, np.nan, np.nan],
                [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 6.0, 7.0],
            ],
            MultiIndex.from_tuples(
                [(1, 1), (2, 2), (1, 3), (2, 4), (1, 5), (2, 6), (1, 7), (2, 8)],
                names=["lev3", "lev4"],
            ),
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
            ),
        ),
    ],
)
def test_pivot_list_like_columns(
    input_index,
    input_columns,
    input_values,
    expected_values,
    expected_columns,
    expected_index,
):
    """Check ``DataFrame.pivot`` when ``columns`` is passed as a list (GH 21425).

    Each parametrised case pivots the same eight-row frame and compares the
    result with a frame built from the expected values, columns and index.
    """
    # GH 21425, test when columns is given a list
    raw = {
        "lev1": [1, 1, 1, 1, 2, 2, 2, 2],
        "lev2": [1, 1, 2, 2, 1, 1, 2, 2],
        "lev3": [1, 2, 1, 2, 1, 2, 1, 2],
        "lev4": [1, 2, 3, 4, 5, 6, 7, 8],
        "values": [0, 1, 2, 3, 4, 5, 6, 7],
    }
    frame = pd.DataFrame(raw)

    pivoted = frame.pivot(
        index=input_index, columns=input_columns, values=input_values
    )
    wanted = pd.DataFrame(
        expected_values, index=expected_index, columns=expected_columns
    )
    tm.assert_frame_equal(pivoted, wanted)

0 commit comments

Comments
 (0)