-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
ENH: Allow multi values for index and columns in df.pivot #30928
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 22 commits
7e461a1
1314059
8bcb313
7bd097d
f7b37d6
180194d
98e9730
8739957
8a2af11
4cdd17a
ced4ec7
7f0ea51
abe991b
d312f56
3ed3a60
cc1826e
311670b
61d32b0
3aa04fa
9f5f170
ce0e85d
20a54ba
ab20be2
c70230b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -425,17 +425,31 @@ def _convert_by(by): | |
def pivot(data: "DataFrame", index=None, columns=None, values=None) -> "DataFrame": | ||
if columns is None: | ||
raise TypeError("pivot() missing 1 required argument: 'columns'") | ||
columns = columns if is_list_like(columns) else [columns] | ||
|
||
if values is None: | ||
cols = [columns] if index is None else [index, columns] | ||
cols: List[str] = [] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think you want [inline code suggestion lost in this capture]. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I don't think this will matter until we type the signature, so I'm OK with handling this as a follow-on. |
||
if index is None: | ||
pass | ||
elif is_list_like(index): | ||
cols = list(index) | ||
else: | ||
cols = [index] | ||
cols.extend(columns) | ||
|
||
append = index is None | ||
indexed = data.set_index(cols, append=append) | ||
else: | ||
if index is None: | ||
index = data.index | ||
index = [Series(data.index, name=data.index.name)] | ||
elif is_list_like(index): | ||
index = [data[idx] for idx in index] | ||
else: | ||
index = data[index] | ||
index = MultiIndex.from_arrays([index, data[columns]]) | ||
index = [data[index]] | ||
|
||
data_columns = [data[col] for col in columns] | ||
index.extend(data_columns) | ||
index = MultiIndex.from_arrays(index) | ||
|
||
if is_list_like(values) and not isinstance(values, tuple): | ||
# Exclude tuple because it is seen as a single column name | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,192 @@ | ||
import numpy as np | ||
import pytest | ||
|
||
import pandas as pd | ||
from pandas import Index, MultiIndex | ||
import pandas._testing as tm | ||
|
||
|
||
@pytest.mark.parametrize(
    "input_index, input_columns, input_values, "
    "expected_values, expected_columns, expected_index",
    [
        # Single-element list index, scalar columns, explicit values column.
        (
            ["lev4"],
            "lev3",
            "values",
            [
                [0.0, np.nan],
                [np.nan, 1.0],
                [2.0, np.nan],
                [np.nan, 3.0],
                [4.0, np.nan],
                [np.nan, 5.0],
                [6.0, np.nan],
                [np.nan, 7.0],
            ],
            Index([1, 2], name="lev3"),
            Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
        ),
        # Single-element list index, values=None: the remaining columns
        # (lev1, lev2, values) form the outer level of the result columns.
        (
            ["lev4"],
            "lev3",
            None,
            [
                [1.0, np.nan, 1.0, np.nan, 0.0, np.nan],
                [np.nan, 1.0, np.nan, 1.0, np.nan, 1.0],
                [1.0, np.nan, 2.0, np.nan, 2.0, np.nan],
                [np.nan, 1.0, np.nan, 2.0, np.nan, 3.0],
                [2.0, np.nan, 1.0, np.nan, 4.0, np.nan],
                [np.nan, 2.0, np.nan, 1.0, np.nan, 5.0],
                [2.0, np.nan, 2.0, np.nan, 6.0, np.nan],
                [np.nan, 2.0, np.nan, 2.0, np.nan, 7.0],
            ],
            MultiIndex.from_tuples(
                [
                    ("lev1", 1),
                    ("lev1", 2),
                    ("lev2", 1),
                    ("lev2", 2),
                    ("values", 1),
                    ("values", 2),
                ],
                names=[None, "lev3"],
            ),
            Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
        ),
        # Two-column list index, explicit values column.
        (
            ["lev1", "lev2"],
            "lev3",
            "values",
            [[0, 1], [2, 3], [4, 5], [6, 7]],
            Index([1, 2], name="lev3"),
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
            ),
        ),
        # Two-column list index, values=None: the remaining columns
        # (lev4, values) form the outer level of the result columns.
        (
            ["lev1", "lev2"],
            "lev3",
            None,
            [[1, 2, 0, 1], [3, 4, 2, 3], [5, 6, 4, 5], [7, 8, 6, 7]],
            MultiIndex.from_tuples(
                [("lev4", 1), ("lev4", 2), ("values", 1), ("values", 2)],
                names=[None, "lev3"],
            ),
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
            ),
        ),
    ],
)
def test_pivot_list_like_index(
    input_index,
    input_columns,
    input_values,
    expected_values,
    expected_columns,
    expected_index,
):
    """Check ``DataFrame.pivot`` when ``index`` is passed as a list.

    Each parametrized case pivots the same 8-row frame and compares the
    result against a frame built from the expected values, columns and index.
    """
    # GH 21425, test when index is given a list
    df = pd.DataFrame(
        {
            "lev1": [1, 1, 1, 1, 2, 2, 2, 2],
            "lev2": [1, 1, 2, 2, 1, 1, 2, 2],
            "lev3": [1, 2, 1, 2, 1, 2, 1, 2],
            "lev4": [1, 2, 3, 4, 5, 6, 7, 8],
            "values": [0, 1, 2, 3, 4, 5, 6, 7],
        }
    )

    result = df.pivot(index=input_index, columns=input_columns, values=input_values)
    expected = pd.DataFrame(
        expected_values, columns=expected_columns, index=expected_index
    )
    tm.assert_frame_equal(result, expected)
|
||
|
||
@pytest.mark.parametrize(
    "input_index, input_columns, input_values, "
    "expected_values, expected_columns, expected_index",
    [
        # Scalar index, single-element list columns.
        (
            "lev4",
            ["lev3"],
            "values",
            [
                [0.0, np.nan],
                [np.nan, 1.0],
                [2.0, np.nan],
                [np.nan, 3.0],
                [4.0, np.nan],
                [np.nan, 5.0],
                [6.0, np.nan],
                [np.nan, 7.0],
            ],
            Index([1, 2], name="lev3"),
            Index([1, 2, 3, 4, 5, 6, 7, 8], name="lev4"),
        ),
        # Two-column list index, single-element list columns.
        (
            ["lev1", "lev2"],
            ["lev3"],
            "values",
            [[0, 1], [2, 3], [4, 5], [6, 7]],
            Index([1, 2], name="lev3"),
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
            ),
        ),
        # Single-element list index, two-column list columns.
        (
            ["lev1"],
            ["lev2", "lev3"],
            "values",
            [[0, 1, 2, 3], [4, 5, 6, 7]],
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev2", "lev3"]
            ),
            Index([1, 2], name="lev1"),
        ),
        # Two-column list index and two-column list columns.
        (
            ["lev1", "lev2"],
            ["lev3", "lev4"],
            "values",
            [
                [0.0, 1.0, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan],
                [np.nan, np.nan, 2.0, 3.0, np.nan, np.nan, np.nan, np.nan],
                [np.nan, np.nan, np.nan, np.nan, 4.0, 5.0, np.nan, np.nan],
                [np.nan, np.nan, np.nan, np.nan, np.nan, np.nan, 6.0, 7.0],
            ],
            MultiIndex.from_tuples(
                [(1, 1), (2, 2), (1, 3), (2, 4), (1, 5), (2, 6), (1, 7), (2, 8)],
                names=["lev3", "lev4"],
            ),
            MultiIndex.from_tuples(
                [(1, 1), (1, 2), (2, 1), (2, 2)], names=["lev1", "lev2"]
            ),
        ),
    ],
)
def test_pivot_list_like_columns(
    input_index,
    input_columns,
    input_values,
    expected_values,
    expected_columns,
    expected_index,
):
    """Check ``DataFrame.pivot`` when ``columns`` is passed as a list.

    Each parametrized case pivots the same 8-row frame and compares the
    result against a frame built from the expected values, columns and index.
    """
    # GH 21425, test when columns is given a list
    df = pd.DataFrame(
        {
            "lev1": [1, 1, 1, 1, 2, 2, 2, 2],
            "lev2": [1, 1, 2, 2, 1, 1, 2, 2],
            "lev3": [1, 2, 1, 2, 1, 2, 1, 2],
            "lev4": [1, 2, 3, 4, 5, 6, 7, 8],
            "values": [0, 1, 2, 3, 4, 5, 6, 7],
        }
    )

    result = df.pivot(index=input_index, columns=input_columns, values=input_values)
    expected = pd.DataFrame(
        expected_values, columns=expected_columns, index=expected_index
    )
    tm.assert_frame_equal(result, expected)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As a follow-on, can you try to type things in the signature?