Skip to content

BUG: Fix MultiIndexed unstack failures with tuple names #30943

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 12 commits into from
Jan 20, 2020
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1133,6 +1133,7 @@ Reshaping
- Bug in :func:`merge_asof` merging on a tz-aware ``left_index`` and ``right_on`` a tz-aware column (:issue:`29864`)
- Improved error message and docstring in :func:`cut` and :func:`qcut` when `labels=True` (:issue:`13318`)
- Bug in missing `fill_na` parameter to :meth:`DataFrame.unstack` with list of levels (:issue:`30740`)
- Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` where tuple names in MultiIndexed data were not accepted as the level to unstack (:issue:`19966`)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

:func:`unstack` doesn't exist, so instead should probably be ":meth:`DataFrame.unstack` and :meth:`Series.unstack`"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

thanks! changed!


Sparse
^^^^^^
Expand Down
2 changes: 2 additions & 0 deletions pandas/core/reshape/reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,8 @@ def _unstack_multiple(data, clocs, fill_value=None):

index = data.index

if clocs in index.names:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment here on what is going on

clocs = [clocs]
clocs = [index._get_level_number(i) for i in clocs]

rlocs = [i for i in range(index.nlevels) if i not in clocs]
Expand Down
74 changes: 74 additions & 0 deletions pandas/tests/frame/test_reshape.py
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,80 @@ def test_unstack_fill_frame_categorical(self):
)
tm.assert_frame_equal(result, expected)

def test_unstack_tuplename_in_multiindex(self):
# GH 19966
idx = pd.MultiIndex.from_product(
[["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
)
df = pd.DataFrame({"d": [1] * 9, "e": [2] * 9}, index=idx)
result = df.unstack(("A", "a"))

expected = pd.DataFrame(
[[1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2]],
columns=pd.MultiIndex.from_tuples(
[
("d", "a"),
("d", "b"),
("d", "c"),
("e", "a"),
("e", "b"),
("e", "c"),
],
names=[None, ("A", "a")],
),
index=pd.Index([1, 2, 3], name=("B", "b")),
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
    "unstack_idx, expected_values, expected_index, expected_columns",
    [
        # Case 1: a single tuple-named level is unstacked.
        (
            ("A", "a"),
            [[1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2]],
            pd.MultiIndex.from_tuples(
                [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]
            ),
            pd.MultiIndex.from_tuples(
                [("d", "a"), ("d", "b"), ("e", "a"), ("e", "b")],
                names=[None, ("A", "a")],
            ),
        ),
        # Case 2: a tuple-named level and a string-named level together.
        (
            (("A", "a"), "B"),
            [[1, 1, 1, 1, 2, 2, 2, 2], [1, 1, 1, 1, 2, 2, 2, 2]],
            pd.Index([3, 4], name="C"),
            pd.MultiIndex.from_tuples(
                [
                    ("d", "a", 1),
                    ("d", "a", 2),
                    ("d", "b", 1),
                    ("d", "b", 2),
                    ("e", "a", 1),
                    ("e", "a", 2),
                    ("e", "b", 1),
                    ("e", "b", 2),
                ],
                names=[None, ("A", "a"), "B"],
            ),
        ),
    ],
)
def test_unstack_mixed_type_name_in_multiindex(
    self, unstack_idx, expected_values, expected_index, expected_columns
):
    """Unstack a DataFrame whose index mixes tuple and string level names.

    Regression test for GH 19966. ``unstack_idx`` is either one tuple level
    name or a tuple of level names; the other arguments describe the frame
    expected after unstacking.
    """
    # GH 19966
    idx = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
    )
    df = pd.DataFrame({"d": [1] * 8, "e": [2] * 8}, index=idx)
    result = df.unstack(unstack_idx)

    expected = pd.DataFrame(
        expected_values, columns=expected_columns, index=expected_index
    )
    tm.assert_frame_equal(result, expected)

def test_unstack_preserve_dtypes(self):
# Checks fix for #11847
df = pd.DataFrame(
Expand Down
59 changes: 59 additions & 0 deletions pandas/tests/series/test_reshape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
import pytest
Copy link
Member Author

@charlesdong1991 charlesdong1991 Jan 12, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could not find existing tests for `Series.unstack`, so I created a new file. Please let me know where those tests live, and I will move this part into the corresponding file.

Thanks!

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

See tests/series/test_analytics.py:

def test_unstack(self):

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ahh! many thanks! @jschendel

I'm not sure test_analytics.py is the best place for the unstack tests, since unstack is not really an analytics operation. It may be worth a follow-up to move them somewhere else. @jreback

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah don't appear to be many, could create (and move the existing ones from test_analytics) to pandas/tests/series/test_reshaping.py to mirror frame


import pandas as pd
import pandas._testing as tm


def test_unstack_tuplename_in_multiindex():
    """Unstack a Series by a single index level whose name is a tuple.

    Regression test for GH 19966: ``("A", "a")`` is one level *name*, so that
    level moves to the columns and ``("B", "b")`` remains as the row index.
    """
    # GH 19966
    idx = pd.MultiIndex.from_product(
        [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
    )
    ser = pd.Series(1, index=idx)

    # A tuple here must be looked up as a level name, not as two levels.
    result = ser.unstack(("A", "a"))

    expected = pd.DataFrame(
        [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
        columns=pd.MultiIndex.from_tuples(
            [("a",), ("b",), ("c",)], names=[("A", "a")]
        ),
        index=pd.Index([1, 2, 3], name=("B", "b")),
    )
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "unstack_idx, expected_values, expected_index, expected_columns",
    [
        # Case 1: a single tuple-named level is unstacked.
        (
            ("A", "a"),
            [[1, 1], [1, 1], [1, 1], [1, 1]],
            pd.MultiIndex.from_tuples(
                [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]
            ),
            pd.MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]),
        ),
        # Case 2: a tuple-named level and a string-named level together.
        (
            (("A", "a"), "B"),
            [[1, 1, 1, 1], [1, 1, 1, 1]],
            pd.Index([3, 4], name="C"),
            pd.MultiIndex.from_tuples(
                [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"]
            ),
        ),
    ],
)
def test_unstack_mixed_type_name_in_multiindex(
    unstack_idx, expected_values, expected_index, expected_columns
):
    """Unstack a Series whose index mixes tuple and string level names.

    Regression test for GH 19966. ``unstack_idx`` is either one tuple level
    name or a tuple of level names; the other arguments describe the frame
    expected after unstacking.
    """
    # GH 19966
    idx = pd.MultiIndex.from_product(
        [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
    )
    ser = pd.Series(1, index=idx)
    result = ser.unstack(unstack_idx)

    expected = pd.DataFrame(
        expected_values, columns=expected_columns, index=expected_index
    )
    tm.assert_frame_equal(result, expected)