diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
index b7adf7bf0d80d..e349745d51e83 100644
--- a/doc/source/whatsnew/v1.1.0.rst
+++ b/doc/source/whatsnew/v1.1.0.rst
@@ -141,6 +141,7 @@ Reshaping
 
 -
 - Bug in :meth:`DataFrame.pivot_table` when only MultiIndexed columns is set (:issue:`17038`)
+- Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` where a tuple name in MultiIndexed data was not recognised as a single level name (:issue:`19966`)
 - Bug in :meth:`DataFrame.pivot_table` when ``margin`` is ``True`` and only ``column`` is defined (:issue:`31016`)
 - Fix incorrect error message in :meth:`DataFrame.pivot` when ``columns`` is set to ``None``. (:issue:`30924`)
 - Bug in :func:`crosstab` when inputs are two Series and have tuple names, the output will keep dummy MultiIndex as columns. (:issue:`18321`)
diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py
index 97f416e32d07b..fab9f41cb6c4f 100644
--- a/pandas/core/reshape/reshape.py
+++ b/pandas/core/reshape/reshape.py
@@ -317,6 +317,10 @@ def _unstack_multiple(data, clocs, fill_value=None):
 
     index = data.index
 
+    # GH 19966: a tuple that is itself a level name must be treated as a single
+    # level rather than iterated as a sequence of level names
+    if clocs in index.names:
+        clocs = [clocs]
     clocs = [index._get_level_number(i) for i in clocs]
 
     rlocs = [i for i in range(index.nlevels) if i not in clocs]
diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
index 60b7611c8b9be..b3af5a7b7317e 100644
--- a/pandas/tests/frame/test_reshape.py
+++ b/pandas/tests/frame/test_reshape.py
@@ -336,6 +336,80 @@ def test_unstack_fill_frame_categorical(self):
         )
         tm.assert_frame_equal(result, expected)
 
+    def test_unstack_tuplename_in_multiindex(self):
+        # GH 19966
+        idx = pd.MultiIndex.from_product(
+            [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
+        )
+        df = pd.DataFrame({"d": [1] * 9, "e": [2] * 9}, index=idx)
+        result = df.unstack(("A", "a"))
+
+        expected = pd.DataFrame(
+            [[1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2], [1, 1, 1, 2, 2, 2]],
+            columns=pd.MultiIndex.from_tuples(
+                [
+                    ("d", "a"),
+                    ("d", "b"),
+                    ("d", "c"),
+                    ("e", "a"),
+                    ("e", "b"),
+                    ("e", "c"),
+                ],
+                names=[None, ("A", "a")],
+            ),
+            index=pd.Index([1, 2, 3], name=("B", "b")),
+        )
+        tm.assert_frame_equal(result, expected)
+
+    @pytest.mark.parametrize(
+        "unstack_idx, expected_values, expected_index, expected_columns",
+        [
+            (
+                ("A", "a"),
+                [[1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2], [1, 1, 2, 2]],
+                pd.MultiIndex.from_tuples(
+                    [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]
+                ),
+                pd.MultiIndex.from_tuples(
+                    [("d", "a"), ("d", "b"), ("e", "a"), ("e", "b")],
+                    names=[None, ("A", "a")],
+                ),
+            ),
+            (
+                (("A", "a"), "B"),
+                [[1, 1, 1, 1, 2, 2, 2, 2], [1, 1, 1, 1, 2, 2, 2, 2]],
+                pd.Index([3, 4], name="C"),
+                pd.MultiIndex.from_tuples(
+                    [
+                        ("d", "a", 1),
+                        ("d", "a", 2),
+                        ("d", "b", 1),
+                        ("d", "b", 2),
+                        ("e", "a", 1),
+                        ("e", "a", 2),
+                        ("e", "b", 1),
+                        ("e", "b", 2),
+                    ],
+                    names=[None, ("A", "a"), "B"],
+                ),
+            ),
+        ],
+    )
+    def test_unstack_mixed_type_name_in_multiindex(
+        self, unstack_idx, expected_values, expected_index, expected_columns
+    ):
+        # GH 19966
+        idx = pd.MultiIndex.from_product(
+            [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
+        )
+        df = pd.DataFrame({"d": [1] * 8, "e": [2] * 8}, index=idx)
+        result = df.unstack(unstack_idx)
+
+        expected = pd.DataFrame(
+            expected_values, columns=expected_columns, index=expected_index,
+        )
+        tm.assert_frame_equal(result, expected)
+
     def test_unstack_preserve_dtypes(self):
         # Checks fix for #11847
         df = pd.DataFrame(
diff --git a/pandas/tests/series/test_analytics.py b/pandas/tests/series/test_analytics.py
index c29bd3ea0cb7d..e6e91b5d4f5f4 100644
--- a/pandas/tests/series/test_analytics.py
+++ b/pandas/tests/series/test_analytics.py
@@ -6,7 +6,7 @@
 import pandas.util._test_decorators as td
 
 import pandas as pd
-from pandas import DataFrame, MultiIndex, Series
+from pandas import DataFrame, Series
 import pandas._testing as tm
 
 
@@ -160,65 +160,6 @@ def test_is_monotonic(self):
         assert s.is_monotonic is False
         assert s.is_monotonic_decreasing is True
 
-    def test_unstack(self):
-
-        index = MultiIndex(
-            levels=[["bar", "foo"], ["one", "three", "two"]],
-            codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
-        )
-
-        s = Series(np.arange(4.0), index=index)
-        unstacked = s.unstack()
-
-        expected = DataFrame(
-            [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
-            index=["bar", "foo"],
-            columns=["one", "three", "two"],
-        )
-
-        tm.assert_frame_equal(unstacked, expected)
-
-        unstacked = s.unstack(level=0)
-        tm.assert_frame_equal(unstacked, expected.T)
-
-        index = MultiIndex(
-            levels=[["bar"], ["one", "two", "three"], [0, 1]],
-            codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
-        )
-        s = Series(np.random.randn(6), index=index)
-        exp_index = MultiIndex(
-            levels=[["one", "two", "three"], [0, 1]],
-            codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
-        )
-        expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0)
-        unstacked = s.unstack(0).sort_index()
-        tm.assert_frame_equal(unstacked, expected)
-
-        # GH5873
-        idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
-        ts = pd.Series([1, 2], index=idx)
-        left = ts.unstack()
-        right = DataFrame(
-            [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
-        )
-        tm.assert_frame_equal(left, right)
-
-        idx = pd.MultiIndex.from_arrays(
-            [
-                ["cat", "cat", "cat", "dog", "dog"],
-                ["a", "a", "b", "a", "b"],
-                [1, 2, 1, 1, np.nan],
-            ]
-        )
-        ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
-        right = DataFrame(
-            [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
-            columns=["cat", "dog"],
-        )
-        tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
-        right.index = pd.MultiIndex.from_tuples(tpls)
-        tm.assert_frame_equal(ts.unstack(level=0), right)
-
     @pytest.mark.parametrize("func", [np.any, np.all])
     @pytest.mark.parametrize("kwargs", [dict(keepdims=True), dict(out=object())])
     @td.skip_if_np_lt("1.15")
diff --git a/pandas/tests/series/test_reshaping.py b/pandas/tests/series/test_reshaping.py
new file mode 100644
index 0000000000000..7645fb8759a54
--- /dev/null
+++ b/pandas/tests/series/test_reshaping.py
@@ -0,0 +1,120 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+from pandas import DataFrame, MultiIndex, Series
+import pandas._testing as tm
+
+
+def test_unstack():
+    index = MultiIndex(
+        levels=[["bar", "foo"], ["one", "three", "two"]],
+        codes=[[1, 1, 0, 0], [0, 1, 0, 2]],
+    )
+
+    s = Series(np.arange(4.0), index=index)
+    unstacked = s.unstack()
+
+    expected = DataFrame(
+        [[2.0, np.nan, 3.0], [0.0, 1.0, np.nan]],
+        index=["bar", "foo"],
+        columns=["one", "three", "two"],
+    )
+
+    tm.assert_frame_equal(unstacked, expected)
+
+    unstacked = s.unstack(level=0)
+    tm.assert_frame_equal(unstacked, expected.T)
+
+    index = MultiIndex(
+        levels=[["bar"], ["one", "two", "three"], [0, 1]],
+        codes=[[0, 0, 0, 0, 0, 0], [0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
+    )
+    s = Series(np.random.randn(6), index=index)
+    exp_index = MultiIndex(
+        levels=[["one", "two", "three"], [0, 1]],
+        codes=[[0, 1, 2, 0, 1, 2], [0, 1, 0, 1, 0, 1]],
+    )
+    expected = DataFrame({"bar": s.values}, index=exp_index).sort_index(level=0)
+    unstacked = s.unstack(0).sort_index()
+    tm.assert_frame_equal(unstacked, expected)
+
+    # GH5873
+    idx = pd.MultiIndex.from_arrays([[101, 102], [3.5, np.nan]])
+    ts = pd.Series([1, 2], index=idx)
+    left = ts.unstack()
+    right = DataFrame(
+        [[np.nan, 1], [2, np.nan]], index=[101, 102], columns=[np.nan, 3.5]
+    )
+    tm.assert_frame_equal(left, right)
+
+    idx = pd.MultiIndex.from_arrays(
+        [
+            ["cat", "cat", "cat", "dog", "dog"],
+            ["a", "a", "b", "a", "b"],
+            [1, 2, 1, 1, np.nan],
+        ]
+    )
+    ts = pd.Series([1.0, 1.1, 1.2, 1.3, 1.4], index=idx)
+    right = DataFrame(
+        [[1.0, 1.3], [1.1, np.nan], [np.nan, 1.4], [1.2, np.nan]],
+        columns=["cat", "dog"],
+    )
+    tpls = [("a", 1), ("a", 2), ("b", np.nan), ("b", 1)]
+    right.index = pd.MultiIndex.from_tuples(tpls)
+    tm.assert_frame_equal(ts.unstack(level=0), right)
+
+
+def test_unstack_tuplename_in_multiindex():
+    # GH 19966
+    idx = pd.MultiIndex.from_product(
+        [["a", "b", "c"], [1, 2, 3]], names=[("A", "a"), ("B", "b")]
+    )
+    ser = pd.Series(1, index=idx)
+    result = ser.unstack(("A", "a"))
+
+    expected = pd.DataFrame(
+        [[1, 1, 1], [1, 1, 1], [1, 1, 1]],
+        columns=pd.MultiIndex.from_tuples(
+            [("a",), ("b",), ("c",)], names=[("A", "a")],
+        ),
+        index=pd.Index([1, 2, 3], name=("B", "b")),
+    )
+    tm.assert_frame_equal(result, expected)
+
+
+@pytest.mark.parametrize(
+    "unstack_idx, expected_values, expected_index, expected_columns",
+    [
+        (
+            ("A", "a"),
+            [[1, 1], [1, 1], [1, 1], [1, 1]],
+            pd.MultiIndex.from_tuples(
+                [(1, 3), (1, 4), (2, 3), (2, 4)], names=["B", "C"]
+            ),
+            pd.MultiIndex.from_tuples([("a",), ("b",)], names=[("A", "a")]),
+        ),
+        (
+            (("A", "a"), "B"),
+            [[1, 1, 1, 1], [1, 1, 1, 1]],
+            pd.Index([3, 4], name="C"),
+            pd.MultiIndex.from_tuples(
+                [("a", 1), ("a", 2), ("b", 1), ("b", 2)], names=[("A", "a"), "B"]
+            ),
+        ),
+    ],
+)
+def test_unstack_mixed_type_name_in_multiindex(
+    unstack_idx, expected_values, expected_index, expected_columns
+):
+    # GH 19966
+    idx = pd.MultiIndex.from_product(
+        [["a", "b"], [1, 2], [3, 4]], names=[("A", "a"), "B", "C"]
+    )
+    ser = pd.Series(1, index=idx)
+    result = ser.unstack(unstack_idx)
+
+    expected = pd.DataFrame(
+        expected_values, columns=expected_columns, index=expected_index,
+    )
+    tm.assert_frame_equal(result, expected)