From 75a7a7851141523d17aa8cc2405f601953e6ddcc Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 14 Oct 2022 17:36:25 +0200 Subject: [PATCH 1/2] BUG: unstack accessing wrong index level when midx has mixed names --- doc/source/whatsnew/v2.0.0.rst | 1 + pandas/core/reshape/reshape.py | 3 +-- pandas/tests/frame/test_stack_unstack.py | 13 +++++++++++++ pandas/tests/series/methods/test_unstack.py | 14 ++++++++++++++ 4 files changed, 29 insertions(+), 2 deletions(-) diff --git a/doc/source/whatsnew/v2.0.0.rst b/doc/source/whatsnew/v2.0.0.rst index 508d5d8bc4cc1..77c115ed50786 100644 --- a/doc/source/whatsnew/v2.0.0.rst +++ b/doc/source/whatsnew/v2.0.0.rst @@ -263,6 +263,7 @@ Groupby/resample/rolling Reshaping ^^^^^^^^^ - Bug in :meth:`DataFrame.pivot_table` raising ``TypeError`` for nullable dtype and ``margins=True`` (:issue:`48681`) +- Bug in :meth:`DataFrame.unstack` and :meth:`Series.unstack` unstacking wrong level of :class:`MultiIndex` when :class:`MultiIndex` has mixed names (:issue:`48763`) - Bug in :meth:`DataFrame.pivot` not respecting ``None`` as column name (:issue:`48293`) - Bug in :func:`join` when ``left_on`` or ``right_on`` is or includes a :class:`CategoricalIndex` incorrectly raising ``AttributeError`` (:issue:`48464`) - diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index a79332ab3ee55..a437eb39e90a2 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -470,9 +470,8 @@ def unstack(obj: Series | DataFrame, level, fill_value=None): else: level = level[0] - # Prioritize integer interpretation (GH #21677): if not is_integer(level) and not level == "__placeholder__": - level = obj.index._get_level_number(level) + obj.index._get_level_number(level) if isinstance(obj, DataFrame): if isinstance(obj.index, MultiIndex): diff --git a/pandas/tests/frame/test_stack_unstack.py b/pandas/tests/frame/test_stack_unstack.py index 69e5d5e3d5447..22075a30bdb65 100644 --- a/pandas/tests/frame/test_stack_unstack.py +++ b/pandas/tests/frame/test_stack_unstack.py @@ -2183,3 +2183,16 @@ def test_stack_nullable_dtype(self): # be an EA expected = df.astype(object).stack("station") tm.assert_frame_equal(result, expected) + + def test_unstack_mixed_level_names(self): + # GH#48763 + arrays = [["a", "a"], [1, 2], ["red", "blue"]] + idx = MultiIndex.from_arrays(arrays, names=("x", 0, "y")) + df = DataFrame({"m": [1, 2]}, index=idx) + result = df.unstack("x") + expected = DataFrame( + [[1], [2]], + columns=MultiIndex.from_tuples([("m", "a")], names=[None, "x"]), + index=MultiIndex.from_tuples([(1, "red"), (2, "blue")], names=[0, "y"]), + ) + tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/methods/test_unstack.py b/pandas/tests/series/methods/test_unstack.py index 23b068214dd91..980fcbc141822 100644 --- a/pandas/tests/series/methods/test_unstack.py +++ b/pandas/tests/series/methods/test_unstack.py @@ -147,3 +147,17 @@ def test_unstack_multi_index_categorical_values(): index=dti.rename("major"), ) tm.assert_frame_equal(result, expected) + + +def test_unstack_mixed_level_names(): + # GH#48763 + arrays = [["a", "a"], [1, 2], ["red", "blue"]] + idx = MultiIndex.from_arrays(arrays, names=("x", 0, "y")) + ser = Series([1, 2], index=idx) + result = ser.unstack("x") + expected = DataFrame( + [[1], [2]], + columns=pd.Index(["a"], name="x"), + index=MultiIndex.from_tuples([(1, "red"), (2, "blue")], names=[0, "y"]), + ) + tm.assert_frame_equal(result, expected) From 9e4f1ff2a9b9b64e82094d6c7914b63c25290ff8 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Fri, 14 Oct 2022 19:29:13 +0200 Subject: [PATCH 2/2] Add comment --- pandas/core/reshape/reshape.py | 1 + 1 file changed, 1 insertion(+) diff --git a/pandas/core/reshape/reshape.py b/pandas/core/reshape/reshape.py index a437eb39e90a2..856bef3e42ee0 100644 --- a/pandas/core/reshape/reshape.py +++ b/pandas/core/reshape/reshape.py @@ -471,6 +471,7 @@ def unstack(obj: Series | DataFrame, level, fill_value=None): level = level[0] if not is_integer(level) and not level == "__placeholder__": + # check if level is valid in case of regular index obj.index._get_level_number(level) if isinstance(obj, DataFrame):