From 8bb215cfd63a1a12d0ef4088f5d5665c2413f4e1 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 25 Sep 2020 20:49:35 +0000 Subject: [PATCH 1/9] handle column index construction for MultiIndex --- pandas/core/groupby/generic.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 9c78166ce0480..e9830c16c9419 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1645,8 +1645,12 @@ def _wrap_aggregated_output( DataFrame """ indexed_output = {key.position: val for key, val in output.items()} - name = self._obj_with_exclusions._get_axis(1 - self.axis).name - columns = Index([key.label for key in output], name=name) + + agg_axis = self._obj_with_exclusions._get_axis(1 - self.axis) + if isinstance(agg_axis, MultiIndex): + columns = Index([key.label for key in output], names=agg_axis.names) + else: + columns = Index([key.label for key in output], name=agg_axis.name) result = self.obj._constructor(indexed_output) result.columns = columns From 47b223f52a63b2c963c78aa7e27695db1b150183 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Fri, 25 Sep 2020 21:40:59 +0000 Subject: [PATCH 2/9] add test --- pandas/tests/reshape/test_pivot_multilevel.py | 34 ++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/pandas/tests/reshape/test_pivot_multilevel.py b/pandas/tests/reshape/test_pivot_multilevel.py index 8374e829e6a28..f59a469c05d15 100644 --- a/pandas/tests/reshape/test_pivot_multilevel.py +++ b/pandas/tests/reshape/test_pivot_multilevel.py @@ -2,7 +2,7 @@ import pytest import pandas as pd -from pandas import Index, MultiIndex +from pandas import Index, Int64Index, MultiIndex import pandas._testing as tm @@ -190,3 +190,35 @@ def test_pivot_list_like_columns( expected_values, columns=expected_columns, index=expected_index ) tm.assert_frame_equal(result, expected) + + +def test_pivot_multiindexed_rows_and_cols(): + # GH 36360 + + df = 
pd.DataFrame( + data=np.arange(12).reshape(4, 3), + columns=MultiIndex.from_tuples( + [(0, 0), (0, 1), (0, 2)], names=["col_L0", "col_L1"] + ), + index=MultiIndex.from_tuples( + [(0, 0, 0), (0, 0, 1), (1, 1, 1), (1, 0, 0)], + names=["idx_L0", "idx_L1", "idx_L2"], + ), + ) + + res = df.pivot_table( + index=["idx_L0"], + columns=["idx_L1"], + values=[(0, 1)], + aggfunc=lambda col: col.values.sum(), + ) + + expected = pd.DataFrame( + data=[[5.0, np.nan], [10.0, 7.0]], + columns=MultiIndex.from_tuples( + [(0, 1, 0), (0, 1, 1)], names=["col_L0", "col_L1", "idx_L1"] + ), + index=Int64Index([0, 1], dtype="int64", name="idx_L0"), + ) + + tm.assert_frame_equal(res, expected) From 433318ae8e4d59bdee36ea56cf711aa8574b40ae Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sat, 26 Sep 2020 03:27:53 +0000 Subject: [PATCH 3/9] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 8aad4c6985f28..9ac56ecb0fa6f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -474,6 +474,7 @@ Reshaping - Bug in func :meth:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`) - Bug in :meth:`DataFrame.agg` with ``func={'name':}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`) - Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`) +- Bug in :meth:`DataFrame.pivot` did not preserve :class:`MultiIndex` level names for columns when rows and columns are both multiindexed (:issue:`36360`) - Sparse From d02eace5cc75fe85b9043df9fb659f03817565d8 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Wed, 7 Oct 2020 21:40:30 +0000 Subject: [PATCH 4/9] remove special casing in _wrap_aggregated_output --- pandas/core/groupby/generic.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git 
a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index e9830c16c9419..0ac05598118c4 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1647,10 +1647,7 @@ def _wrap_aggregated_output( indexed_output = {key.position: val for key, val in output.items()} agg_axis = self._obj_with_exclusions._get_axis(1 - self.axis) - if isinstance(agg_axis, MultiIndex): - columns = Index([key.label for key in output], names=agg_axis.names) - else: - columns = Index([key.label for key in output], name=agg_axis.name) + columns = Index([key.label for key in output], names=agg_axis.names) result = self.obj._constructor(indexed_output) result.columns = columns From 10dff12d35605dd37d3d89a4fe46f31bad9fe0ad Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 8 Oct 2020 07:02:06 +0000 Subject: [PATCH 5/9] drop the if-else clause --- pandas/core/groupby/generic.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 0ac05598118c4..581e476e4ee98 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1645,9 +1645,8 @@ def _wrap_aggregated_output( DataFrame """ indexed_output = {key.position: val for key, val in output.items()} - agg_axis = self._obj_with_exclusions._get_axis(1 - self.axis) - columns = Index([key.label for key in output], names=agg_axis.names) + columns = Index([key.label for key in output], name=agg_axis.names) result = self.obj._constructor(indexed_output) result.columns = columns From 25cfc8ea6b97b3ab826f5c1766ea17d05ecf075d Mon Sep 17 00:00:00 2001 From: arw2019 Date: Thu, 8 Oct 2020 17:07:37 +0000 Subject: [PATCH 6/9] handle name='[[None]' in Index constructor --- pandas/core/indexes/base.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 87dd15d5b142b..9736df3495adb 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ 
-387,6 +387,11 @@ def __new__( ) # other iterable of some kind subarr = com.asarray_tuplesafe(data, dtype=object) + + if isinstance(name, list) and len(name) == 1: + # GH 36655: if name == [List[str]] we want List[str] + name = name[0] + return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) """ From 074f99e6ef1493bca6218f0b57491563041e0354 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 11 Oct 2020 04:52:33 +0000 Subject: [PATCH 7/9] revert changes to Index constructor --- pandas/core/indexes/base.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9736df3495adb..87dd15d5b142b 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -387,11 +387,6 @@ def __new__( ) # other iterable of some kind subarr = com.asarray_tuplesafe(data, dtype=object) - - if isinstance(name, list) and len(name) == 1: - # GH 36655: if name == [List[str]] we want List[str] - name = name[0] - return Index(subarr, dtype=dtype, copy=copy, name=name, **kwargs) """ From c2d5f9e9f26326c960622e464b8c5d6c5fae68d3 Mon Sep 17 00:00:00 2001 From: arw2019 Date: Sun, 11 Oct 2020 04:53:10 +0000 Subject: [PATCH 8/9] use _get_names() in column Index constructor --- pandas/core/groupby/generic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 581e476e4ee98..96e183cb35ba3 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1646,7 +1646,7 @@ def _wrap_aggregated_output( """ indexed_output = {key.position: val for key, val in output.items()} agg_axis = self._obj_with_exclusions._get_axis(1 - self.axis) - columns = Index([key.label for key in output], name=agg_axis.names) + columns = Index([key.label for key in output], name=agg_axis._get_names()) result = self.obj._constructor(indexed_output) result.columns = columns From 14bfdc9c4b752343d74dece56b0fbf5ee944fde0 Mon Sep 17 00:00:00 2001 
From: arw2019 Date: Sun, 11 Oct 2020 16:14:57 +0000 Subject: [PATCH 9/9] use _set_index in _wrap_aggregated_output --- pandas/core/groupby/generic.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index 96e183cb35ba3..505435be9127c 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -1645,8 +1645,8 @@ def _wrap_aggregated_output( DataFrame """ indexed_output = {key.position: val for key, val in output.items()} - agg_axis = self._obj_with_exclusions._get_axis(1 - self.axis) - columns = Index([key.label for key in output], name=agg_axis._get_names()) + columns = Index([key.label for key in output]) + columns._set_names(self._obj_with_exclusions._get_axis(1 - self.axis).names) result = self.obj._constructor(indexed_output) result.columns = columns