From 0149a1c45041d31ccbee5afe7c117273e6881673 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 28 Aug 2022 13:44:16 +0200 Subject: [PATCH 1/5] BUG: MultiIndex.append not checking names for equality --- doc/source/whatsnew/v1.6.0.rst | 2 +- pandas/core/indexes/multi.py | 12 +++++------- pandas/tests/indexes/multi/test_reshape.py | 9 +++++++++ 3 files changed, 15 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index eac5e5d3a0f52..b0d3e64e608f9 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -161,7 +161,7 @@ Missing MultiIndex ^^^^^^^^^^ -- +- Bug in :meth:´MultiIndex.append` not checking names for equality (:issue:`40000`) - I/O diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 63c78b7002786..5615de71e6706 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2203,21 +2203,19 @@ def append(self, other): if all( (isinstance(o, MultiIndex) and o.nlevels >= self.nlevels) for o in other ): - arrays = [] + arrays, names = [], [] for i in range(self.nlevels): label = self._get_level_values(i) appended = [o._get_level_values(i) for o in other] arrays.append(label.append(appended)) - return MultiIndex.from_arrays(arrays, names=self.names) + level_names = {label.name}.union({x.name for x in appended}) + names.append(None if len(level_names) > 1 else label.name) + return MultiIndex.from_arrays(arrays, names=names) to_concat = (self._values,) + tuple(k._values for k in other) new_tuples = np.concatenate(to_concat) - # if all(isinstance(x, MultiIndex) for x in other): - try: - return MultiIndex.from_tuples(new_tuples, names=self.names) - except (TypeError, IndexError): - return Index._with_infer(new_tuples) + return Index._with_infer(new_tuples) def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: return self._values.argsort(*args, **kwargs) diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index b1deec12b1adb..484d82b6c64f5 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -150,6 +150,15 @@ def test_append_index(): tm.assert_index_equal(result, expected) +@pytest.mark.parametrize("name, exp", [("b", "b"), ("c", None)]) +def test_append_names_match(name, exp): + midx = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + midx2 = MultiIndex.from_arrays([[3], [5]], names=["a", name]) + result = midx.append(midx2) + expected = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=["a", exp]) + tm.assert_index_equal(result, expected) + + def test_repeat(): reps = 2 numbers = [1, 2, 3] From af109932bfec54be35e66af1a0971ba7d2af5100 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 28 Aug 2022 13:45:06 +0200 Subject: [PATCH 2/5] BUG: MultiIndex.append not checking names for equality --- doc/source/whatsnew/v1.6.0.rst | 2 +- pandas/tests/indexes/multi/test_reshape.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index b0d3e64e608f9..7771ddd19fc1a 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -161,7 +161,7 @@ Missing MultiIndex ^^^^^^^^^^ -- Bug in :meth:´MultiIndex.append` not checking names for equality (:issue:`40000`) +- Bug in :meth:´MultiIndex.append` not checking names for equality (:issue:`48288`) - I/O diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index 484d82b6c64f5..e11d3cf9cccb8 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -152,6 +152,7 @@ def test_append_index(): @pytest.mark.parametrize("name, exp", [("b", "b"), ("c", None)]) def test_append_names_match(name, exp): + # GH#48288 midx = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) midx2 = MultiIndex.from_arrays([[3], [5]], names=["a", name]) result = midx.append(midx2) From aac88b59461ff0342445614f6ba25c5a4e0f04cf Mon Sep 17 00:00:00 2001 From: Patrick Hoefler <61934744+phofl@users.noreply.github.com> Date: Mon, 29 Aug 2022 20:35:28 +0200 Subject: [PATCH 3/5] Update doc/source/whatsnew/v1.6.0.rst Co-authored-by: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> --- doc/source/whatsnew/v1.6.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst index 7771ddd19fc1a..83dfacb46784b 100644 --- a/doc/source/whatsnew/v1.6.0.rst +++ b/doc/source/whatsnew/v1.6.0.rst @@ -161,7 +161,7 @@ Missing MultiIndex ^^^^^^^^^^ -- Bug in :meth:´MultiIndex.append` not checking names for equality (:issue:`48288`) +- Bug in :meth:`MultiIndex.append` not checking names for equality (:issue:`48288`) - I/O From 16d2332269d1e2f891af011e0e718ead2cbb8b6c Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 29 Aug 2022 21:53:20 +0200 Subject: [PATCH 4/5] Add test and fix bug --- pandas/core/indexes/multi.py | 8 +++++++- pandas/core/reshape/pivot.py | 8 +++++++- pandas/tests/indexes/multi/test_reshape.py | 9 +++++++++ 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 5615de71e6706..a11320f2ad6d2 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2215,7 +2215,13 @@ def append(self, other): to_concat = (self._values,) + tuple(k._values for k in other) new_tuples = np.concatenate(to_concat) - return Index._with_infer(new_tuples) + # if all(isinstance(x, MultiIndex) for x in other): + try: + # We only get here if other contains at least one index with tuples, + # setting names to None automatically + return MultiIndex.from_tuples(new_tuples) + except (TypeError, IndexError): + return Index._with_infer(new_tuples) def argsort(self, *args, **kwargs) -> npt.NDArray[np.intp]: return self._values.argsort(*args, **kwargs) diff --git a/pandas/core/reshape/pivot.py b/pandas/core/reshape/pivot.py index 867835ef7f0a3..4f3abb922c6df 100644 --- a/pandas/core/reshape/pivot.py +++ b/pandas/core/reshape/pivot.py @@ -391,7 +391,13 @@ def _all_key(key): # GH31016 this is to calculate margin for each group, and assign # corresponded key as index transformed_piece = DataFrame(piece.apply(aggfunc)).T - transformed_piece.index = Index([all_key], name=piece.index.name) + if isinstance(piece.index, MultiIndex): + # We are adding an empty level + transformed_piece.index = MultiIndex.from_tuples( + [all_key], names=piece.index.names + [None] + ) + else: + transformed_piece.index = Index([all_key], name=piece.index.name) # append piece for margin into table_piece table_pieces.append(transformed_piece) diff --git a/pandas/tests/indexes/multi/test_reshape.py b/pandas/tests/indexes/multi/test_reshape.py index e11d3cf9cccb8..f4b845be2709c 100644 --- a/pandas/tests/indexes/multi/test_reshape.py +++ b/pandas/tests/indexes/multi/test_reshape.py @@ -160,6 +160,15 @@ def test_append_names_match(name, exp): tm.assert_index_equal(result, expected) +def test_append_names_dont_match(): + # GH#48288 + midx = MultiIndex.from_arrays([[1, 2], [3, 4]], names=["a", "b"]) + midx2 = MultiIndex.from_arrays([[3], [5]], names=["x", "y"]) + result = midx.append(midx2) + expected = MultiIndex.from_arrays([[1, 2, 3], [3, 4, 5]], names=None) + tm.assert_index_equal(result, expected) + + def test_repeat(): reps = 2 numbers = [1, 2, 3] From 94522fcc91b978fe7782c87800e5992d22241c34 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Mon, 29 Aug 2022 23:40:44 +0200 Subject: [PATCH 5/5] Improve name checking --- pandas/core/indexes/multi.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index a11320f2ad6d2..57b15ca4001d0 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -2208,8 +2208,8 @@ def append(self, other): label = self._get_level_values(i) appended = [o._get_level_values(i) for o in other] arrays.append(label.append(appended)) - level_names = {label.name}.union({x.name for x in appended}) - names.append(None if len(level_names) > 1 else label.name) + single_label_name = all(label.name == x.name for x in appended) + names.append(label.name if single_label_name else None) return MultiIndex.from_arrays(arrays, names=names) to_concat = (self._values,) + tuple(k._values for k in other)