From efff6689c13139da74210d73a4fe263f9cf9adb6 Mon Sep 17 00:00:00 2001 From: Chaoyi Hu Date: Fri, 21 Jun 2024 20:37:58 -0700 Subject: [PATCH 1/6] Fix insertion of None value in MultiIndex --- pandas/core/indexes/multi.py | 7 +++++-- pandas/tests/test_multilevel.py | 9 +++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9d7c7f3e4a5c9..9205b11fd7847 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3906,8 +3906,11 @@ def insert(self, loc: int, item) -> MultiIndex: # have to insert into level # must insert at end otherwise you have to recompute all the # other codes - lev_loc = len(level) - level = level.insert(lev_loc, k) + if k is None: # GH 59003 + lev_loc = -1 + else: + lev_loc = len(level) + level = level.insert(lev_loc, k) else: lev_loc = level.get_loc(k) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 8f661edf0f241..a075b6f2a5f66 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -6,6 +6,7 @@ import pandas as pd from pandas import ( DataFrame, + Index, MultiIndex, Series, ) @@ -288,6 +289,14 @@ def test_multiindex_with_na(self): tm.assert_frame_equal(df, expected) + def test_multiindex_insert_level_with_na(self): + # GH 59003 + indices = [["one"], ["a"], ["yes"]] + df = DataFrame([0], index=indices).T + df["one", None, "yes"] = 1 + expected = DataFrame([1], index=Index(["yes"])).T + tm.assert_frame_equal(df["one"][None], expected) + class TestSorted: """everything you wanted to test about sorting""" From fc8e0533a25254b6a01f26bd5011f867b4ddc272 Mon Sep 17 00:00:00 2001 From: Chaoyi Hu Date: Sun, 23 Jun 2024 16:54:56 -0700 Subject: [PATCH 2/6] add whatsnew entry --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 3d869bf31f372..ed0fa17fcf9e6 100644 --- 
a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -543,6 +543,7 @@ MultiIndex ^^^^^^^^^^ - :func:`DataFrame.loc` with ``axis=0`` and :class:`MultiIndex` when setting a value adds extra columns (:issue:`58116`) - :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`) +- :meth:`MultiIndex.insert` would not insert None value correctly at unified location of index -1 (:issue:`59003`) - I/O From 385cb65800f339ebcd919746d5c9539b6868f2ce Mon Sep 17 00:00:00 2001 From: Chaoyi Hu Date: Sun, 23 Jun 2024 23:30:21 -0700 Subject: [PATCH 3/6] check for NA values using missing::isna --- pandas/core/indexes/multi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 9205b11fd7847..8e3ebc7816fed 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -3906,7 +3906,7 @@ def insert(self, loc: int, item) -> MultiIndex: # have to insert into level # must insert at end otherwise you have to recompute all the # other codes - if k is None: # GH 59003 + if isna(k): # GH 59003 lev_loc = -1 else: lev_loc = len(level) From bea513e31f4c1f7fdd3603105cced26a5f69719a Mon Sep 17 00:00:00 2001 From: Chaoyi Hu Date: Sun, 23 Jun 2024 23:30:51 -0700 Subject: [PATCH 4/6] adding test insertion of np.nan --- pandas/tests/test_multilevel.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index a075b6f2a5f66..61c44175d36dd 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -296,6 +296,9 @@ def test_multiindex_insert_level_with_na(self): df["one", None, "yes"] = 1 expected = DataFrame([1], index=Index(["yes"])).T tm.assert_frame_equal(df["one"][None], expected) + df["one", np.nan, "yes"] = 1 + expected = DataFrame([1], index=Index(["yes"])).T + tm.assert_frame_equal(df["one"][np.nan], expected) class 
TestSorted: From 62a038c602dd919e9ccc33ca5299bed2b78932ca Mon Sep 17 00:00:00 2001 From: Chaoyi Hu Date: Sun, 23 Jun 2024 23:31:06 -0700 Subject: [PATCH 5/6] update whatsnew entry --- doc/source/whatsnew/v3.0.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index ed0fa17fcf9e6..2fd81d38f3d6a 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -543,7 +543,7 @@ MultiIndex ^^^^^^^^^^ - :func:`DataFrame.loc` with ``axis=0`` and :class:`MultiIndex` when setting a value adds extra columns (:issue:`58116`) - :meth:`DataFrame.melt` would not accept multiple names in ``var_name`` when the columns were a :class:`MultiIndex` (:issue:`58033`) -- :meth:`MultiIndex.insert` would not insert None value correctly at unified location of index -1 (:issue:`59003`) +- :meth:`MultiIndex.insert` would not correctly insert an NA value at the unified location of index ``-1`` (:issue:`59003`) - I/O From 84859a2f6df2c85ea640d9d73515ffb0c31a0e2c Mon Sep 17 00:00:00 2001 From: Chaoyi Hu Date: Mon, 24 Jun 2024 13:14:13 -0700 Subject: [PATCH 6/6] test case revision --- pandas/tests/test_multilevel.py | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index 61c44175d36dd..e87498742061b 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -6,7 +6,6 @@ import pandas as pd from pandas import ( DataFrame, - Index, MultiIndex, Series, ) @@ -289,16 +288,12 @@ def test_multiindex_with_na(self): tm.assert_frame_equal(df, expected) - def test_multiindex_insert_level_with_na(self): + @pytest.mark.parametrize("na", [None, np.nan]) + def test_multiindex_insert_level_with_na(self, na): # GH 59003 - indices = [["one"], ["a"], ["yes"]] - df = DataFrame([0], index=indices).T - df["one", None, "yes"] = 1 - expected = DataFrame([1], index=Index(["yes"])).T - 
tm.assert_frame_equal(df["one"][None], expected) - df["one", np.nan, "yes"] = 1 - expected = DataFrame([1], index=Index(["yes"])).T - tm.assert_frame_equal(df["one"][np.nan], expected) + df = DataFrame([0], columns=[["A"], ["B"]]) + df[na, "B"] = 1 + tm.assert_frame_equal(df[na], DataFrame([1], columns=["B"])) class TestSorted: