From 723a555167cd0f1a6767bc0e2eb9f2855ed08cd7 Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Sun, 1 Dec 2024 13:22:49 -0800 Subject: [PATCH 1/3] BUG: Fix KeyError when indexing MultiIndex columns with NaT values --- pandas/core/indexes/multi.py | 7 +++---- .../tests/frame/methods/test_reset_index.py | 21 +++++++++++++++++++ 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py index 36e68465a99d9..dc48cd1ed958e 100644 --- a/pandas/core/indexes/multi.py +++ b/pandas/core/indexes/multi.py @@ -4084,11 +4084,10 @@ def insert(self, loc: int, item) -> MultiIndex: # have to insert into level # must insert at end otherwise you have to recompute all the # other codes - if isna(k): # GH 59003 + lev_loc = len(level) + level = level.insert(lev_loc, k) + if isna(level[lev_loc]): # GH 59003, 60388 lev_loc = -1 - else: - lev_loc = len(level) - level = level.insert(lev_loc, k) else: lev_loc = level.get_loc(k) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 88e43b678a7e4..1489d4d7458ff 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -544,6 +544,27 @@ def test_reset_index_datetime3(self, tz_naive_fixture): result = df.reset_index() tm.assert_frame_equal(result, expected) + def test_reset_index_multiindex_dt_with_nan(self): + # GH#60388 + df = DataFrame( + { + "Date": [ + datetime(2024, 11, 1), + datetime(2024, 11, 1), + datetime(2024, 11, 2), + datetime(2024, 11, 2), + ], + "sub": ["a", "b", "c", "d"], + "value1": [1, 2, 3, 4], + "value2": [5, 6, 7, 8], + } + ) + df = df.pivot(index="sub", columns="Date", values=["value1", "value2"]) + df = df.reset_index() + result = df[df.columns[0]] + expected = Series(["a", "b", "c", "d"], name=("sub", np.nan)) + tm.assert_series_equal(result, expected) + def test_reset_index_period(self): # GH#7746 idx = MultiIndex.from_product( From 
19bbdb96ebe62b936932407d58f30f3b4c8645ff Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Mon, 2 Dec 2024 11:09:43 -0800 Subject: [PATCH 2/3] BUG: Update whatsnew/v3.0.0.rst --- doc/source/whatsnew/v3.0.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index e74bd2f745b94..e73ee0dfbe67e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -667,6 +667,7 @@ Indexing ^^^^^^^^ - Bug in :meth:`DataFrame.__getitem__` returning modified columns when called with ``slice`` in Python 3.12 (:issue:`57500`) - Bug in :meth:`DataFrame.from_records` throwing a ``ValueError`` when passed an empty list in ``index`` (:issue:`58594`) +- Bug in :meth:`MultiIndex.insert` where a new value inserted into a datetime-like level was cast to ``NaT``, causing subsequent indexing to raise a ``KeyError`` (:issue:`60388`) - Bug in printing :attr:`Index.names` and :attr:`MultiIndex.levels` would not escape single quotes (:issue:`60190`) Missing From ee877896594a3baf259c64aeb77af3f2f8f9f056 Mon Sep 17 00:00:00 2001 From: Nitish Satyavolu Date: Mon, 2 Dec 2024 11:17:58 -0800 Subject: [PATCH 3/3] BUG: Move new test to test_multilevel.py --- .../tests/frame/methods/test_reset_index.py | 21 ----------------- pandas/tests/test_multilevel.py | 23 +++++++++++++++++++ 2 files changed, 23 insertions(+), 21 deletions(-) diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 1489d4d7458ff..88e43b678a7e4 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -544,27 +544,6 @@ def test_reset_index_datetime3(self, tz_naive_fixture): result = df.reset_index() tm.assert_frame_equal(result, expected) - def test_reset_index_multiindex_dt_with_nan(self): - # GH#60388 - df = DataFrame( - { - "Date": [ - datetime(2024, 11, 1), - datetime(2024, 11, 1), - datetime(2024, 11, 2), - datetime(2024, 11, 2), - ], - "sub": ["a", "b", "c", 
"d"], - "value1": [1, 2, 3, 4], - "value2": [5, 6, 7, 8], - } - ) - df = df.pivot(index="sub", columns="Date", values=["value1", "value2"]) - df = df.reset_index() - result = df[df.columns[0]] - expected = Series(["a", "b", "c", "d"], name=("sub", np.nan)) - tm.assert_series_equal(result, expected) - def test_reset_index_period(self): # GH#7746 idx = MultiIndex.from_product( diff --git a/pandas/tests/test_multilevel.py b/pandas/tests/test_multilevel.py index e87498742061b..a23e6d9b3973a 100644 --- a/pandas/tests/test_multilevel.py +++ b/pandas/tests/test_multilevel.py @@ -295,6 +295,29 @@ def test_multiindex_insert_level_with_na(self, na): df[na, "B"] = 1 tm.assert_frame_equal(df[na], DataFrame([1], columns=["B"])) + def test_multiindex_dt_with_nan(self): + # GH#60388 + df = DataFrame( + [ + [1, np.nan, 5, np.nan], + [2, np.nan, 6, np.nan], + [np.nan, 3, np.nan, 7], + [np.nan, 4, np.nan, 8], + ], + index=Series(["a", "b", "c", "d"], dtype=object, name="sub"), + columns=MultiIndex.from_product( + [ + ["value1", "value2"], + [datetime.datetime(2024, 11, 1), datetime.datetime(2024, 11, 2)], + ], + names=[None, "Date"], + ), + ) + df = df.reset_index() + result = df[df.columns[0]] + expected = Series(["a", "b", "c", "d"], name=("sub", np.nan)) + tm.assert_series_equal(result, expected) + class TestSorted: """everything you wanted to test about sorting"""