From 549386ceb4bfd72648dc02021f315c4b0fd7cbb0 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sat, 9 Oct 2021 22:35:27 +0200
Subject: [PATCH 01/43] fix problem without adding tests

---
 pandas/_libs/hashtable.pyx                  |  1 +
 pandas/_libs/index.pyx                      | 20 ++++++++++++++++++--
 pandas/core/generic.py                      |  3 +++
 pandas/core/indexes/base.py                 |  5 +++++
 pandas/core/indexes/multi.py                | 15 ++++++++++++++-
 pandas/core/indexing.py                     |  1 +
 pandas/tests/indexes/multi/test_indexing.py |  1 +
 7 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index 6e97c13c644cf..d7a2861013a1a 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -121,6 +121,7 @@ cdef class ObjectFactorizer(Factorizer):
             uniques = ObjectVector()
             uniques.extend(self.uniques.to_array())
             self.uniques = uniques
+        print('WE ARE IN FACTORIZE')
         labels = self.table.get_labels(values, self.uniques,
                                        self.count, na_sentinel, na_value)
         mask = (labels == na_sentinel)
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index ea0bebea8299b..0716e8792536a 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -129,6 +129,7 @@ cdef class IndexEngine:
         # We assume before we get here:
         #  - val is hashable
         self._ensure_mapping_populated()
+        #print('THIS IS THE MAPPING ' + str(self.mapping))
         return val in self.mapping
 
     cpdef get_loc(self, object val):
@@ -158,6 +159,8 @@ cdef class IndexEngine:
             return self._get_loc_duplicates(val)
 
         try:
+            #print('THIS IS val: ' + str(val))
+            #print('THIS IS MAPPING: ' + str(self.mapping))
             return self.mapping.get_item(val)
         except OverflowError as err:
             # GH#41775 OverflowError e.g. if we are uint64 and val is -1
@@ -594,13 +597,16 @@ cdef class BaseMultiIndexCodesEngine:
 
         # Transform labels in a single array, and add 1 so that we are working
         # with positive integers (-1 for NaN becomes 0):
+        #print('\nTHIS ARE LABELS IN INITIALIZATION OF BASEMULTIINDEXCODESENGINE: ' + str(labels) + " THIS IS THE TYPE OF LABELS: " + str(type(labels)))
         codes = (np.array(labels, dtype='int64').T + 1).astype('uint64',
                                                                copy=False)
+        #print('\nTHIS ARE THE CODES: ' + str(codes) + " THIS IS THE TYPE OF CODES: " + str(type(codes)) + '\n')
 
         # Map each codes combination in the index to an integer unambiguously
         # (no collisions possible), based on the "offsets", which describe the
         # number of bits to switch labels for each level:
         lab_ints = self._codes_to_ints(codes)
+        #print('\nTHIS ARE THE LAB INTS: ' + str(lab_ints) + " THIS IS THE TYPE OF LAB INTS: " + str(type(codes)) + '\n')
 
         # Initialize underlying index (e.g. libindex.UInt64Engine) with
         # integers representing labels: we will use its get_loc and get_indexer
@@ -731,13 +737,23 @@ cdef class BaseMultiIndexCodesEngine:
         return sorted_indexer[np.argsort(target_order)]
 
     def get_loc(self, object key):
+        #print('get_loc, we want to get key' + str(key))
         if is_definitely_invalid_key(key):
             raise TypeError(f"'{key}' is an invalid key")
         if not isinstance(key, tuple):
             raise KeyError(key)
+        # print('LEVELS: ' + str(type(self.levels[0])) + 'KEYS: ' + str(key))
         try:
-            indices = [0 if checknull(v) else lev.get_loc(v) + 1
-                       for lev, v in zip(self.levels, key)]
+            # indices = [0 if checknull(v) else lev.get_loc(v) + 1
+            #            for lev, v in zip(self.levels, key)]
+            indices = []
+            for key, lev in zip(key, self.levels):
+                try:
+                    indices.append(lev.get_loc(key) + 1)
+                except KeyError:
+                    indices.append(0)
+            # indices = [lev.get_loc(v) + 1
+            #            for lev, v in zip(self.levels, key)]
         except KeyError:
             raise KeyError(key)
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index b235f120d98c8..123b0747dce70 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1835,6 +1835,7 @@ def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray:
         axis = self._get_axis_number(axis)
         other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]
 
+        # import pdb; pdb.set_trace()
         if self._is_label_reference(key, axis=axis):
             self._check_label_or_level_ambiguity(key, axis=axis)
             values = self.xs(key, axis=other_axes[0])._values
@@ -3846,6 +3847,8 @@ class   animal   locomotion
         self._consolidate_inplace()
 
         if isinstance(index, MultiIndex):
+            #import pdb; pdb.set_trace()
+            import traceback as tb;
             loc, new_index = index._get_loc_level(key, level=0)
             if not drop_level:
                 if lib.is_integer(loc):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 2ff9b3973a526..ee83709df4257 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3460,8 +3460,13 @@ def get_loc(self, key, method=None, tolerance=None):
                     "tolerance argument only valid if using pad, "
                     "backfill or nearest lookups"
                 )
+            # print(f'key before casting {key}')
             casted_key = self._maybe_cast_indexer(key)
             try:
+                #import pdb; pdb.set_trace()
+                # print(f'casted key {casted_key}')
+                # import pdb; pdb.set_trace()
+                # import traceback as tb; tb.print_stack()
                 return self._engine.get_loc(casted_key)
             except KeyError as err:
                 raise KeyError(key) from err
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index daca14692ed09..237e718a0f7bd 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -132,7 +132,15 @@ def _codes_to_ints(self, codes):
         """
         # Shift the representation of each level by the pre-calculated number
         # of bits:
+        #print(f'codes {codes}, self.offsets {self.offsets}')
+        #import pdb; pdb.set_trace()
         codes <<= self.offsets
+        #if len(codes.shape) == 1 and list(codes) == [4,0]:
+            # import traceback as tb; tb.print_stack()
+            #codes = np.array([4, 2])
+            #import pdb; pdb.set_trace()
+            #print('hi')
+        #print(f'codes after shifting {codes}')
 
         # Now sum and OR are in fact interchangeable. This is a simple
         # composition of the (disjunct) significant bits of each level (i.e.
@@ -3001,7 +3009,12 @@ def maybe_mi_droplevels(indexer, levels):
                 if len(key) == self.nlevels and self.is_unique:
                     # Complete key in unique index -> standard get_loc
                     try:
-                        return (self._engine.get_loc(key), None)
+                        engine = self._engine
+                        #import pdb; pdb.set_trace()
+                        # print(f'key before handing it to engine {engine} is key: {key}')
+                        # import pdb; pdb.set_trace()
+                        loc_res = engine.get_loc(key)
+                        return (loc_res, None)
                     except KeyError as err:
                         raise KeyError(key) from err
                     except TypeError:
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index bbb3cb3391dfa..24d543d2af139 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1110,6 +1110,7 @@ def _get_label(self, label, axis: int):
 
     def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
         # we have an axis0 multi-index, handle or raise
+        #import pdb; pdb.set_trace()
         axis = self.axis or 0
         try:
             # fast path for series or for tup devoid of slices
diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py
index 99322f474dd9e..e7b67fdc9f9c7 100644
--- a/pandas/tests/indexes/multi/test_indexing.py
+++ b/pandas/tests/indexes/multi/test_indexing.py
@@ -625,6 +625,7 @@ def test_get_loc_cast_bool(self):
         # GH 19086 : int is casted to bool, but not vice-versa
         levels = [[False, True], np.arange(2, dtype="int64")]
         idx = MultiIndex.from_product(levels)
+        import pdb; pdb.set_trace()
 
         assert idx.get_loc((0, 1)) == 1
         assert idx.get_loc((1, 0)) == 2

From 81970c7163f2685ee79765d8a360e729008d6fb8 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sat, 9 Oct 2021 22:42:22 +0200
Subject: [PATCH 02/43] add test, still not all tests running

---
 pandas/tests/indexing/multiindex/test_getitem.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index 3790a6e9a5319..441591047eb62 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -392,3 +392,17 @@ def test_loc_empty_multiindex():
     result = df
     expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
     tm.assert_frame_equal(result, expected)
+
+def test_loc_nan_multiindex():
+    df = DataFrame(
+        {
+            "temp_playlist": [0, 0, 0, 0],
+            "objId": ["o1", np.nan, "o1", np.nan],
+            "x": [1, 2, 3, 4],
+        }
+    )
+
+    agg_df = df.groupby(by=['temp_playlist', 'objId'], dropna=False)["x"].agg(list)
+    result = agg_df.loc[agg_df.index[-1]]
+    expected = [2, 4]
+    assert result == expected

From 8443c62941431886d8594aaa172a9cab581cb03d Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 10 Oct 2021 12:20:23 +0200
Subject: [PATCH 03/43] remove print and debug statements

---
 pandas/_libs/hashtable.pyx                  |  1 -
 pandas/_libs/index.pyx                      | 28 ++++++---------------
 pandas/core/generic.py                      |  3 ---
 pandas/core/indexes/base.py                 |  5 ----
 pandas/core/indexes/multi.py                | 15 +----------
 pandas/core/indexing.py                     |  1 -
 pandas/tests/indexes/multi/test_indexing.py |  1 -
 7 files changed, 9 insertions(+), 45 deletions(-)

diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index d7a2861013a1a..6e97c13c644cf 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -121,7 +121,6 @@ cdef class ObjectFactorizer(Factorizer):
             uniques = ObjectVector()
             uniques.extend(self.uniques.to_array())
             self.uniques = uniques
-        print('WE ARE IN FACTORIZE')
         labels = self.table.get_labels(values, self.uniques,
                                        self.count, na_sentinel, na_value)
         mask = (labels == na_sentinel)
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 0716e8792536a..f1b174774d6fe 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -129,7 +129,6 @@ cdef class IndexEngine:
         # We assume before we get here:
         #  - val is hashable
         self._ensure_mapping_populated()
-        #print('THIS IS THE MAPPING ' + str(self.mapping))
         return val in self.mapping
 
     cpdef get_loc(self, object val):
@@ -159,8 +158,6 @@ cdef class IndexEngine:
             return self._get_loc_duplicates(val)
 
         try:
-            #print('THIS IS val: ' + str(val))
-            #print('THIS IS MAPPING: ' + str(self.mapping))
             return self.mapping.get_item(val)
         except OverflowError as err:
             # GH#41775 OverflowError e.g. if we are uint64 and val is -1
@@ -597,16 +594,13 @@ cdef class BaseMultiIndexCodesEngine:
 
         # Transform labels in a single array, and add 1 so that we are working
         # with positive integers (-1 for NaN becomes 0):
-        #print('\nTHIS ARE LABELS IN INITIALIZATION OF BASEMULTIINDEXCODESENGINE: ' + str(labels) + " THIS IS THE TYPE OF LABELS: " + str(type(labels)))
         codes = (np.array(labels, dtype='int64').T + 1).astype('uint64',
                                                                copy=False)
-        #print('\nTHIS ARE THE CODES: ' + str(codes) + " THIS IS THE TYPE OF CODES: " + str(type(codes)) + '\n')
 
         # Map each codes combination in the index to an integer unambiguously
         # (no collisions possible), based on the "offsets", which describe the
         # number of bits to switch labels for each level:
         lab_ints = self._codes_to_ints(codes)
-        #print('\nTHIS ARE THE LAB INTS: ' + str(lab_ints) + " THIS IS THE TYPE OF LAB INTS: " + str(type(codes)) + '\n')
 
         # Initialize underlying index (e.g. libindex.UInt64Engine) with
         # integers representing labels: we will use its get_loc and get_indexer
@@ -737,25 +731,19 @@ cdef class BaseMultiIndexCodesEngine:
         return sorted_indexer[np.argsort(target_order)]
 
     def get_loc(self, object key):
-        #print('get_loc, we want to get key' + str(key))
         if is_definitely_invalid_key(key):
             raise TypeError(f"'{key}' is an invalid key")
         if not isinstance(key, tuple):
             raise KeyError(key)
-        # print('LEVELS: ' + str(type(self.levels[0])) + 'KEYS: ' + str(key))
-        try:
-            # indices = [0 if checknull(v) else lev.get_loc(v) + 1
-            #            for lev, v in zip(self.levels, key)]
-            indices = []
-            for key, lev in zip(key, self.levels):
-                try:
-                    indices.append(lev.get_loc(key) + 1)
-                except KeyError:
+        indices = []
+        for k, lev in zip(key, self.levels):
+            try:
+                indices.append(lev.get_loc(k) + 1)
+            except KeyError as err:
+                if checknull(k):
                     indices.append(0)
-            # indices = [lev.get_loc(v) + 1
-            #            for lev, v in zip(self.levels, key)]
-        except KeyError:
-            raise KeyError(key)
+                else:
+                    raise KeyError(key) from err
 
         # Transform indices into single integer:
         lab_int = self._codes_to_ints(np.array(indices, dtype='uint64'))
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 123b0747dce70..b235f120d98c8 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1835,7 +1835,6 @@ def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray:
         axis = self._get_axis_number(axis)
         other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]
 
-        # import pdb; pdb.set_trace()
         if self._is_label_reference(key, axis=axis):
             self._check_label_or_level_ambiguity(key, axis=axis)
             values = self.xs(key, axis=other_axes[0])._values
@@ -3847,8 +3846,6 @@ class   animal   locomotion
         self._consolidate_inplace()
 
         if isinstance(index, MultiIndex):
-            #import pdb; pdb.set_trace()
-            import traceback as tb;
             loc, new_index = index._get_loc_level(key, level=0)
             if not drop_level:
                 if lib.is_integer(loc):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index ee83709df4257..2ff9b3973a526 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3460,13 +3460,8 @@ def get_loc(self, key, method=None, tolerance=None):
                     "tolerance argument only valid if using pad, "
                     "backfill or nearest lookups"
                 )
-            # print(f'key before casting {key}')
             casted_key = self._maybe_cast_indexer(key)
             try:
-                #import pdb; pdb.set_trace()
-                # print(f'casted key {casted_key}')
-                # import pdb; pdb.set_trace()
-                # import traceback as tb; tb.print_stack()
                 return self._engine.get_loc(casted_key)
             except KeyError as err:
                 raise KeyError(key) from err
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 237e718a0f7bd..daca14692ed09 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -132,15 +132,7 @@ def _codes_to_ints(self, codes):
         """
         # Shift the representation of each level by the pre-calculated number
         # of bits:
-        #print(f'codes {codes}, self.offsets {self.offsets}')
-        #import pdb; pdb.set_trace()
         codes <<= self.offsets
-        #if len(codes.shape) == 1 and list(codes) == [4,0]:
-            # import traceback as tb; tb.print_stack()
-            #codes = np.array([4, 2])
-            #import pdb; pdb.set_trace()
-            #print('hi')
-        #print(f'codes after shifting {codes}')
 
         # Now sum and OR are in fact interchangeable. This is a simple
         # composition of the (disjunct) significant bits of each level (i.e.
@@ -3009,12 +3001,7 @@ def maybe_mi_droplevels(indexer, levels):
                 if len(key) == self.nlevels and self.is_unique:
                     # Complete key in unique index -> standard get_loc
                     try:
-                        engine = self._engine
-                        #import pdb; pdb.set_trace()
-                        # print(f'key before handing it to engine {engine} is key: {key}')
-                        # import pdb; pdb.set_trace()
-                        loc_res = engine.get_loc(key)
-                        return (loc_res, None)
+                        return (self._engine.get_loc(key), None)
                     except KeyError as err:
                         raise KeyError(key) from err
                     except TypeError:
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 24d543d2af139..bbb3cb3391dfa 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1110,7 +1110,6 @@ def _get_label(self, label, axis: int):
 
     def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
         # we have an axis0 multi-index, handle or raise
-        #import pdb; pdb.set_trace()
         axis = self.axis or 0
         try:
             # fast path for series or for tup devoid of slices
diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py
index e7b67fdc9f9c7..99322f474dd9e 100644
--- a/pandas/tests/indexes/multi/test_indexing.py
+++ b/pandas/tests/indexes/multi/test_indexing.py
@@ -625,7 +625,6 @@ def test_get_loc_cast_bool(self):
         # GH 19086 : int is casted to bool, but not vice-versa
         levels = [[False, True], np.arange(2, dtype="int64")]
         idx = MultiIndex.from_product(levels)
-        import pdb; pdb.set_trace()
 
         assert idx.get_loc((0, 1)) == 1
         assert idx.get_loc((1, 0)) == 2

From 9eae7739a329d3678654405c23aa4d6be4b47afd Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 10 Oct 2021 10:23:21 +0000
Subject: [PATCH 04/43] Fixes from pre-commit [automated commit]

---
 pandas/tests/indexing/multiindex/test_getitem.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index 441591047eb62..f635e30570709 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -393,6 +393,7 @@ def test_loc_empty_multiindex():
     expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
     tm.assert_frame_equal(result, expected)
 
+
 def test_loc_nan_multiindex():
     df = DataFrame(
         {
@@ -402,7 +403,7 @@ def test_loc_nan_multiindex():
         }
     )
 
-    agg_df = df.groupby(by=['temp_playlist', 'objId'], dropna=False)["x"].agg(list)
+    agg_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].agg(list)
     result = agg_df.loc[agg_df.index[-1]]
     expected = [2, 4]
     assert result == expected

From c497e34d906231b177c46c3d93a493c768b4ae55 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 10 Oct 2021 12:53:58 +0200
Subject: [PATCH 05/43] add new test

---
 .../tests/indexing/multiindex/test_getitem.py | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index 441591047eb62..c1c77d6a146ae 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -406,3 +406,28 @@ def test_loc_nan_multiindex():
     result = agg_df.loc[agg_df.index[-1]]
     expected = [2, 4]
     assert result == expected
+
+def test_loc_nan_multiindex2():
+    arrays = [[1, 1, 1], [np.nan, 100, np.nan]]
+    names = ("idx1", "idx2")
+    index = MultiIndex.from_arrays(arrays, names=names)
+    df = DataFrame([1, 2, 3], index=index, columns=['col1'])
+    result = df.loc[(1, np.nan)]
+
+    expected = DataFrame([1 , 3], index=MultiIndex.from_arrays([[1,1], [np.nan, np.nan]], names=names), columns=['col1'])
+    tm.assert_frame_equal(result, expected, check_index_type=False)
+
+
+def test_loc_nan_multiindex():
+    df = DataFrame(
+        {
+            "temp_playlist": [0, 0, 0, 0],
+            "objId": ["o1", np.nan, "o1", np.nan],
+            "x": [1, 2, 3, 4],
+        }
+    )
+
+    agg_df = df.groupby(by=['temp_playlist', 'objId'], dropna=False)["x"].agg(list)
+    result = agg_df.loc[agg_df.index[-1]]
+    expected = [2, 4]
+    assert result == expected

From fde027ba89bc73308381fa552231cf4a40607e38 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 10 Oct 2021 20:21:29 +0200
Subject: [PATCH 06/43] update test as desired in PR comments

---
 .../tests/indexing/multiindex/test_getitem.py | 26 +++++++------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index c1c77d6a146ae..42495ba304cd8 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -393,7 +393,9 @@ def test_loc_empty_multiindex():
     expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
     tm.assert_frame_equal(result, expected)
 
-def test_loc_nan_multiindex():
+@pytest.mark.parametrize("dropna", [True, False])
+def test_loc_nan_multiindex(dropna):
+    # GH 43943
     df = DataFrame(
         {
             "temp_playlist": [0, 0, 0, 0],
@@ -402,9 +404,14 @@ def test_loc_nan_multiindex():
         }
     )
 
-    agg_df = df.groupby(by=['temp_playlist', 'objId'], dropna=False)["x"].agg(list)
+    agg_df = df.groupby(by=['temp_playlist', 'objId'], dropna=dropna)["x"].agg(list)
     result = agg_df.loc[agg_df.index[-1]]
-    expected = [2, 4]
+    if dropna:
+        # nans are dropped, therefore only [1, 3] is left
+        expected = [1, 3]
+    else:
+        # last index is (0, np.nan) which corresponds to the list [2, 4]
+        expected = [2, 4]
     assert result == expected
 
 def test_loc_nan_multiindex2():
@@ -418,16 +425,3 @@ def test_loc_nan_multiindex2():
     tm.assert_frame_equal(result, expected, check_index_type=False)
 
 
-def test_loc_nan_multiindex():
-    df = DataFrame(
-        {
-            "temp_playlist": [0, 0, 0, 0],
-            "objId": ["o1", np.nan, "o1", np.nan],
-            "x": [1, 2, 3, 4],
-        }
-    )
-
-    agg_df = df.groupby(by=['temp_playlist', 'objId'], dropna=False)["x"].agg(list)
-    result = agg_df.loc[agg_df.index[-1]]
-    expected = [2, 4]
-    assert result == expected

From 246a421fb44bbad6a88b4fdf128061dc220c4b98 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 10 Oct 2021 20:29:23 +0200
Subject: [PATCH 07/43] add whatsnew entry

---
 doc/source/whatsnew/v1.3.4.rst                   | 1 +
 pandas/tests/indexing/multiindex/test_getitem.py | 1 +
 2 files changed, 2 insertions(+)

diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst
index 6f07dc3e1e2f9..060dbc97524df 100644
--- a/doc/source/whatsnew/v1.3.4.rst
+++ b/doc/source/whatsnew/v1.3.4.rst
@@ -26,6 +26,7 @@ Fixed regressions
 - Fixed regression in :meth:`Series.aggregate` attempting to pass ``args`` and ``kwargs`` multiple times to the user supplied ``func`` in certain cases (:issue:`43357`)
 - Fixed regression when iterating over a :class:`DataFrame.groupby.rolling` object causing the resulting DataFrames to have an incorrect index if the input groupings were not sorted (:issue:`43386`)
 - Fixed regression in :meth:`DataFrame.groupby.rolling.cov` and :meth:`DataFrame.groupby.rolling.corr` computing incorrect results if the input groupings were not sorted (:issue:`43386`)
+- Fixed regression in :meth:`DataFrame.loc` when `MultiIndex` contained `np.nan` (:issue`43814`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index c519588dcec48..baa98601f5a9b 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -393,6 +393,7 @@ def test_loc_empty_multiindex():
     expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
     tm.assert_frame_equal(result, expected)
 
+
 @pytest.mark.parametrize("dropna", [True, False])
 def test_loc_nan_multiindex(dropna):
     # GH 43943

From bbeeb1b993ef6089e13ba39b37ce8907d6082247 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 10 Oct 2021 20:30:52 +0200
Subject: [PATCH 08/43] fix wrong issue number

---
 pandas/tests/indexing/multiindex/test_getitem.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index baa98601f5a9b..c9242789f1486 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -396,7 +396,7 @@ def test_loc_empty_multiindex():
 
 @pytest.mark.parametrize("dropna", [True, False])
 def test_loc_nan_multiindex(dropna):
-    # GH 43943
+    # GH 43814
     df = DataFrame(
         {
             "temp_playlist": [0, 0, 0, 0],

From 6b38ba4312ce7a302d650bf7cc8c5147fb0898ba Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Tue, 12 Oct 2021 19:45:14 +0200
Subject: [PATCH 09/43] save nan encoding in new state variable and replace it
 before returning the output

---
 pandas/_libs/index.pyx                        | 14 +++++-------
 pandas/core/groupby/grouper.py                |  9 ++++++++
 pandas/core/groupby/ops.py                    | 17 +++++++++++++-
 pandas/tests/groupby/test_groupby_dropna.py   | 18 +++++++++++++++
 .../tests/indexing/multiindex/test_getitem.py | 22 -------------------
 5 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index f1b174774d6fe..ea0bebea8299b 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -735,15 +735,11 @@ cdef class BaseMultiIndexCodesEngine:
             raise TypeError(f"'{key}' is an invalid key")
         if not isinstance(key, tuple):
             raise KeyError(key)
-        indices = []
-        for k, lev in zip(key, self.levels):
-            try:
-                indices.append(lev.get_loc(k) + 1)
-            except KeyError as err:
-                if checknull(k):
-                    indices.append(0)
-                else:
-                    raise KeyError(key) from err
+        try:
+            indices = [0 if checknull(v) else lev.get_loc(v) + 1
+                       for lev, v in zip(self.levels, key)]
+        except KeyError:
+            raise KeyError(key)
 
         # Transform indices into single integer:
         lab_int = self._codes_to_ints(np.array(indices, dtype='uint64'))
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 7577b1e671d60..f62964ff378c0 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -486,6 +486,7 @@ def __init__(
         self._observed = observed
         self.in_axis = in_axis
         self._dropna = dropna
+        self._na_placeholder = None
 
         self._passed_categorical = False
 
@@ -690,6 +691,14 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
             codes, uniques = algorithms.factorize(
                 self.grouping_vector, sort=self._sort, na_sentinel=na_sentinel
             )
+            # GH43943, store placeholder for np.nan, will later be replaced by -1 in
+            # pandas/core/groupby:reconstructed_codes
+            if not self._dropna:
+                if any(
+                    isinstance(v, float) and np.isnan(v) for v in self.grouping_vector
+                ):
+                    self._na_placeholder = max(codes)
+
         return codes, uniques
 
     @cache_readonly
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 46e4465667e7e..6fa9c6607cfb0 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -847,7 +847,22 @@ def ngroups(self) -> int:
     def reconstructed_codes(self) -> list[np.ndarray]:
         codes = self.codes
         ids, obs_ids, _ = self.group_info
-        return decons_obs_group_ids(ids, obs_ids, self.shape, codes, xnull=True)
+        reconstructed_codes = decons_obs_group_ids(
+            ids, obs_ids, self.shape, codes, xnull=True
+        )
+
+        def transform_codes(code_level, grouping):
+            if grouping._na_placeholder is not None:
+                return np.where(code_level == max(code_level), -1, code_level)
+            else:
+                return code_level
+
+        # import pdb; pdb.set_trace()
+        transformed_rec_codes = [
+            transform_codes(code_level, grouping)
+            for code_level, grouping in zip(reconstructed_codes, self._groupings)
+        ]
+        return transformed_rec_codes
 
     @final
     @cache_readonly
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index ab568e24ff029..b8f3bb2c4c8a8 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -370,3 +370,21 @@ def test_groupby_nan_included():
         tm.assert_numpy_array_equal(result_values, expected_values)
     assert np.isnan(list(result.keys())[2])
     assert list(result.keys())[0:2] == ["g1", "g2"]
+
+
+def test_groupby_codes_with_nan_in_multiindex():
+    # GH 43814
+    df = pd.DataFrame(
+        {
+            "temp_playlist": [0, 0, 0, 0],
+            "objId": ["o1", np.nan, "o1", np.nan],
+            "x": [1, 2, 3, 4],
+        }
+    )
+
+    grouped_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].sum()
+    expected = pd.MultiIndex.from_arrays(
+        [[0, 0], ["o1", np.nan]], names=["temp_playlist", "objId"]
+    )
+    result = grouped_df.index
+    assert all((res == ex).all() for res, ex in zip(result.codes, expected.codes))
diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index c9242789f1486..3790a6e9a5319 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -392,25 +392,3 @@ def test_loc_empty_multiindex():
     result = df
     expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
     tm.assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize("dropna", [True, False])
-def test_loc_nan_multiindex(dropna):
-    # GH 43814
-    df = DataFrame(
-        {
-            "temp_playlist": [0, 0, 0, 0],
-            "objId": ["o1", np.nan, "o1", np.nan],
-            "x": [1, 2, 3, 4],
-        }
-    )
-
-    agg_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].agg(list)
-    result = agg_df.loc[agg_df.index[-1]]
-    if dropna:
-        # nans are dropped, therefore only [1, 3] is left
-        expected = [1, 3]
-    else:
-        # last index is (0, np.nan) which corresponds to the list [2, 4]
-        expected = [2, 4]
-    assert result == expected

From d04b7b8f66ade97ffeeba8aaac45500f93211bca Mon Sep 17 00:00:00 2001
From: CloseChoice <tobias.pitters@gmx.de>
Date: Tue, 12 Oct 2021 21:56:10 +0200
Subject: [PATCH 10/43] remove wrong whatsnew entry

---
 doc/source/whatsnew/v1.3.4.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst
index 060dbc97524df..6f07dc3e1e2f9 100644
--- a/doc/source/whatsnew/v1.3.4.rst
+++ b/doc/source/whatsnew/v1.3.4.rst
@@ -26,7 +26,6 @@ Fixed regressions
 - Fixed regression in :meth:`Series.aggregate` attempting to pass ``args`` and ``kwargs`` multiple times to the user supplied ``func`` in certain cases (:issue:`43357`)
 - Fixed regression when iterating over a :class:`DataFrame.groupby.rolling` object causing the resulting DataFrames to have an incorrect index if the input groupings were not sorted (:issue:`43386`)
 - Fixed regression in :meth:`DataFrame.groupby.rolling.cov` and :meth:`DataFrame.groupby.rolling.corr` computing incorrect results if the input groupings were not sorted (:issue:`43386`)
-- Fixed regression in :meth:`DataFrame.loc` when `MultiIndex` contained `np.nan` (:issue`43814`)
 
 .. ---------------------------------------------------------------------------
 

From cc75b447e54b1e3b1df2ab26220d6e3ca16e91a6 Mon Sep 17 00:00:00 2001
From: CloseChoice <tobias.pitters@gmx.de>
Date: Tue, 12 Oct 2021 22:00:49 +0200
Subject: [PATCH 11/43] remove debug statement

---
 pandas/core/groupby/ops.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 6fa9c6607cfb0..fdf4d18802a35 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -857,7 +857,6 @@ def transform_codes(code_level, grouping):
             else:
                 return code_level
 
-        # import pdb; pdb.set_trace()
         transformed_rec_codes = [
             transform_codes(code_level, grouping)
             for code_level, grouping in zip(reconstructed_codes, self._groupings)

From c88b2d8fb4985837f81a5d65b3d2d70e9d49752b Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Wed, 13 Oct 2021 13:58:39 +0200
Subject: [PATCH 12/43] add whatsnew entry

---
 doc/source/whatsnew/v1.4.0.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index e638a24f830ef..5053421866ef4 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -503,6 +503,7 @@ Groupby/resample/rolling
 - Bug in :meth:`GroupBy.apply` with time-based :class:`Grouper` objects incorrectly raising ``ValueError`` in corner cases where the grouping vector contains a ``NaT`` (:issue:`43500`, :issue:`43515`)
 - Bug in :meth:`GroupBy.mean` failing with ``complex`` dtype (:issue:`43701`)
 - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and index is decreasing (:issue:`43927`)
+- Bug in :meth:`DataFrame.groupby` when grouping on multiple columns where at least one includes ``np.nan`` which resulted in a `KeyError` when the ``np.nan`` containing index was selected with :meth:`Series.loc` (:issue:`43814`)
 
 Reshaping
 ^^^^^^^^^

From 77e7283cacda1a93ca5af4262ab1417343793a4f Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 17 Oct 2021 20:39:15 +0200
Subject: [PATCH 13/43] fix wrong code quotes in whatsnew

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index 1a687891b2d39..6d21d23c6d967 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -558,7 +558,7 @@ Groupby/resample/rolling
 - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` for centered datetimelike windows with uneven nanosecond (:issue:`43997`)
 - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`)
 - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`#3944`)
-- Bug in :meth:`DataFrame.groupby` when grouping on multiple columns where at least one includes ``np.nan`` which resulted in a `KeyError` when the ``np.nan`` containing index was selected with :meth:`Series.loc` (:issue:`43814`)
+- Bug in :meth:`DataFrame.groupby` when grouping on multiple columns where at least one includes ``np.nan`` which resulted in a ``KeyError`` when the ``np.nan`` containing index was selected with :meth:`Series.loc` (:issue:`43814`)
 
 Reshaping
 ^^^^^^^^^

From 72fb02684fabbd6a71f864b53e8f105c5b01b986 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Fri, 22 Oct 2021 06:44:53 +0200
Subject: [PATCH 14/43] WIP: add support for (pd.NA, pd.NaT,
 np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns")

---
 pandas/core/groupby/grouper.py              |  8 +++---
 pandas/core/groupby/ops.py                  |  3 ++-
 pandas/tests/groupby/test_groupby_dropna.py | 28 ++++++++++++++++++---
 3 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index f62964ff378c0..43023264c5faf 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -34,6 +34,7 @@
     Categorical,
     ExtensionArray,
 )
+from pandas.core.base import isna
 import pandas.core.common as com
 from pandas.core.frame import DataFrame
 from pandas.core.groupby import ops
@@ -50,6 +51,8 @@
 
 from pandas.io.formats.printing import pprint_thing
 
+from pandas._libs.tslibs.nattype import NaTType
+
 if TYPE_CHECKING:
     from pandas.core.generic import NDFrame
 
@@ -693,10 +696,9 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
             )
             # GH43943, store placeholder for np.nan, will later be replaced by -1 in
             # pandas/core/groupby:reconstructed_codes
+            import pdb; pdb.set_trace()
             if not self._dropna:
-                if any(
-                    isinstance(v, float) and np.isnan(v) for v in self.grouping_vector
-                ):
+                if isna(self.grouping_vector).any():
                     self._na_placeholder = max(codes)
 
         return codes, uniques
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index fdf4d18802a35..2f68260f01cfa 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -851,9 +851,10 @@ def reconstructed_codes(self) -> list[np.ndarray]:
             ids, obs_ids, self.shape, codes, xnull=True
         )
 
+        import pdb; pdb.set_trace()
         def transform_codes(code_level, grouping):
             if grouping._na_placeholder is not None:
-                return np.where(code_level == max(code_level), -1, code_level)
+                return np.where(code_level == grouping._na_placeholder, -1, code_level)
             else:
                 return code_level
 
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index b8f3bb2c4c8a8..ed468b8d67546 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -372,19 +372,41 @@ def test_groupby_nan_included():
     assert list(result.keys())[0:2] == ["g1", "g2"]
 
 
-def test_groupby_codes_with_nan_in_multiindex():
+@pytest.mark.parametrize(
+    "na", [np.nan, pd.NaT, np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns")]
+)
+def test_groupby_codes_with_nan_in_multiindex(na):
+    # GH 43814
+    df = pd.DataFrame(
+        {
+            "temp_playlist": [0, 0, 0, 0],
+            "objId": ["o1", na, "o1", na],
+            "x": [1, 2, 3, 4],
+        }
+    )
+
+    grouped_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].sum()
+    expected = pd.MultiIndex.from_arrays(
+        [[0, 0], ["o1", na]], names=["temp_playlist", "objId"]
+    )
+    result = grouped_df.index
+    assert all((res == ex).all() for res, ex in zip(result.codes, expected.codes))
+
+
+def test_groupby_codes_with_pd_nat_in_multiindex():
     # GH 43814
     df = pd.DataFrame(
         {
             "temp_playlist": [0, 0, 0, 0],
-            "objId": ["o1", np.nan, "o1", np.nan],
+            "objId": ["o1", pd.NaT, "o1", pd.NaT],
             "x": [1, 2, 3, 4],
         }
     )
 
     grouped_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].sum()
     expected = pd.MultiIndex.from_arrays(
-        [[0, 0], ["o1", np.nan]], names=["temp_playlist", "objId"]
+        [[0, 0], ["o1", pd.NaT]], names=["temp_playlist", "objId"]
     )
     result = grouped_df.index
+    # import pdb; pdb.set_trace()
     assert all((res == ex).all() for res, ex in zip(result.codes, expected.codes))

From cf0355d77ae157203ccb1efa75a3286ece3e3a83 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Fri, 22 Oct 2021 06:58:40 +0200
Subject: [PATCH 15/43] WIP: add support for (pd.NA, pd.NaT,
 np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns")

---
 pandas/core/groupby/grouper.py              |  4 ++--
 pandas/core/groupby/ops.py                  |  1 -
 pandas/tests/groupby/test_groupby_dropna.py | 19 -------------------
 3 files changed, 2 insertions(+), 22 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 43023264c5faf..ad9763a68bc66 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -694,9 +694,9 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
             codes, uniques = algorithms.factorize(
                 self.grouping_vector, sort=self._sort, na_sentinel=na_sentinel
             )
-            # GH43943, store placeholder for np.nan, will later be replaced by -1 in
+            # GH43943, store placeholder for (np.nan, pd.NaT, np.datetime64("NaT", "ns"),
+            # np.timedelta64("NaT", "ns")), will later be replaced by -1 in
             # pandas/core/groupby:reconstructed_codes
-            import pdb; pdb.set_trace()
             if not self._dropna:
                 if isna(self.grouping_vector).any():
                     self._na_placeholder = max(codes)
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 2f68260f01cfa..e8c0bdc5be370 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -851,7 +851,6 @@ def reconstructed_codes(self) -> list[np.ndarray]:
             ids, obs_ids, self.shape, codes, xnull=True
         )
 
-        import pdb; pdb.set_trace()
         def transform_codes(code_level, grouping):
             if grouping._na_placeholder is not None:
                 return np.where(code_level == grouping._na_placeholder, -1, code_level)
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index ed468b8d67546..2485276503ac9 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -391,22 +391,3 @@ def test_groupby_codes_with_nan_in_multiindex(na):
     )
     result = grouped_df.index
     assert all((res == ex).all() for res, ex in zip(result.codes, expected.codes))
-
-
-def test_groupby_codes_with_pd_nat_in_multiindex():
-    # GH 43814
-    df = pd.DataFrame(
-        {
-            "temp_playlist": [0, 0, 0, 0],
-            "objId": ["o1", pd.NaT, "o1", pd.NaT],
-            "x": [1, 2, 3, 4],
-        }
-    )
-
-    grouped_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].sum()
-    expected = pd.MultiIndex.from_arrays(
-        [[0, 0], ["o1", pd.NaT]], names=["temp_playlist", "objId"]
-    )
-    result = grouped_df.index
-    # import pdb; pdb.set_trace()
-    assert all((res == ex).all() for res, ex in zip(result.codes, expected.codes))

From 427a5151e1ecf185491ab28e04de03d20c2bd70a Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Fri, 22 Oct 2021 05:11:39 +0000
Subject: [PATCH 16/43] Fixes from pre-commit [automated commit]

---
 pandas/core/groupby/grouper.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index ad9763a68bc66..1a044fd5ec1aa 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -14,6 +14,7 @@
 
 import numpy as np
 
+from pandas._libs.tslibs.nattype import NaTType
 from pandas._typing import (
     ArrayLike,
     NDFrameT,
@@ -51,8 +52,6 @@
 
 from pandas.io.formats.printing import pprint_thing
 
-from pandas._libs.tslibs.nattype import NaTType
-
 if TYPE_CHECKING:
     from pandas.core.generic import NDFrame
 

From 29c37b68f77ce9d14f74a65247ef79901f51a2bc Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sat, 9 Oct 2021 22:35:27 +0200
Subject: [PATCH 17/43] fix problem without adding tests

---
 pandas/_libs/hashtable.pyx                  |  1 +
 pandas/_libs/index.pyx                      | 20 ++++++++++++++++++--
 pandas/core/generic.py                      |  3 +++
 pandas/core/indexes/base.py                 |  5 +++++
 pandas/core/indexes/multi.py                | 15 ++++++++++++++-
 pandas/core/indexing.py                     |  1 +
 pandas/tests/indexes/multi/test_indexing.py |  1 +
 7 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index 6e97c13c644cf..d7a2861013a1a 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -121,6 +121,7 @@ cdef class ObjectFactorizer(Factorizer):
             uniques = ObjectVector()
             uniques.extend(self.uniques.to_array())
             self.uniques = uniques
+        print('WE ARE IN FACTORIZE')
         labels = self.table.get_labels(values, self.uniques,
                                        self.count, na_sentinel, na_value)
         mask = (labels == na_sentinel)
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index 92837a43e2b69..fdcc4e61f77c9 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -129,6 +129,7 @@ cdef class IndexEngine:
         # We assume before we get here:
         #  - val is hashable
         self._ensure_mapping_populated()
+        #print('THIS IS THE MAPPING ' + str(self.mapping))
         return val in self.mapping
 
     cpdef get_loc(self, object val):
@@ -158,6 +159,8 @@ cdef class IndexEngine:
             return self._get_loc_duplicates(val)
 
         try:
+            #print('THIS IS val: ' + str(val))
+            #print('THIS IS MAPPING: ' + str(self.mapping))
             return self.mapping.get_item(val)
         except OverflowError as err:
             # GH#41775 OverflowError e.g. if we are uint64 and val is -1
@@ -619,13 +622,16 @@ cdef class BaseMultiIndexCodesEngine:
 
         # Transform labels in a single array, and add 1 so that we are working
         # with positive integers (-1 for NaN becomes 0):
+        #print('\nTHIS ARE LABELS IN INITIALIZATION OF BASEMULTIINDEXCODESENGINE: ' + str(labels) + " THIS IS THE TYPE OF LABELS: " + str(type(labels)))
         codes = (np.array(labels, dtype='int64').T + 1).astype('uint64',
                                                                copy=False)
+        #print('\nTHIS ARE THE CODES: ' + str(codes) + " THIS IS THE TYPE OF CODES: " + str(type(codes)) + '\n')
 
         # Map each codes combination in the index to an integer unambiguously
         # (no collisions possible), based on the "offsets", which describe the
         # number of bits to switch labels for each level:
         lab_ints = self._codes_to_ints(codes)
+        #print('\nTHIS ARE THE LAB INTS: ' + str(lab_ints) + " THIS IS THE TYPE OF LAB INTS: " + str(type(codes)) + '\n')
 
         # Initialize underlying index (e.g. libindex.UInt64Engine) with
         # integers representing labels: we will use its get_loc and get_indexer
@@ -756,13 +762,23 @@ cdef class BaseMultiIndexCodesEngine:
         return sorted_indexer[np.argsort(target_order)]
 
     def get_loc(self, object key):
+        #print('get_loc, we want to get key' + str(key))
         if is_definitely_invalid_key(key):
             raise TypeError(f"'{key}' is an invalid key")
         if not isinstance(key, tuple):
             raise KeyError(key)
+        # print('LEVELS: ' + str(type(self.levels[0])) + 'KEYS: ' + str(key))
         try:
-            indices = [0 if checknull(v) else lev.get_loc(v) + 1
-                       for lev, v in zip(self.levels, key)]
+            # indices = [0 if checknull(v) else lev.get_loc(v) + 1
+            #            for lev, v in zip(self.levels, key)]
+            indices = []
+            for key, lev in zip(key, self.levels):
+                try:
+                    indices.append(lev.get_loc(key) + 1)
+                except KeyError:
+                    indices.append(0)
+            # indices = [lev.get_loc(v) + 1
+            #            for lev, v in zip(self.levels, key)]
         except KeyError:
             raise KeyError(key)
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index c3ad87082c8ed..452b02dc0ad96 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1835,6 +1835,7 @@ def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray:
         axis = self._get_axis_number(axis)
         other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]
 
+        # import pdb; pdb.set_trace()
         if self._is_label_reference(key, axis=axis):
             self._check_label_or_level_ambiguity(key, axis=axis)
             values = self.xs(key, axis=other_axes[0])._values
@@ -3804,6 +3805,8 @@ class   animal   locomotion
         self._consolidate_inplace()
 
         if isinstance(index, MultiIndex):
+            #import pdb; pdb.set_trace()
+            import traceback as tb;
             loc, new_index = index._get_loc_level(key, level=0)
             if not drop_level:
                 if lib.is_integer(loc):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 05047540c6ccd..274ffd33abe78 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3508,8 +3508,13 @@ def get_loc(self, key, method=None, tolerance=None):
                     "tolerance argument only valid if using pad, "
                     "backfill or nearest lookups"
                 )
+            # print(f'key before casting {key}')
             casted_key = self._maybe_cast_indexer(key)
             try:
+                #import pdb; pdb.set_trace()
+                # print(f'casted key {casted_key}')
+                # import pdb; pdb.set_trace()
+                # import traceback as tb; tb.print_stack()
                 return self._engine.get_loc(casted_key)
             except KeyError as err:
                 raise KeyError(key) from err
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index e2f1a2d6a1e23..8d72805e59d4d 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -132,7 +132,15 @@ def _codes_to_ints(self, codes):
         """
         # Shift the representation of each level by the pre-calculated number
         # of bits:
+        #print(f'codes {codes}, self.offsets {self.offsets}')
+        #import pdb; pdb.set_trace()
         codes <<= self.offsets
+        #if len(codes.shape) == 1 and list(codes) == [4,0]:
+            # import traceback as tb; tb.print_stack()
+            #codes = np.array([4, 2])
+            #import pdb; pdb.set_trace()
+            #print('hi')
+        #print(f'codes after shifting {codes}')
 
         # Now sum and OR are in fact interchangeable. This is a simple
         # composition of the (disjunct) significant bits of each level (i.e.
@@ -3001,7 +3009,12 @@ def maybe_mi_droplevels(indexer, levels):
                 if len(key) == self.nlevels and self.is_unique:
                     # Complete key in unique index -> standard get_loc
                     try:
-                        return (self._engine.get_loc(key), None)
+                        engine = self._engine
+                        #import pdb; pdb.set_trace()
+                        # print(f'key before handing it to engine {engine} is key: {key}')
+                        # import pdb; pdb.set_trace()
+                        loc_res = engine.get_loc(key)
+                        return (loc_res, None)
                     except KeyError as err:
                         raise KeyError(key) from err
                     except TypeError:
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index 918a9ea1b8030..a1c48a3213318 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1140,6 +1140,7 @@ def _get_label(self, label, axis: int):
 
     def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
         # we have an axis0 multi-index, handle or raise
+        #import pdb; pdb.set_trace()
         axis = self.axis or 0
         try:
             # fast path for series or for tup devoid of slices
diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py
index 99322f474dd9e..e7b67fdc9f9c7 100644
--- a/pandas/tests/indexes/multi/test_indexing.py
+++ b/pandas/tests/indexes/multi/test_indexing.py
@@ -625,6 +625,7 @@ def test_get_loc_cast_bool(self):
         # GH 19086 : int is casted to bool, but not vice-versa
         levels = [[False, True], np.arange(2, dtype="int64")]
         idx = MultiIndex.from_product(levels)
+        import pdb; pdb.set_trace()
 
         assert idx.get_loc((0, 1)) == 1
         assert idx.get_loc((1, 0)) == 2

From d274bca24676eab593ec2ef7a2d93bf59e45be63 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sat, 9 Oct 2021 22:42:22 +0200
Subject: [PATCH 18/43] add test, still not all tests running

---
 pandas/tests/indexing/multiindex/test_getitem.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index 3790a6e9a5319..441591047eb62 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -392,3 +392,17 @@ def test_loc_empty_multiindex():
     result = df
     expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
     tm.assert_frame_equal(result, expected)
+
+def test_loc_nan_multiindex():
+    df = DataFrame(
+        {
+            "temp_playlist": [0, 0, 0, 0],
+            "objId": ["o1", np.nan, "o1", np.nan],
+            "x": [1, 2, 3, 4],
+        }
+    )
+
+    agg_df = df.groupby(by=['temp_playlist', 'objId'], dropna=False)["x"].agg(list)
+    result = agg_df.loc[agg_df.index[-1]]
+    expected = [2, 4]
+    assert result == expected

From f72cbf714a0cf5f41011a13899adf8924e99cb83 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 10 Oct 2021 12:20:23 +0200
Subject: [PATCH 19/43] remove print and debug statements

---
 pandas/_libs/hashtable.pyx                  |  1 -
 pandas/_libs/index.pyx                      | 28 ++++++---------------
 pandas/core/generic.py                      |  3 ---
 pandas/core/indexes/base.py                 |  5 ----
 pandas/core/indexes/multi.py                | 15 +----------
 pandas/core/indexing.py                     |  1 -
 pandas/tests/indexes/multi/test_indexing.py |  1 -
 7 files changed, 9 insertions(+), 45 deletions(-)

diff --git a/pandas/_libs/hashtable.pyx b/pandas/_libs/hashtable.pyx
index d7a2861013a1a..6e97c13c644cf 100644
--- a/pandas/_libs/hashtable.pyx
+++ b/pandas/_libs/hashtable.pyx
@@ -121,7 +121,6 @@ cdef class ObjectFactorizer(Factorizer):
             uniques = ObjectVector()
             uniques.extend(self.uniques.to_array())
             self.uniques = uniques
-        print('WE ARE IN FACTORIZE')
         labels = self.table.get_labels(values, self.uniques,
                                        self.count, na_sentinel, na_value)
         mask = (labels == na_sentinel)
diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index fdcc4e61f77c9..d07329678d9f1 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -129,7 +129,6 @@ cdef class IndexEngine:
         # We assume before we get here:
         #  - val is hashable
         self._ensure_mapping_populated()
-        #print('THIS IS THE MAPPING ' + str(self.mapping))
         return val in self.mapping
 
     cpdef get_loc(self, object val):
@@ -159,8 +158,6 @@ cdef class IndexEngine:
             return self._get_loc_duplicates(val)
 
         try:
-            #print('THIS IS val: ' + str(val))
-            #print('THIS IS MAPPING: ' + str(self.mapping))
             return self.mapping.get_item(val)
         except OverflowError as err:
             # GH#41775 OverflowError e.g. if we are uint64 and val is -1
@@ -622,16 +619,13 @@ cdef class BaseMultiIndexCodesEngine:
 
         # Transform labels in a single array, and add 1 so that we are working
         # with positive integers (-1 for NaN becomes 0):
-        #print('\nTHIS ARE LABELS IN INITIALIZATION OF BASEMULTIINDEXCODESENGINE: ' + str(labels) + " THIS IS THE TYPE OF LABELS: " + str(type(labels)))
         codes = (np.array(labels, dtype='int64').T + 1).astype('uint64',
                                                                copy=False)
-        #print('\nTHIS ARE THE CODES: ' + str(codes) + " THIS IS THE TYPE OF CODES: " + str(type(codes)) + '\n')
 
         # Map each codes combination in the index to an integer unambiguously
         # (no collisions possible), based on the "offsets", which describe the
         # number of bits to switch labels for each level:
         lab_ints = self._codes_to_ints(codes)
-        #print('\nTHIS ARE THE LAB INTS: ' + str(lab_ints) + " THIS IS THE TYPE OF LAB INTS: " + str(type(codes)) + '\n')
 
         # Initialize underlying index (e.g. libindex.UInt64Engine) with
         # integers representing labels: we will use its get_loc and get_indexer
@@ -762,25 +756,19 @@ cdef class BaseMultiIndexCodesEngine:
         return sorted_indexer[np.argsort(target_order)]
 
     def get_loc(self, object key):
-        #print('get_loc, we want to get key' + str(key))
         if is_definitely_invalid_key(key):
             raise TypeError(f"'{key}' is an invalid key")
         if not isinstance(key, tuple):
             raise KeyError(key)
-        # print('LEVELS: ' + str(type(self.levels[0])) + 'KEYS: ' + str(key))
-        try:
-            # indices = [0 if checknull(v) else lev.get_loc(v) + 1
-            #            for lev, v in zip(self.levels, key)]
-            indices = []
-            for key, lev in zip(key, self.levels):
-                try:
-                    indices.append(lev.get_loc(key) + 1)
-                except KeyError:
+        indices = []
+        for k, lev in zip(key, self.levels):
+            try:
+                indices.append(lev.get_loc(k) + 1)
+            except KeyError as err:
+                if checknull(k):
                     indices.append(0)
-            # indices = [lev.get_loc(v) + 1
-            #            for lev, v in zip(self.levels, key)]
-        except KeyError:
-            raise KeyError(key)
+                else:
+                    raise KeyError(key) from err
 
         # Transform indices into single integer:
         lab_int = self._codes_to_ints(np.array(indices, dtype='uint64'))
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 452b02dc0ad96..c3ad87082c8ed 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -1835,7 +1835,6 @@ def _get_label_or_level_values(self, key: str, axis: int = 0) -> np.ndarray:
         axis = self._get_axis_number(axis)
         other_axes = [ax for ax in range(self._AXIS_LEN) if ax != axis]
 
-        # import pdb; pdb.set_trace()
         if self._is_label_reference(key, axis=axis):
             self._check_label_or_level_ambiguity(key, axis=axis)
             values = self.xs(key, axis=other_axes[0])._values
@@ -3805,8 +3804,6 @@ class   animal   locomotion
         self._consolidate_inplace()
 
         if isinstance(index, MultiIndex):
-            #import pdb; pdb.set_trace()
-            import traceback as tb;
             loc, new_index = index._get_loc_level(key, level=0)
             if not drop_level:
                 if lib.is_integer(loc):
diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
index 274ffd33abe78..05047540c6ccd 100644
--- a/pandas/core/indexes/base.py
+++ b/pandas/core/indexes/base.py
@@ -3508,13 +3508,8 @@ def get_loc(self, key, method=None, tolerance=None):
                     "tolerance argument only valid if using pad, "
                     "backfill or nearest lookups"
                 )
-            # print(f'key before casting {key}')
             casted_key = self._maybe_cast_indexer(key)
             try:
-                #import pdb; pdb.set_trace()
-                # print(f'casted key {casted_key}')
-                # import pdb; pdb.set_trace()
-                # import traceback as tb; tb.print_stack()
                 return self._engine.get_loc(casted_key)
             except KeyError as err:
                 raise KeyError(key) from err
diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
index 8d72805e59d4d..e2f1a2d6a1e23 100644
--- a/pandas/core/indexes/multi.py
+++ b/pandas/core/indexes/multi.py
@@ -132,15 +132,7 @@ def _codes_to_ints(self, codes):
         """
         # Shift the representation of each level by the pre-calculated number
         # of bits:
-        #print(f'codes {codes}, self.offsets {self.offsets}')
-        #import pdb; pdb.set_trace()
         codes <<= self.offsets
-        #if len(codes.shape) == 1 and list(codes) == [4,0]:
-            # import traceback as tb; tb.print_stack()
-            #codes = np.array([4, 2])
-            #import pdb; pdb.set_trace()
-            #print('hi')
-        #print(f'codes after shifting {codes}')
 
         # Now sum and OR are in fact interchangeable. This is a simple
         # composition of the (disjunct) significant bits of each level (i.e.
@@ -3009,12 +3001,7 @@ def maybe_mi_droplevels(indexer, levels):
                 if len(key) == self.nlevels and self.is_unique:
                     # Complete key in unique index -> standard get_loc
                     try:
-                        engine = self._engine
-                        #import pdb; pdb.set_trace()
-                        # print(f'key before handing it to engine {engine} is key: {key}')
-                        # import pdb; pdb.set_trace()
-                        loc_res = engine.get_loc(key)
-                        return (loc_res, None)
+                        return (self._engine.get_loc(key), None)
                     except KeyError as err:
                         raise KeyError(key) from err
                     except TypeError:
diff --git a/pandas/core/indexing.py b/pandas/core/indexing.py
index a1c48a3213318..918a9ea1b8030 100644
--- a/pandas/core/indexing.py
+++ b/pandas/core/indexing.py
@@ -1140,7 +1140,6 @@ def _get_label(self, label, axis: int):
 
     def _handle_lowerdim_multi_index_axis0(self, tup: tuple):
         # we have an axis0 multi-index, handle or raise
-        #import pdb; pdb.set_trace()
         axis = self.axis or 0
         try:
             # fast path for series or for tup devoid of slices
diff --git a/pandas/tests/indexes/multi/test_indexing.py b/pandas/tests/indexes/multi/test_indexing.py
index e7b67fdc9f9c7..99322f474dd9e 100644
--- a/pandas/tests/indexes/multi/test_indexing.py
+++ b/pandas/tests/indexes/multi/test_indexing.py
@@ -625,7 +625,6 @@ def test_get_loc_cast_bool(self):
         # GH 19086 : int is casted to bool, but not vice-versa
         levels = [[False, True], np.arange(2, dtype="int64")]
         idx = MultiIndex.from_product(levels)
-        import pdb; pdb.set_trace()
 
         assert idx.get_loc((0, 1)) == 1
         assert idx.get_loc((1, 0)) == 2

From 7ab0c9ca567a27bd5a68f5d2b2c8058d940ff437 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 10 Oct 2021 10:23:21 +0000
Subject: [PATCH 20/43] Fixes from pre-commit [automated commit]

---
 pandas/tests/indexing/multiindex/test_getitem.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index 441591047eb62..f635e30570709 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -393,6 +393,7 @@ def test_loc_empty_multiindex():
     expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
     tm.assert_frame_equal(result, expected)
 
+
 def test_loc_nan_multiindex():
     df = DataFrame(
         {
@@ -402,7 +403,7 @@ def test_loc_nan_multiindex():
         }
     )
 
-    agg_df = df.groupby(by=['temp_playlist', 'objId'], dropna=False)["x"].agg(list)
+    agg_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].agg(list)
     result = agg_df.loc[agg_df.index[-1]]
     expected = [2, 4]
     assert result == expected

From a758d4449e9fa8b9c6395282102592adc681f80c Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 10 Oct 2021 12:53:58 +0200
Subject: [PATCH 21/43] add new test

---
 .../tests/indexing/multiindex/test_getitem.py | 25 +++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index f635e30570709..0cfe166e94d48 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -407,3 +407,28 @@ def test_loc_nan_multiindex():
     result = agg_df.loc[agg_df.index[-1]]
     expected = [2, 4]
     assert result == expected
+
+def test_loc_nan_multiindex2():
+    arrays = [[1, 1, 1], [np.nan, 100, np.nan]]
+    names = ("idx1", "idx2")
+    index = MultiIndex.from_arrays(arrays, names=names)
+    df = DataFrame([1, 2, 3], index=index, columns=['col1'])
+    result = df.loc[(1, np.nan)]
+
+    expected = DataFrame([1 , 3], index=MultiIndex.from_arrays([[1,1], [np.nan, np.nan]], names=names), columns=['col1'])
+    tm.assert_frame_equal(result, expected, check_index_type=False)
+
+
+def test_loc_nan_multiindex():
+    df = DataFrame(
+        {
+            "temp_playlist": [0, 0, 0, 0],
+            "objId": ["o1", np.nan, "o1", np.nan],
+            "x": [1, 2, 3, 4],
+        }
+    )
+
+    agg_df = df.groupby(by=['temp_playlist', 'objId'], dropna=False)["x"].agg(list)
+    result = agg_df.loc[agg_df.index[-1]]
+    expected = [2, 4]
+    assert result == expected

From c3ec7f0843dcc5e00338bc19e259387867955632 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 10 Oct 2021 20:21:29 +0200
Subject: [PATCH 22/43] update test as desired in PR comments

---
 .../tests/indexing/multiindex/test_getitem.py | 25 ++++++++-----------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index 0cfe166e94d48..b08afeee42375 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -395,6 +395,9 @@ def test_loc_empty_multiindex():
 
 
 def test_loc_nan_multiindex():
+@pytest.mark.parametrize("dropna", [True, False])
+def test_loc_nan_multiindex(dropna):
+    # GH 43943
     df = DataFrame(
         {
             "temp_playlist": [0, 0, 0, 0],
@@ -403,9 +406,14 @@ def test_loc_nan_multiindex():
         }
     )
 
-    agg_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].agg(list)
+    agg_df = df.groupby(by=['temp_playlist', 'objId'], dropna=dropna)["x"].agg(list)
     result = agg_df.loc[agg_df.index[-1]]
-    expected = [2, 4]
+    if dropna:
+        # nans are dropped, therefore only [1, 3] is left
+        expected = [1, 3]
+    else:
+        # last index is (0, np.nan) which corresponds to the list [2, 4]
+        expected = [2, 4]
     assert result == expected
 
 def test_loc_nan_multiindex2():
@@ -419,16 +427,3 @@ def test_loc_nan_multiindex2():
     tm.assert_frame_equal(result, expected, check_index_type=False)
 
 
-def test_loc_nan_multiindex():
-    df = DataFrame(
-        {
-            "temp_playlist": [0, 0, 0, 0],
-            "objId": ["o1", np.nan, "o1", np.nan],
-            "x": [1, 2, 3, 4],
-        }
-    )
-
-    agg_df = df.groupby(by=['temp_playlist', 'objId'], dropna=False)["x"].agg(list)
-    result = agg_df.loc[agg_df.index[-1]]
-    expected = [2, 4]
-    assert result == expected

From 245b14191edfeb58d5db8884ca4c58134c7292da Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 10 Oct 2021 20:29:23 +0200
Subject: [PATCH 23/43] add whatsnew entry

---
 doc/source/whatsnew/v1.3.4.rst                   |  1 +
 pandas/tests/indexing/multiindex/test_getitem.py | 15 +--------------
 2 files changed, 2 insertions(+), 14 deletions(-)

diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst
index b46744d51d74d..e63f500f0ab52 100644
--- a/doc/source/whatsnew/v1.3.4.rst
+++ b/doc/source/whatsnew/v1.3.4.rst
@@ -27,6 +27,7 @@ Fixed regressions
 - Fixed regression in :meth:`Series.aggregate` attempting to pass ``args`` and ``kwargs`` multiple times to the user supplied ``func`` in certain cases (:issue:`43357`)
 - Fixed regression when iterating over a :class:`DataFrame.groupby.rolling` object causing the resulting DataFrames to have an incorrect index if the input groupings were not sorted (:issue:`43386`)
 - Fixed regression in :meth:`DataFrame.groupby.rolling.cov` and :meth:`DataFrame.groupby.rolling.corr` computing incorrect results if the input groupings were not sorted (:issue:`43386`)
+- Fixed regression in :meth:`DataFrame.loc` when `MultiIndex` contained `np.nan` (:issue`43814`)
 
 .. ---------------------------------------------------------------------------
 
diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index b08afeee42375..baa98601f5a9b 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -394,7 +394,6 @@ def test_loc_empty_multiindex():
     tm.assert_frame_equal(result, expected)
 
 
-def test_loc_nan_multiindex():
 @pytest.mark.parametrize("dropna", [True, False])
 def test_loc_nan_multiindex(dropna):
     # GH 43943
@@ -406,7 +405,7 @@ def test_loc_nan_multiindex(dropna):
         }
     )
 
-    agg_df = df.groupby(by=['temp_playlist', 'objId'], dropna=dropna)["x"].agg(list)
+    agg_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].agg(list)
     result = agg_df.loc[agg_df.index[-1]]
     if dropna:
         # nans are dropped, therefore only [1, 3] is left
@@ -415,15 +414,3 @@ def test_loc_nan_multiindex(dropna):
         # last index is (0, np.nan) which corresponds to the list [2, 4]
         expected = [2, 4]
     assert result == expected
-
-def test_loc_nan_multiindex2():
-    arrays = [[1, 1, 1], [np.nan, 100, np.nan]]
-    names = ("idx1", "idx2")
-    index = MultiIndex.from_arrays(arrays, names=names)
-    df = DataFrame([1, 2, 3], index=index, columns=['col1'])
-    result = df.loc[(1, np.nan)]
-
-    expected = DataFrame([1 , 3], index=MultiIndex.from_arrays([[1,1], [np.nan, np.nan]], names=names), columns=['col1'])
-    tm.assert_frame_equal(result, expected, check_index_type=False)
-
-

From ae706c87936e4e584cf6463256c9e67aed78ba51 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 10 Oct 2021 20:30:52 +0200
Subject: [PATCH 24/43] fix wrong issue number

---
 pandas/tests/indexing/multiindex/test_getitem.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index baa98601f5a9b..c9242789f1486 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -396,7 +396,7 @@ def test_loc_empty_multiindex():
 
 @pytest.mark.parametrize("dropna", [True, False])
 def test_loc_nan_multiindex(dropna):
-    # GH 43943
+    # GH 43814
     df = DataFrame(
         {
             "temp_playlist": [0, 0, 0, 0],

From 30097ca3eacfa90e369a7a5d6ff15bbe739ac720 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Tue, 12 Oct 2021 19:45:14 +0200
Subject: [PATCH 25/43] save nan encoding in new state variable and replace it
 before returning the output

---
 pandas/_libs/index.pyx                        | 14 +++++-------
 pandas/core/groupby/grouper.py                |  9 ++++++++
 pandas/core/groupby/ops.py                    | 17 +++++++++++++-
 pandas/tests/groupby/test_groupby_dropna.py   | 18 +++++++++++++++
 .../tests/indexing/multiindex/test_getitem.py | 22 -------------------
 5 files changed, 48 insertions(+), 32 deletions(-)

diff --git a/pandas/_libs/index.pyx b/pandas/_libs/index.pyx
index d07329678d9f1..92837a43e2b69 100644
--- a/pandas/_libs/index.pyx
+++ b/pandas/_libs/index.pyx
@@ -760,15 +760,11 @@ cdef class BaseMultiIndexCodesEngine:
             raise TypeError(f"'{key}' is an invalid key")
         if not isinstance(key, tuple):
             raise KeyError(key)
-        indices = []
-        for k, lev in zip(key, self.levels):
-            try:
-                indices.append(lev.get_loc(k) + 1)
-            except KeyError as err:
-                if checknull(k):
-                    indices.append(0)
-                else:
-                    raise KeyError(key) from err
+        try:
+            indices = [0 if checknull(v) else lev.get_loc(v) + 1
+                       for lev, v in zip(self.levels, key)]
+        except KeyError:
+            raise KeyError(key)
 
         # Transform indices into single integer:
         lab_int = self._codes_to_ints(np.array(indices, dtype='uint64'))
diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 7577b1e671d60..f62964ff378c0 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -486,6 +486,7 @@ def __init__(
         self._observed = observed
         self.in_axis = in_axis
         self._dropna = dropna
+        self._na_placeholder = None
 
         self._passed_categorical = False
 
@@ -690,6 +691,14 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
             codes, uniques = algorithms.factorize(
                 self.grouping_vector, sort=self._sort, na_sentinel=na_sentinel
             )
+            # GH43943, store placeholder for np.nan, will later be replaced by -1 in
+            # pandas/core/groupby:reconstructed_codes
+            if not self._dropna:
+                if any(
+                    isinstance(v, float) and np.isnan(v) for v in self.grouping_vector
+                ):
+                    self._na_placeholder = max(codes)
+
         return codes, uniques
 
     @cache_readonly
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 60c8851f059fe..b65a600634095 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -859,7 +859,22 @@ def ngroups(self) -> int:
     def reconstructed_codes(self) -> list[np.ndarray]:
         codes = self.codes
         ids, obs_ids, _ = self.group_info
-        return decons_obs_group_ids(ids, obs_ids, self.shape, codes, xnull=True)
+        reconstructed_codes = decons_obs_group_ids(
+            ids, obs_ids, self.shape, codes, xnull=True
+        )
+
+        def transform_codes(code_level, grouping):
+            if grouping._na_placeholder is not None:
+                return np.where(code_level == max(code_level), -1, code_level)
+            else:
+                return code_level
+
+        # import pdb; pdb.set_trace()
+        transformed_rec_codes = [
+            transform_codes(code_level, grouping)
+            for code_level, grouping in zip(reconstructed_codes, self._groupings)
+        ]
+        return transformed_rec_codes
 
     @final
     @cache_readonly
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index ab568e24ff029..b8f3bb2c4c8a8 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -370,3 +370,21 @@ def test_groupby_nan_included():
         tm.assert_numpy_array_equal(result_values, expected_values)
     assert np.isnan(list(result.keys())[2])
     assert list(result.keys())[0:2] == ["g1", "g2"]
+
+
+def test_groupby_codes_with_nan_in_multiindex():
+    # GH 43814
+    df = pd.DataFrame(
+        {
+            "temp_playlist": [0, 0, 0, 0],
+            "objId": ["o1", np.nan, "o1", np.nan],
+            "x": [1, 2, 3, 4],
+        }
+    )
+
+    grouped_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].sum()
+    expected = pd.MultiIndex.from_arrays(
+        [[0, 0], ["o1", np.nan]], names=["temp_playlist", "objId"]
+    )
+    result = grouped_df.index
+    assert all((res == ex).all() for res, ex in zip(result.codes, expected.codes))
diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py
index c9242789f1486..3790a6e9a5319 100644
--- a/pandas/tests/indexing/multiindex/test_getitem.py
+++ b/pandas/tests/indexing/multiindex/test_getitem.py
@@ -392,25 +392,3 @@ def test_loc_empty_multiindex():
     result = df
     expected = DataFrame([1, 2, 3, 4], index=index, columns=["value"])
     tm.assert_frame_equal(result, expected)
-
-
-@pytest.mark.parametrize("dropna", [True, False])
-def test_loc_nan_multiindex(dropna):
-    # GH 43814
-    df = DataFrame(
-        {
-            "temp_playlist": [0, 0, 0, 0],
-            "objId": ["o1", np.nan, "o1", np.nan],
-            "x": [1, 2, 3, 4],
-        }
-    )
-
-    agg_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].agg(list)
-    result = agg_df.loc[agg_df.index[-1]]
-    if dropna:
-        # nans are dropped, therefore only [1, 3] is left
-        expected = [1, 3]
-    else:
-        # last index is (0, np.nan) which corresponds to the list [2, 4]
-        expected = [2, 4]
-    assert result == expected

From ef951a6e90edd7bf6f0571cf47828fc9760ca9e3 Mon Sep 17 00:00:00 2001
From: CloseChoice <tobias.pitters@gmx.de>
Date: Tue, 12 Oct 2021 21:56:10 +0200
Subject: [PATCH 26/43] remove wrong whatsnew entry

---
 doc/source/whatsnew/v1.3.4.rst | 1 -
 1 file changed, 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.3.4.rst b/doc/source/whatsnew/v1.3.4.rst
index e63f500f0ab52..b46744d51d74d 100644
--- a/doc/source/whatsnew/v1.3.4.rst
+++ b/doc/source/whatsnew/v1.3.4.rst
@@ -27,7 +27,6 @@ Fixed regressions
 - Fixed regression in :meth:`Series.aggregate` attempting to pass ``args`` and ``kwargs`` multiple times to the user supplied ``func`` in certain cases (:issue:`43357`)
 - Fixed regression when iterating over a :class:`DataFrame.groupby.rolling` object causing the resulting DataFrames to have an incorrect index if the input groupings were not sorted (:issue:`43386`)
 - Fixed regression in :meth:`DataFrame.groupby.rolling.cov` and :meth:`DataFrame.groupby.rolling.corr` computing incorrect results if the input groupings were not sorted (:issue:`43386`)
-- Fixed regression in :meth:`DataFrame.loc` when `MultiIndex` contained `np.nan` (:issue`43814`)
 
 .. ---------------------------------------------------------------------------
 

From 0fa1784088939191d8eac8dfe38525f63b37be9e Mon Sep 17 00:00:00 2001
From: CloseChoice <tobias.pitters@gmx.de>
Date: Tue, 12 Oct 2021 22:00:49 +0200
Subject: [PATCH 27/43] remove debug statement

---
 pandas/core/groupby/ops.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index b65a600634095..fe4e14f992606 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -869,7 +869,6 @@ def transform_codes(code_level, grouping):
             else:
                 return code_level
 
-        # import pdb; pdb.set_trace()
         transformed_rec_codes = [
             transform_codes(code_level, grouping)
             for code_level, grouping in zip(reconstructed_codes, self._groupings)

From 1f780234f5cccdd4674a72759e6a4ef83c8c934b Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Wed, 13 Oct 2021 13:58:39 +0200
Subject: [PATCH 28/43] add whatsnew entry

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index e13e6380905f2..db677a50b35f0 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -591,7 +591,7 @@ Groupby/resample/rolling
 - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` for centered datetimelike windows with uneven nanosecond (:issue:`43997`)
 - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`)
 - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`#3944`)
-
+- Bug in :meth:`DataFrame.groupby` when grouping on multiple columns where at least one includes ``np.nan`` which resulted in a `KeyError` when the ``np.nan`` containing index was selected with :meth:`Series.loc` (:issue:`43814`)
 
 Reshaping
 ^^^^^^^^^

From 5b3c61d032af081db8dd9626a62a42bcb228a4e4 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 17 Oct 2021 20:39:15 +0200
Subject: [PATCH 29/43] fix wrong code quotes in whatsnew

---
 doc/source/whatsnew/v1.4.0.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst
index db677a50b35f0..3c40243d5a4f3 100644
--- a/doc/source/whatsnew/v1.4.0.rst
+++ b/doc/source/whatsnew/v1.4.0.rst
@@ -591,7 +591,7 @@ Groupby/resample/rolling
 - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` for centered datetimelike windows with uneven nanosecond (:issue:`43997`)
 - Bug in :meth:`GroupBy.nth` failing on ``axis=1`` (:issue:`43926`)
 - Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not respecting right bound on centered datetime-like windows, if the index contain duplicates (:issue:`#3944`)
-- Bug in :meth:`DataFrame.groupby` when grouping on multiple columns where at least one includes ``np.nan`` which resulted in a `KeyError` when the ``np.nan`` containing index was selected with :meth:`Series.loc` (:issue:`43814`)
+- Bug in :meth:`DataFrame.groupby` when grouping on multiple columns where at least one includes ``np.nan`` which resulted in a ``KeyError`` when the ``np.nan`` containing index was selected with :meth:`Series.loc` (:issue:`43814`)
 
 Reshaping
 ^^^^^^^^^

From 98c8f13cdeedd2024ae0118d2f18550bd15c3f65 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Fri, 22 Oct 2021 06:44:53 +0200
Subject: [PATCH 30/43] WIP: add support for (pd.NA, pd.NaT,
 np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns")

---
 pandas/core/groupby/grouper.py              |  8 +++---
 pandas/core/groupby/ops.py                  |  3 ++-
 pandas/tests/groupby/test_groupby_dropna.py | 28 ++++++++++++++++++---
 3 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index f62964ff378c0..43023264c5faf 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -34,6 +34,7 @@
     Categorical,
     ExtensionArray,
 )
+from pandas.core.base import isna
 import pandas.core.common as com
 from pandas.core.frame import DataFrame
 from pandas.core.groupby import ops
@@ -50,6 +51,8 @@
 
 from pandas.io.formats.printing import pprint_thing
 
+from pandas._libs.tslibs.nattype import NaTType
+
 if TYPE_CHECKING:
     from pandas.core.generic import NDFrame
 
@@ -693,10 +696,9 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
             )
             # GH43943, store placeholder for np.nan, will later be replaced by -1 in
             # pandas/core/groupby:reconstructed_codes
+            import pdb; pdb.set_trace()
             if not self._dropna:
-                if any(
-                    isinstance(v, float) and np.isnan(v) for v in self.grouping_vector
-                ):
+                if isna(self.grouping_vector).any():
                     self._na_placeholder = max(codes)
 
         return codes, uniques
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index fe4e14f992606..1e95bf8fa1f3e 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -863,9 +863,10 @@ def reconstructed_codes(self) -> list[np.ndarray]:
             ids, obs_ids, self.shape, codes, xnull=True
         )
 
+        import pdb; pdb.set_trace()
         def transform_codes(code_level, grouping):
             if grouping._na_placeholder is not None:
-                return np.where(code_level == max(code_level), -1, code_level)
+                return np.where(code_level == grouping._na_placeholder, -1, code_level)
             else:
                 return code_level
 
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index b8f3bb2c4c8a8..ed468b8d67546 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -372,19 +372,41 @@ def test_groupby_nan_included():
     assert list(result.keys())[0:2] == ["g1", "g2"]
 
 
-def test_groupby_codes_with_nan_in_multiindex():
+@pytest.mark.parametrize(
+    "na", [np.nan, pd.NaT, np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns")]
+)
+def test_groupby_codes_with_nan_in_multiindex(na):
+    # GH 43814
+    df = pd.DataFrame(
+        {
+            "temp_playlist": [0, 0, 0, 0],
+            "objId": ["o1", na, "o1", na],
+            "x": [1, 2, 3, 4],
+        }
+    )
+
+    grouped_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].sum()
+    expected = pd.MultiIndex.from_arrays(
+        [[0, 0], ["o1", na]], names=["temp_playlist", "objId"]
+    )
+    result = grouped_df.index
+    assert all((res == ex).all() for res, ex in zip(result.codes, expected.codes))
+
+
+def test_groupby_codes_with_pd_nat_in_multiindex():
     # GH 43814
     df = pd.DataFrame(
         {
             "temp_playlist": [0, 0, 0, 0],
-            "objId": ["o1", np.nan, "o1", np.nan],
+            "objId": ["o1", pd.NaT, "o1", pd.NaT],
             "x": [1, 2, 3, 4],
         }
     )
 
     grouped_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].sum()
     expected = pd.MultiIndex.from_arrays(
-        [[0, 0], ["o1", np.nan]], names=["temp_playlist", "objId"]
+        [[0, 0], ["o1", pd.NaT]], names=["temp_playlist", "objId"]
     )
     result = grouped_df.index
+    # import pdb; pdb.set_trace()
     assert all((res == ex).all() for res, ex in zip(result.codes, expected.codes))

From ec218d52d02bbfc6879fb045a8c27c5d8ca1679b Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Fri, 22 Oct 2021 06:58:40 +0200
Subject: [PATCH 31/43] WIP: add support for (pd.NA, pd.NaT,
 np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns")

---
 pandas/core/groupby/grouper.py              |  4 ++--
 pandas/core/groupby/ops.py                  |  1 -
 pandas/tests/groupby/test_groupby_dropna.py | 19 -------------------
 3 files changed, 2 insertions(+), 22 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 43023264c5faf..ad9763a68bc66 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -694,9 +694,9 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
             codes, uniques = algorithms.factorize(
                 self.grouping_vector, sort=self._sort, na_sentinel=na_sentinel
             )
-            # GH43943, store placeholder for np.nan, will later be replaced by -1 in
+            # GH43943, store placeholder for (np.nan, pd.NaT, np.datetime64("NaT", "ns"),
+            # np.timedelta64("NaT", "ns")), will later be replaced by -1 in
             # pandas/core/groupby:reconstructed_codes
-            import pdb; pdb.set_trace()
             if not self._dropna:
                 if isna(self.grouping_vector).any():
                     self._na_placeholder = max(codes)
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 1e95bf8fa1f3e..8c1e64aa53f70 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -863,7 +863,6 @@ def reconstructed_codes(self) -> list[np.ndarray]:
             ids, obs_ids, self.shape, codes, xnull=True
         )
 
-        import pdb; pdb.set_trace()
         def transform_codes(code_level, grouping):
             if grouping._na_placeholder is not None:
                 return np.where(code_level == grouping._na_placeholder, -1, code_level)
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index ed468b8d67546..2485276503ac9 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -391,22 +391,3 @@ def test_groupby_codes_with_nan_in_multiindex(na):
     )
     result = grouped_df.index
     assert all((res == ex).all() for res, ex in zip(result.codes, expected.codes))
-
-
-def test_groupby_codes_with_pd_nat_in_multiindex():
-    # GH 43814
-    df = pd.DataFrame(
-        {
-            "temp_playlist": [0, 0, 0, 0],
-            "objId": ["o1", pd.NaT, "o1", pd.NaT],
-            "x": [1, 2, 3, 4],
-        }
-    )
-
-    grouped_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].sum()
-    expected = pd.MultiIndex.from_arrays(
-        [[0, 0], ["o1", pd.NaT]], names=["temp_playlist", "objId"]
-    )
-    result = grouped_df.index
-    # import pdb; pdb.set_trace()
-    assert all((res == ex).all() for res, ex in zip(result.codes, expected.codes))

From 6850b73c8cf42932c3c2cac08b56c617ff433903 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sat, 6 Nov 2021 16:05:43 +0100
Subject: [PATCH 32/43] add tests for issue 36060

---
 pandas/tests/groupby/test_groupby_dropna.py | 29 ++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 2485276503ac9..70e6061f7f213 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -373,7 +373,8 @@ def test_groupby_nan_included():
 
 
 @pytest.mark.parametrize(
-    "na", [np.nan, pd.NaT, np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns")]
+    "na",
+    [np.nan, pd.NaT, pd.NA, np.datetime64("NaT", "ns"), np.timedelta64("NaT", "ns")],
 )
 def test_groupby_codes_with_nan_in_multiindex(na):
     # GH 43814
@@ -391,3 +392,29 @@ def test_groupby_codes_with_nan_in_multiindex(na):
     )
     result = grouped_df.index
     assert all((res == ex).all() for res, ex in zip(result.codes, expected.codes))
+
+
+def test_groupby_multiindex_multiply_with_series():
+    # GH#36060
+    df = pd.DataFrame(
+        {
+            "animal": ["Falcon", "Falcon", "Parrot", "Parrot"],
+            "type": [np.nan, np.nan, np.nan, np.nan],
+            "speed": [380.0, 370.0, 24.0, 26.0],
+        }
+    )
+    speed = df.groupby(["animal", "type"], dropna=False)["speed"].first()
+    # Reconstruct same index to allow for multiplication.
+    ix_wing = pd.MultiIndex.from_tuples(
+        [("Falcon", np.nan), ("Parrot", np.nan)], names=["animal", "type"]
+    )
+    wing = pd.Series([42, 44], index=ix_wing)
+
+    result = wing * speed
+    expected = pd.Series(
+        [15960.0, 1056.0],
+        index=pd.MultiIndex.from_tuples(
+            [("Falcon", np.nan), ("Parrot", np.nan)], names=["animal", "type"]
+        ),
+    )
+    tm.assert_series_equal(result, expected)

From e7a52e33a3ef1fc616f70265764ef279e592b5d1 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sat, 6 Nov 2021 15:14:40 +0000
Subject: [PATCH 33/43] Fixes from pre-commit [automated commit]

---
 pandas/core/groupby/grouper.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 86cb3c58e28ba..1a044fd5ec1aa 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -52,8 +52,6 @@
 
 from pandas.io.formats.printing import pprint_thing
 
-from pandas._libs.tslibs.nattype import NaTType
-
 if TYPE_CHECKING:
     from pandas.core.generic import NDFrame
 

From 9682350af8aa915cf7c4a684e5abf52406c3cfa2 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 7 Nov 2021 18:58:57 +0100
Subject: [PATCH 34/43] changes according to PR discussions

---
 pandas/core/groupby/grouper.py |  4 ++--
 pandas/core/groupby/ops.py     | 15 ++-------------
 pandas/core/sorting.py         | 27 +++++++++++++++++++++++++--
 3 files changed, 29 insertions(+), 17 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 86cb3c58e28ba..c5a481e1207cf 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -490,7 +490,7 @@ def __init__(
         self._observed = observed
         self.in_axis = in_axis
         self._dropna = dropna
-        self._na_placeholder = None
+        self._has_na_placeholder = False
 
         self._passed_categorical = False
 
@@ -700,7 +700,7 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
             # pandas/core/groupby:reconstructed_codes
             if not self._dropna:
                 if isna(self.grouping_vector).any():
-                    self._na_placeholder = max(codes)
+                    self._has_na_placeholder = True
 
         return codes, uniques
 
diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index 8c1e64aa53f70..ee3572e8b90bf 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -860,20 +860,9 @@ def reconstructed_codes(self) -> list[np.ndarray]:
         codes = self.codes
         ids, obs_ids, _ = self.group_info
         reconstructed_codes = decons_obs_group_ids(
-            ids, obs_ids, self.shape, codes, xnull=True
+            ids, obs_ids, self.shape, codes, xnull=True, groupings=self._groupings
         )
-
-        def transform_codes(code_level, grouping):
-            if grouping._na_placeholder is not None:
-                return np.where(code_level == grouping._na_placeholder, -1, code_level)
-            else:
-                return code_level
-
-        transformed_rec_codes = [
-            transform_codes(code_level, grouping)
-            for code_level, grouping in zip(reconstructed_codes, self._groupings)
-        ]
-        return transformed_rec_codes
+        return reconstructed_codes
 
     @final
     @cache_readonly
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index 7813182222d67..fb3b3e1ed8264 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -41,6 +41,7 @@
 if TYPE_CHECKING:
     from pandas import MultiIndex
     from pandas.core.indexes.base import Index
+    from pandas.core.groupby.grouper import Grouping
 
 
 def get_indexer_indexer(
@@ -243,7 +244,12 @@ def decons_group_index(comp_labels, shape):
 
 
 def decons_obs_group_ids(
-    comp_ids: npt.NDArray[np.intp], obs_ids, shape, labels, xnull: bool
+    comp_ids: npt.NDArray[np.intp],
+    obs_ids,
+    shape,
+    labels,
+    xnull: bool,
+    groupings: list[Grouping] = None,
 ):
     """
     Reconstruct labels from observed group ids.
@@ -254,6 +260,21 @@ def decons_obs_group_ids(
     xnull : bool
         If nulls are excluded; i.e. -1 labels are passed through.
     """
+
+    def transform_codes(code_level, grouping):
+        if grouping._has_na_placeholder:
+            return np.where(code_level == max(code_level), -1, code_level)
+        else:
+            return code_level
+
+    def reconstruct_na_in_codes(codes):
+        if groupings:
+            codes = [
+                transform_codes(code_level, grouping)
+                for code_level, grouping in zip(codes, groupings)
+            ]
+        return codes
+
     if not xnull:
         lift = np.fromiter(((a == -1).any() for a in labels), dtype="i8")
         shape = np.asarray(shape, dtype="i8") + lift
@@ -261,10 +282,12 @@ def decons_obs_group_ids(
     if not is_int64_overflow_possible(shape):
         # obs ids are deconstructable! take the fast route!
         out = decons_group_index(obs_ids, shape)
+        out = reconstruct_na_in_codes(out)
         return out if xnull or not lift.any() else [x - y for x, y in zip(out, lift)]
 
     indexer = unique_label_indices(comp_ids)
-    return [lab[indexer].astype(np.intp, subok=False, copy=True) for lab in labels]
+    out = [lab[indexer].astype(np.intp, subok=False, copy=True) for lab in labels]
+    return reconstruct_na_in_codes(out)
 
 
 def indexer_from_factorized(

From cb92f953125711921f6467e48cab25d4b2bb5c46 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Sun, 7 Nov 2021 19:14:13 +0100
Subject: [PATCH 35/43] use assert_series_equal in test_groupby_dropna

---
 pandas/tests/groupby/test_groupby_dropna.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 70e6061f7f213..797b0d5dd36d0 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -386,12 +386,20 @@ def test_groupby_codes_with_nan_in_multiindex(na):
         }
     )
 
-    grouped_df = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].sum()
+    result = df.groupby(by=["temp_playlist", "objId"], dropna=False)["x"].sum()
     expected = pd.MultiIndex.from_arrays(
         [[0, 0], ["o1", na]], names=["temp_playlist", "objId"]
     )
-    result = grouped_df.index
-    assert all((res == ex).all() for res, ex in zip(result.codes, expected.codes))
+    expected = pd.Series(
+        [4, 6],
+        index=pd.MultiIndex(
+            levels=[[0], ["o1", np.nan]],
+            codes=[[0, 0], [0, -1]],
+            names=["temp_playlist", "objId"],
+        ),
+        name="x",
+    )
+    tm.assert_series_equal(result, expected)
 
 
 def test_groupby_multiindex_multiply_with_series():

From e3c734d40c7e0c91774fadd6eb74e25d4c4ab590 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Wed, 10 Nov 2021 17:21:52 +0100
Subject: [PATCH 36/43] make _has_na_placeholder cache_readonly

---
 pandas/core/groupby/grouper.py | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 7b281f53443eb..20f0cca9ff61e 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -488,7 +488,6 @@ def __init__(
         self._observed = observed
         self.in_axis = in_axis
         self._dropna = dropna
-        self._has_na_placeholder = False
 
         self._passed_categorical = False
 
@@ -603,6 +602,9 @@ def _ilevel(self) -> int | None:
             return index.names.index(level)
         return level
 
+    # @cache_readonly
+    # def _has
+
     @property
     def ngroups(self) -> int:
         return len(self.group_index)
@@ -693,12 +695,6 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
             codes, uniques = algorithms.factorize(
                 self.grouping_vector, sort=self._sort, na_sentinel=na_sentinel
             )
-            # GH43943, store placeholder for (np.nan, pd.NaT, np.datetime64("NaT", "ns"),
-            # np.timedelta64("NaT", "ns")), will later be replaced by -1 in
-            # pandas/core/groupby:reconstructed_codes
-            if not self._dropna:
-                if isna(self.grouping_vector).any():
-                    self._has_na_placeholder = True
 
         return codes, uniques
 
@@ -706,6 +702,15 @@ def _codes_and_uniques(self) -> tuple[np.ndarray, ArrayLike]:
     def groups(self) -> dict[Hashable, np.ndarray]:
         return self._index.groupby(Categorical.from_codes(self.codes, self.group_index))
 
+    @cache_readonly
+    def _has_na_placeholder(self) -> bool:
+        # GH43943, store placeholder for (np.nan, pd.NaT, np.datetime64("NaT", "ns"),
+        # np.timedelta64("NaT", "ns")), is used to replaced codes correctly to -1
+        # pandas/core/groupby:reconstructed_codes
+        if not self._dropna:
+            if isna(self.grouping_vector).any():
+                return True
+        return False
 
 def get_grouper(
     obj: NDFrameT,

From cf48e70ac063d82dae2ea91183530c86937ab8e8 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Wed, 10 Nov 2021 17:22:21 +0100
Subject: [PATCH 37/43] make _has_na_placeholder cache_readonly

---
 pandas/core/groupby/grouper.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 20f0cca9ff61e..333853f232439 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -712,6 +712,7 @@ def _has_na_placeholder(self) -> bool:
                 return True
         return False
 
+
 def get_grouper(
     obj: NDFrameT,
     key=None,

From 49b1d655e0b28b32a0f8d94a155daa046c898b77 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Wed, 10 Nov 2021 16:25:29 +0000
Subject: [PATCH 38/43] Fixes from pre-commit [automated commit]

---
 pandas/core/sorting.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index fb3b3e1ed8264..a953bc1edd557 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -40,8 +40,8 @@
 
 if TYPE_CHECKING:
     from pandas import MultiIndex
-    from pandas.core.indexes.base import Index
     from pandas.core.groupby.grouper import Grouping
+    from pandas.core.indexes.base import Index
 
 
 def get_indexer_indexer(

From d00b39ff0bfef9032c192a9c655196328bcd2fed Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Wed, 10 Nov 2021 17:35:05 +0100
Subject: [PATCH 39/43] remove commented out stuff

---
 pandas/core/groupby/grouper.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 333853f232439..1045747c98553 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -602,9 +602,6 @@ def _ilevel(self) -> int | None:
             return index.names.index(level)
         return level
 
-    # @cache_readonly
-    # def _has
-
     @property
     def ngroups(self) -> int:
         return len(self.group_index)

From d1809012630420e148b68496efa3b16baa326596 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Wed, 10 Nov 2021 18:18:46 +0100
Subject: [PATCH 40/43] remove unnecessary import

---
 pandas/core/groupby/grouper.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index 1045747c98553..c9e13d3923aa3 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -14,7 +14,6 @@
 
 import numpy as np
 
-from pandas._libs.tslibs.nattype import NaTType
 from pandas._typing import (
     ArrayLike,
     NDFrameT,

From cc6af9d732b23c9da605193e6007ffb844512d79 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Tue, 16 Nov 2021 05:53:35 +0100
Subject: [PATCH 41/43] WIP: intermediate commit for loop solution

---
 pandas/core/groupby/ops.py |  8 +++++++-
 pandas/core/sorting.py     | 26 +++++++++++++-------------
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/pandas/core/groupby/ops.py b/pandas/core/groupby/ops.py
index ee3572e8b90bf..f5de5e2a346fc 100644
--- a/pandas/core/groupby/ops.py
+++ b/pandas/core/groupby/ops.py
@@ -859,8 +859,14 @@ def ngroups(self) -> int:
     def reconstructed_codes(self) -> list[np.ndarray]:
         codes = self.codes
         ids, obs_ids, _ = self.group_info
+        # get the levels in which to reconstruct codes for na
+        levels_with_na = [
+            idx
+            for idx, grouping in enumerate(self.groupings)
+            if grouping._has_na_placeholder
+        ]
         reconstructed_codes = decons_obs_group_ids(
-            ids, obs_ids, self.shape, codes, xnull=True, groupings=self._groupings
+            ids, obs_ids, self.shape, codes, xnull=True, levels_with_na=levels_with_na
         )
         return reconstructed_codes
 
diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index a953bc1edd557..f9cf616396244 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -249,7 +249,7 @@ def decons_obs_group_ids(
     shape,
     labels,
     xnull: bool,
-    groupings: list[Grouping] = None,
+    levels_with_na: list[int] = None,
 ):
     """
     Reconstruct labels from observed group ids.
@@ -261,19 +261,19 @@ def decons_obs_group_ids(
         If nulls are excluded; i.e. -1 labels are passed through.
     """
 
-    def transform_codes(code_level, grouping):
-        if grouping._has_na_placeholder:
-            return np.where(code_level == max(code_level), -1, code_level)
-        else:
-            return code_level
-
     def reconstruct_na_in_codes(codes):
-        if groupings:
-            codes = [
-                transform_codes(code_level, grouping)
-                for code_level, grouping in zip(codes, groupings)
-            ]
-        return codes
+        new_codes = []
+        if levels_with_na:
+            for idx, code_level in enumerate(codes):
+                if idx in levels_with_na:
+                    new_codes.append(
+                        np.where(code_level == max(code_level), -1, code_level)
+                    )
+                else:
+                    new_codes.append(code_level)
+        else:
+            new_codes = codes
+        return new_codes
 
     if not xnull:
         lift = np.fromiter(((a == -1).any() for a in labels), dtype="i8")

From 02bf699a0d15035a7df5987a6ae1410dd8e3dad7 Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Tue, 16 Nov 2021 05:56:38 +0100
Subject: [PATCH 42/43] changes for static analysis checks

---
 pandas/core/sorting.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/pandas/core/sorting.py b/pandas/core/sorting.py
index f9cf616396244..5bf0ccd91ce59 100644
--- a/pandas/core/sorting.py
+++ b/pandas/core/sorting.py
@@ -40,7 +40,6 @@
 
 if TYPE_CHECKING:
     from pandas import MultiIndex
-    from pandas.core.groupby.grouper import Grouping
     from pandas.core.indexes.base import Index
 
 
@@ -249,7 +248,7 @@ def decons_obs_group_ids(
     shape,
     labels,
     xnull: bool,
-    levels_with_na: list[int] = None,
+    levels_with_na: list[int] | None = None,
 ):
     """
     Reconstruct labels from observed group ids.

From 45204444966702d799c301fa1b5055db7895f90c Mon Sep 17 00:00:00 2001
From: Tobias Pitters <tobias.pitters@gmail.com>
Date: Wed, 8 Dec 2021 21:17:48 +0100
Subject: [PATCH 43/43] fix imports

---
 pandas/core/groupby/grouper.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/groupby/grouper.py b/pandas/core/groupby/grouper.py
index fab0f6f879db8..e57de0f34a876 100644
--- a/pandas/core/groupby/grouper.py
+++ b/pandas/core/groupby/grouper.py
@@ -35,7 +35,7 @@
     Categorical,
     ExtensionArray,
 )
-from pandas.core.base import isna
+from pandas.core.dtypes.missing import isna
 import pandas.core.common as com
 from pandas.core.frame import DataFrame
 from pandas.core.groupby import ops