From ac7c99afe25185b7fc3a269446282171de51e754 Mon Sep 17 00:00:00 2001
From: David Kwong <dav.kcw@gmail.com>
Date: Fri, 28 Aug 2020 14:57:00 +1000
Subject: [PATCH 01/11] BUG: add unit test, should fail (#35889)

---
 pandas/tests/groupby/test_groupby_dropna.py | 28 +++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index adf62c4723526..760daa0700f2b 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -276,3 +276,31 @@ def test_groupby_dropna_datetime_like_data(
     expected = pd.DataFrame({"values": values}, index=pd.Index(indexes, name="dt"))
 
     tm.assert_frame_equal(grouped, expected)
+
+
+@pytest.mark.parametrize(
+    "dropna, inputs, outputs",
+    [
+        (
+            False,
+            {'groups': ['a', 'a', 'b', np.nan], 'values': [10, 10, 20, 30]},
+            {'groups': ['a', 'b', np.nan], 'values': [0, 1, 0, 0]}
+        ),
+    ],
+)
+def test_groupby_dropna_multi_index_dataframe_nan_apply(
+    dropna, inputs, outputs
+):
+    # GH 35889
+    # `groupby` with `dropna=False` and `apply` returning DataFrame of different
+    # sizes raises error if grouped column has nan values.
+
+    df = pd.DataFrame(inputs)
+    dfg = df.groupby('groups', dropna=dropna)
+    rv = dfg.apply(lambda grp: pd.DataFrame({'values': list(range(len(grp)))}))
+
+    tuples = tuple(zip(inputs['groups'], outputs['values']))
+    mi = pd.MultiIndex.from_tuples(tuples, names=['groups', None])
+
+    expected = pd.DataFrame(outputs, index=mi)
+    tm.assert_frame_equal(rv, expected)

From 6bae42af2932b3f6fef893651028323781ceaf8f Mon Sep 17 00:00:00 2001
From: David Kwong <dav.kcw@gmail.com>
Date: Fri, 28 Aug 2020 16:08:37 +1000
Subject: [PATCH 02/11] expand tests: group with no np.nan, fix expected output
 (#35889)

* tests should still fail.

* test dropna=True|False with no np.nan in groupings.

* fix expected outputs, declare the expected MultiIndex of the resulting
  dataframe after df.groupby().apply()
---
 pandas/tests/groupby/test_groupby_dropna.py | 33 ++++++++++++++++++---
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 760daa0700f2b..ab82d51061654 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -281,14 +281,35 @@ def test_groupby_dropna_datetime_like_data(
 @pytest.mark.parametrize(
     "dropna, inputs, outputs",
     [
-        (
+        pytest.param(
             False,
             {'groups': ['a', 'a', 'b', np.nan], 'values': [10, 10, 20, 30]},
-            {'groups': ['a', 'b', np.nan], 'values': [0, 1, 0, 0]}
+            {'values': [0, 1, 0, 0]},
+            id='dropna_false_has_nan'
+        ),
+        pytest.param(
+            True,
+            {'groups': ['a', 'a', 'b', np.nan], 'values': [10, 10, 20, 30]},
+            {'values': [0, 1, 0]},
+            id='dropna_true_has_nan'
+        ),
+        pytest.param(
+            # no nan in 'groups'; dropna=True|False should be same.
+            False,
+            {'groups': ['a', 'a', 'b', 'c'], 'values': [10, 10, 20, 30]},
+            {'values': [0, 1, 0, 0]},
+            id='dropna_false_no_nan'
+        ),
+        pytest.param(
+            # no nan in 'groups'; dropna=True|False should be same.
+            True,
+            {'groups': ['a', 'a', 'b', 'c'], 'values': [10, 10, 20, 30]},
+            {'values': [0, 1, 0, 0]},
+            id='dropna_true_no_nan'
         ),
     ],
 )
-def test_groupby_dropna_multi_index_dataframe_nan_apply(
+def test_groupby_dropna_multi_index_dataframe_apply(
     dropna, inputs, outputs
 ):
     # GH 35889
@@ -299,7 +320,11 @@ def test_groupby_dropna_multi_index_dataframe_nan_apply(
     dfg = df.groupby('groups', dropna=dropna)
     rv = dfg.apply(lambda grp: pd.DataFrame({'values': list(range(len(grp)))}))
 
-    tuples = tuple(zip(inputs['groups'], outputs['values']))
+    if dropna:
+        groups = [g for g in inputs['groups'] if g is not None]
+    else:
+        groups = inputs['groups']
+    tuples = tuple(zip(groups, outputs['values']))
     mi = pd.MultiIndex.from_tuples(tuples, names=['groups', None])
 
     expected = pd.DataFrame(outputs, index=mi)

From 26453dc5a97ce9288e61faad69e7a3de0ee118b7 Mon Sep 17 00:00:00 2001
From: David Kwong <dav.kcw@gmail.com>
Date: Fri, 28 Aug 2020 16:18:09 +1000
Subject: [PATCH 03/11] double quotes instead of single quote (#35889)

---
 pandas/tests/groupby/test_groupby_dropna.py | 40 ++++++++++-----------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index ab82d51061654..76b281ac98467 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -283,29 +283,29 @@ def test_groupby_dropna_datetime_like_data(
     [
         pytest.param(
             False,
-            {'groups': ['a', 'a', 'b', np.nan], 'values': [10, 10, 20, 30]},
-            {'values': [0, 1, 0, 0]},
-            id='dropna_false_has_nan'
+            {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
+            {"values": [0, 1, 0, 0]},
+            id="dropna_false_has_nan"
         ),
         pytest.param(
             True,
-            {'groups': ['a', 'a', 'b', np.nan], 'values': [10, 10, 20, 30]},
-            {'values': [0, 1, 0]},
-            id='dropna_true_has_nan'
+            {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
+            {"values": [0, 1, 0]},
+            id="dropna_true_has_nan"
         ),
         pytest.param(
-            # no nan in 'groups'; dropna=True|False should be same.
+            # no nan in "groups"; dropna=True|False should be same.
             False,
-            {'groups': ['a', 'a', 'b', 'c'], 'values': [10, 10, 20, 30]},
-            {'values': [0, 1, 0, 0]},
-            id='dropna_false_no_nan'
+            {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
+            {"values": [0, 1, 0, 0]},
+            id="dropna_false_no_nan"
         ),
         pytest.param(
-            # no nan in 'groups'; dropna=True|False should be same.
+            # no nan in "groups"; dropna=True|False should be same.
             True,
-            {'groups': ['a', 'a', 'b', 'c'], 'values': [10, 10, 20, 30]},
-            {'values': [0, 1, 0, 0]},
-            id='dropna_true_no_nan'
+            {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
+            {"values": [0, 1, 0, 0]},
+            id="dropna_true_no_nan"
         ),
     ],
 )
@@ -317,15 +317,15 @@ def test_groupby_dropna_multi_index_dataframe_apply(
     # sizes raises error if grouped column has nan values.
 
     df = pd.DataFrame(inputs)
-    dfg = df.groupby('groups', dropna=dropna)
-    rv = dfg.apply(lambda grp: pd.DataFrame({'values': list(range(len(grp)))}))
+    dfg = df.groupby("groups", dropna=dropna)
+    rv = dfg.apply(lambda grp: pd.DataFrame({"values": list(range(len(grp)))}))
 
     if dropna:
-        groups = [g for g in inputs['groups'] if g is not None]
+        groups = [g for g in inputs["groups"] if g is not None]
     else:
-        groups = inputs['groups']
-    tuples = tuple(zip(groups, outputs['values']))
-    mi = pd.MultiIndex.from_tuples(tuples, names=['groups', None])
+        groups = inputs["groups"]
+    tuples = tuple(zip(groups, outputs["values"]))
+    mi = pd.MultiIndex.from_tuples(tuples, names=["groups", None])
 
     expected = pd.DataFrame(outputs, index=mi)
     tm.assert_frame_equal(rv, expected)

From 87dbfd921699176d18afde2b39cfb34e0184293c Mon Sep 17 00:00:00 2001
From: David Kwong <dav.kcw@gmail.com>
Date: Fri, 28 Aug 2020 16:31:18 +1000
Subject: [PATCH 04/11] adjust comparison: handle np.nan compare (#35889)

* nans at the same positions in `level` and `key` compare as equal.
---
 pandas/core/reshape/concat.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 9e8fb643791f2..baec62b6f030f 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -9,6 +9,7 @@
 
 from pandas._typing import FrameOrSeries, FrameOrSeriesUnion, Label
 
+from pandas.core.dtypes.missing import isna
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
 
@@ -619,17 +620,16 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
         codes_list = []
 
         # things are potentially different sizes, so compute the exact codes
-        # for each level and pass those to MultiIndex.from_arrays
-
+        # for each level and pass those to MultiIndex.from_arrays.
         for hlevel, level in zip(zipped, levels):
             to_concat = []
             for key, index in zip(hlevel, indexes):
-                mask = level == key
+                mask = ((isna(level) & isna(key)) | (level == key))
                 if not mask.any():
                     raise ValueError(f"Key {key} not in level {level}")
-                i = np.nonzero(level == key)[0][0]
-
+                i = np.nonzero(mask)[0][0]
                 to_concat.append(np.repeat(i, len(index)))
+
             codes_list.append(np.concatenate(to_concat))
 
         concat_index = _concat_indexes(indexes)

From 2a1e804511b26b601bf34e91d5995646e2f7fd93 Mon Sep 17 00:00:00 2001
From: David Kwong <dav.kcw@gmail.com>
Date: Fri, 28 Aug 2020 18:10:27 +1000
Subject: [PATCH 05/11] refactor test: handle MultiIndex dropping nan (#35889)

* this makes test pass.

* follow the existing style: create the MultiIndex first,
  then call `set_levels` to reinsert nan for the case where
  `dropna=False` and the grouping column contains nan.
---
 pandas/tests/groupby/test_groupby_dropna.py | 22 ++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 76b281ac98467..1ee9ea73ad360 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -279,18 +279,20 @@ def test_groupby_dropna_datetime_like_data(
 
 
 @pytest.mark.parametrize(
-    "dropna, inputs, outputs",
+    "dropna, df_cols_in, df_cols_out, levels",
     [
         pytest.param(
             False,
             {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
             {"values": [0, 1, 0, 0]},
+            ["a", "b", np.nan],
             id="dropna_false_has_nan"
         ),
         pytest.param(
             True,
             {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
             {"values": [0, 1, 0]},
+            None,
             id="dropna_true_has_nan"
         ),
         pytest.param(
@@ -298,6 +300,7 @@ def test_groupby_dropna_datetime_like_data(
             False,
             {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
             {"values": [0, 1, 0, 0]},
+            None,
             id="dropna_false_no_nan"
         ),
         pytest.param(
@@ -305,27 +308,28 @@ def test_groupby_dropna_datetime_like_data(
             True,
             {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
             {"values": [0, 1, 0, 0]},
+            None,
             id="dropna_true_no_nan"
         ),
     ],
 )
 def test_groupby_dropna_multi_index_dataframe_apply(
-    dropna, inputs, outputs
+    dropna, df_cols_in, df_cols_out, levels
 ):
     # GH 35889
     # `groupby` with `dropna=False` and `apply` returning DataFrame of different
     # sizes raises error if grouped column has nan values.
 
-    df = pd.DataFrame(inputs)
+    df = pd.DataFrame(df_cols_in)
     dfg = df.groupby("groups", dropna=dropna)
     rv = dfg.apply(lambda grp: pd.DataFrame({"values": list(range(len(grp)))}))
 
-    if dropna:
-        groups = [g for g in inputs["groups"] if g is not None]
-    else:
-        groups = inputs["groups"]
-    tuples = tuple(zip(groups, outputs["values"]))
+    tuples = tuple(zip(df_cols_in["groups"], df_cols_out["values"]))
     mi = pd.MultiIndex.from_tuples(tuples, names=["groups", None])
+    # Since right now, by default MI will drop NA from levels when we create MI
+    # via `from_*`, so we need to add NA for level manually afterwards.
+    if not dropna and levels:
+        mi = mi.set_levels(levels, level="groups")
 
-    expected = pd.DataFrame(outputs, index=mi)
+    expected = pd.DataFrame(df_cols_out, index=mi)
     tm.assert_frame_equal(rv, expected)

From 3c754d33b2f0c311f38bd5d3920e9b8354a3bc3a Mon Sep 17 00:00:00 2001
From: David Kwong <dav.kcw@gmail.com>
Date: Fri, 28 Aug 2020 18:42:37 +1000
Subject: [PATCH 06/11] BUG: update rst (#35889)

---
 doc/source/whatsnew/v1.1.2.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst
index 9747a8ef3e71f..6d3ab55381c03 100644
--- a/doc/source/whatsnew/v1.1.2.rst
+++ b/doc/source/whatsnew/v1.1.2.rst
@@ -29,6 +29,7 @@ Bug fixes
 - Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`)
 - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
 - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should bw ``""`` (:issue:`35712`)
+- Bug in :meth:`DataFrame.apply` on :meth:`DataFrame.groupby`, ``dropna=False`` and ``np.nan`` group(s) (:issue:`35889`)
 -
 
 .. ---------------------------------------------------------------------------

From 2cc4f39687e4456fe719ded52f4ce09916e9c5d9 Mon Sep 17 00:00:00 2001
From: David Kwong <dav.kcw@gmail.com>
Date: Fri, 28 Aug 2020 18:57:31 +1000
Subject: [PATCH 07/11] BUG: run code formatters (#35889)

* black pandas

* git diff upstream/master -u -- "*.py" | flake8 --diff
---
 pandas/core/reshape/concat.py               | 2 +-
 pandas/tests/groupby/test_groupby_dropna.py | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index baec62b6f030f..ceddd5ca186ac 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -624,7 +624,7 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
         for hlevel, level in zip(zipped, levels):
             to_concat = []
             for key, index in zip(hlevel, indexes):
-                mask = ((isna(level) & isna(key)) | (level == key))
+                mask = (isna(level) & isna(key)) | (level == key)
                 if not mask.any():
                     raise ValueError(f"Key {key} not in level {level}")
                 i = np.nonzero(mask)[0][0]
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 1ee9ea73ad360..5c17a1955086d 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -286,14 +286,14 @@ def test_groupby_dropna_datetime_like_data(
             {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
             {"values": [0, 1, 0, 0]},
             ["a", "b", np.nan],
-            id="dropna_false_has_nan"
+            id="dropna_false_has_nan",
         ),
         pytest.param(
             True,
             {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
             {"values": [0, 1, 0]},
             None,
-            id="dropna_true_has_nan"
+            id="dropna_true_has_nan",
         ),
         pytest.param(
             # no nan in "groups"; dropna=True|False should be same.
@@ -301,7 +301,7 @@ def test_groupby_dropna_datetime_like_data(
             {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
             {"values": [0, 1, 0, 0]},
             None,
-            id="dropna_false_no_nan"
+            id="dropna_false_no_nan",
         ),
         pytest.param(
             # no nan in "groups"; dropna=True|False should be same.
@@ -309,7 +309,7 @@ def test_groupby_dropna_datetime_like_data(
             {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
             {"values": [0, 1, 0, 0]},
             None,
-            id="dropna_true_no_nan"
+            id="dropna_true_no_nan",
         ),
     ],
 )

From 8a2eab89a6d6f8835613cd4cfb7b084a9fba4b98 Mon Sep 17 00:00:00 2001
From: David Kwong <dav.kcw@gmail.com>
Date: Fri, 28 Aug 2020 20:53:33 +1000
Subject: [PATCH 08/11] fix isort error during CI (#35889)

---
 pandas/core/reshape/concat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index ceddd5ca186ac..cf20626fd3ac1 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -9,9 +9,9 @@
 
 from pandas._typing import FrameOrSeries, FrameOrSeriesUnion, Label
 
-from pandas.core.dtypes.missing import isna
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
+from pandas.core.dtypes.missing import isna
 
 from pandas.core.arrays.categorical import (
     factorize_from_iterable,

From aadbd750125a43651a7cee852099d84113cc5e1f Mon Sep 17 00:00:00 2001
From: David Kwong <dav.kcw@gmail.com>
Date: Sat, 29 Aug 2020 22:42:54 +1000
Subject: [PATCH 09/11] BUG: address PR review comments (#35889)

---
 doc/source/whatsnew/v1.1.2.rst              |  2 +-
 pandas/tests/groupby/test_groupby_dropna.py | 22 ++++++++++-----------
 2 files changed, 11 insertions(+), 13 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst
index 6d3ab55381c03..3990cd07d7166 100644
--- a/doc/source/whatsnew/v1.1.2.rst
+++ b/doc/source/whatsnew/v1.1.2.rst
@@ -29,7 +29,7 @@ Bug fixes
 - Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`)
 - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
 - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should bw ``""`` (:issue:`35712`)
-- Bug in :meth:`DataFrame.apply` on :meth:`DataFrame.groupby`, ``dropna=False`` and ``np.nan`` group(s) (:issue:`35889`)
+- Bug in :meth:`DataFrame.groupby(...).apply(...)` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 5c17a1955086d..6ea41a45d4e7f 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -279,7 +279,7 @@ def test_groupby_dropna_datetime_like_data(
 
 
 @pytest.mark.parametrize(
-    "dropna, df_cols_in, df_cols_out, levels",
+    "dropna, data, selected_data, levels",
     [
         pytest.param(
             False,
@@ -313,23 +313,21 @@ def test_groupby_dropna_datetime_like_data(
         ),
     ],
 )
-def test_groupby_dropna_multi_index_dataframe_apply(
-    dropna, df_cols_in, df_cols_out, levels
+def test_groupby_apply_with_dropna_for_multi_index(
+    dropna, data, selected_data, levels
 ):
     # GH 35889
-    # `groupby` with `dropna=False` and `apply` returning DataFrame of different
-    # sizes raises error if grouped column has nan values.
 
-    df = pd.DataFrame(df_cols_in)
-    dfg = df.groupby("groups", dropna=dropna)
-    rv = dfg.apply(lambda grp: pd.DataFrame({"values": list(range(len(grp)))}))
+    df = pd.DataFrame(data)
+    gb = df.groupby("groups", dropna=dropna)
+    result = gb.apply(lambda grp: pd.DataFrame({"values": range(len(grp))}))
 
-    tuples = tuple(zip(df_cols_in["groups"], df_cols_out["values"]))
-    mi = pd.MultiIndex.from_tuples(tuples, names=["groups", None])
+    mi_tuples = tuple(zip(data["groups"], selected_data["values"]))
+    mi = pd.MultiIndex.from_tuples(mi_tuples, names=["groups", None])
     # Since right now, by default MI will drop NA from levels when we create MI
     # via `from_*`, so we need to add NA for level manually afterwards.
     if not dropna and levels:
         mi = mi.set_levels(levels, level="groups")
 
-    expected = pd.DataFrame(df_cols_out, index=mi)
-    tm.assert_frame_equal(rv, expected)
+    expected = pd.DataFrame(selected_data, index=mi)
+    tm.assert_frame_equal(result, expected)

From 311059c36ae19c9c438ff712ce5063149aee31e1 Mon Sep 17 00:00:00 2001
From: David Kwong <dav.kcw@gmail.com>
Date: Sat, 29 Aug 2020 23:21:03 +1000
Subject: [PATCH 10/11] forgot to run black, flake8 (#35889)

---
 pandas/tests/groupby/test_groupby_dropna.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index 6ea41a45d4e7f..2ef5dbcf41758 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -313,9 +313,7 @@ def test_groupby_dropna_datetime_like_data(
         ),
     ],
 )
-def test_groupby_apply_with_dropna_for_multi_index(
-    dropna, data, selected_data, levels
-):
+def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, levels):
     # GH 35889
 
     df = pd.DataFrame(data)

From 714c81ab0e08da5f2b96783b7850b7fcee0a1008 Mon Sep 17 00:00:00 2001
From: David Kwong <dav.kcw@gmail.com>
Date: Wed, 2 Sep 2020 11:09:34 +1000
Subject: [PATCH 11/11] BUG: address review comments (#35889)

---
 doc/source/whatsnew/v1.1.2.rst | 1 -
 doc/source/whatsnew/v1.2.0.rst | 3 ++-
 pandas/core/reshape/concat.py  | 6 ++++--
 3 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/doc/source/whatsnew/v1.1.2.rst b/doc/source/whatsnew/v1.1.2.rst
index 3990cd07d7166..9747a8ef3e71f 100644
--- a/doc/source/whatsnew/v1.1.2.rst
+++ b/doc/source/whatsnew/v1.1.2.rst
@@ -29,7 +29,6 @@ Bug fixes
 - Bug in :class:`Series` constructor raising a ``TypeError`` when constructing sparse datetime64 dtypes (:issue:`35762`)
 - Bug in :meth:`DataFrame.apply` with ``result_type="reduce"`` returning with incorrect index (:issue:`35683`)
 - Bug in :meth:`DateTimeIndex.format` and :meth:`PeriodIndex.format` with ``name=True`` setting the first item to ``"None"`` where it should bw ``""`` (:issue:`35712`)
-- Bug in :meth:`DataFrame.groupby(...).apply(...)` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
 -
 
 .. ---------------------------------------------------------------------------
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index 55570341cf4e8..0e376511ddd08 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -163,7 +163,8 @@ Performance improvements
 
 Bug fixes
 ~~~~~~~~~
-
+- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
+-
 
 Categorical
 ^^^^^^^^^^^
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 30486b6e2bff0..9b94dae8556f6 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -620,16 +620,18 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
         codes_list = []
 
         # things are potentially different sizes, so compute the exact codes
-        # for each level and pass those to MultiIndex.from_arrays.
+        # for each level and pass those to MultiIndex.from_arrays
+
         for hlevel, level in zip(zipped, levels):
             to_concat = []
             for key, index in zip(hlevel, indexes):
+                # Find matching codes, include matching nan values as equal.
                 mask = (isna(level) & isna(key)) | (level == key)
                 if not mask.any():
                     raise ValueError(f"Key {key} not in level {level}")
                 i = np.nonzero(mask)[0][0]
-                to_concat.append(np.repeat(i, len(index)))
 
+                to_concat.append(np.repeat(i, len(index)))
             codes_list.append(np.concatenate(to_concat))
 
         concat_index = _concat_indexes(indexes)