diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
index e65daa439a225..aa3255e673797 100644
--- a/doc/source/whatsnew/v1.2.0.rst
+++ b/doc/source/whatsnew/v1.2.0.rst
@@ -214,7 +214,8 @@ Performance improvements
 
 Bug fixes
 ~~~~~~~~~
-
+- Bug in :meth:`DataFrameGroupBy.apply` raising error with ``np.nan`` group(s) when ``dropna=False`` (:issue:`35889`)
+-
 
 Categorical
 ^^^^^^^^^^^
diff --git a/pandas/core/reshape/concat.py b/pandas/core/reshape/concat.py
index 299b68c6e71e0..9b94dae8556f6 100644
--- a/pandas/core/reshape/concat.py
+++ b/pandas/core/reshape/concat.py
@@ -11,6 +11,7 @@
 
 from pandas.core.dtypes.concat import concat_compat
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries
+from pandas.core.dtypes.missing import isna
 
 from pandas.core.arrays.categorical import (
     factorize_from_iterable,
@@ -624,10 +625,11 @@ def _make_concat_multiindex(indexes, keys, levels=None, names=None) -> MultiInde
         for hlevel, level in zip(zipped, levels):
             to_concat = []
             for key, index in zip(hlevel, indexes):
-                mask = level == key
+                # Find matching codes, include matching nan values as equal.
+                mask = (isna(level) & isna(key)) | (level == key)
                 if not mask.any():
                     raise ValueError(f"Key {key} not in level {level}")
-                i = np.nonzero(level == key)[0][0]
+                i = np.nonzero(mask)[0][0]
 
                 to_concat.append(np.repeat(i, len(index)))
             codes_list.append(np.concatenate(to_concat))
diff --git a/pandas/tests/groupby/test_groupby_dropna.py b/pandas/tests/groupby/test_groupby_dropna.py
index d1501111cb22b..66db06eeebdfb 100644
--- a/pandas/tests/groupby/test_groupby_dropna.py
+++ b/pandas/tests/groupby/test_groupby_dropna.py
@@ -274,3 +274,56 @@ def test_groupby_dropna_datetime_like_data(
     expected = pd.DataFrame({"values": values}, index=pd.Index(indexes, name="dt"))
 
     tm.assert_frame_equal(grouped, expected)
+
+
+@pytest.mark.parametrize(
+    "dropna, data, selected_data, levels",
+    [
+        pytest.param(
+            False,
+            {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
+            {"values": [0, 1, 0, 0]},
+            ["a", "b", np.nan],
+            id="dropna_false_has_nan",
+        ),
+        pytest.param(
+            True,
+            {"groups": ["a", "a", "b", np.nan], "values": [10, 10, 20, 30]},
+            {"values": [0, 1, 0]},
+            None,
+            id="dropna_true_has_nan",
+        ),
+        pytest.param(
+            # no nan in "groups"; dropna=True|False should be same.
+            False,
+            {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
+            {"values": [0, 1, 0, 0]},
+            None,
+            id="dropna_false_no_nan",
+        ),
+        pytest.param(
+            # no nan in "groups"; dropna=True|False should be same.
+            True,
+            {"groups": ["a", "a", "b", "c"], "values": [10, 10, 20, 30]},
+            {"values": [0, 1, 0, 0]},
+            None,
+            id="dropna_true_no_nan",
+        ),
+    ],
+)
+def test_groupby_apply_with_dropna_for_multi_index(dropna, data, selected_data, levels):
+    # GH 35889
+
+    df = pd.DataFrame(data)
+    gb = df.groupby("groups", dropna=dropna)
+    result = gb.apply(lambda grp: pd.DataFrame({"values": range(len(grp))}))
+
+    mi_tuples = tuple(zip(data["groups"], selected_data["values"]))
+    mi = pd.MultiIndex.from_tuples(mi_tuples, names=["groups", None])
+    # Since right now, by default MI will drop NA from levels when we create MI
+    # via `from_*`, so we need to add NA for level manually afterwards.
+    if not dropna and levels:
+        mi = mi.set_levels(levels, level="groups")
+
+    expected = pd.DataFrame(selected_data, index=mi)
+    tm.assert_frame_equal(result, expected)