pandas-dev · jreback · Nov 4, 2020 · Oct 13, 2020 · Oct 13, 2020 · Oct 13, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -469,6 +469,7 @@ MultiIndex
 
 - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`)
 - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`)
+- Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` (:issue:`36562`)
 
 I/O
 ^^^

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -2061,27 +2061,25 @@ def safe_sort(
         dtype, _ = infer_dtype_from_array(values)
         values = np.asarray(values, dtype=dtype)
 
-    def sort_mixed(values):
-        # order ints before strings, safe in py3
-        str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
-        nums = np.sort(values[~str_pos])
-        strs = np.sort(values[str_pos])
-        return np.concatenate([nums, np.asarray(strs, dtype=object)])
-
     sorter = None
+
     if (
         not is_extension_array_dtype(values)
         and lib.infer_dtype(values, skipna=False) == "mixed-integer"
     ):
-        # unorderable in py3 if mixed str/int
-        ordered = sort_mixed(values)
+        ordered = _sort_mixed(values)
     else:
         try:
             sorter = values.argsort()
             ordered = values.take(sorter)
         except TypeError:
-            # try this anyway
-            ordered = sort_mixed(values)
+            # Previous sorters failed or were not applicable, try `_sort_mixed`
+            # which would work, but which fails for special case of 1d arrays
+            # with tuples.
+            if values.size and isinstance(values[0], tuple):
+                ordered = _sort_tuples(values)
+            else:
+                ordered = _sort_mixed(values)
 
     # codes:
 
@@ -2128,3 +2126,26 @@ def sort_mixed(values):
         np.putmask(new_codes, mask, na_sentinel)
 
     return ordered, ensure_platform_int(new_codes)
+
+
+def _sort_mixed(values):
+    """ order ints before strings in 1d arrays, safe in py3 """
+    str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
+    nums = np.sort(values[~str_pos])
+    strs = np.sort(values[str_pos])
+    return np.concatenate([nums, np.asarray(strs, dtype=object)])
+
+
+def _sort_tuples(values: np.ndarray[tuple]):
+    """
+    Convert array of tuples (1d) to array or array (2d).
+    We need to keep the columns separately as they contain different types and
+    nans (can't use `np.sort` as it may fail when str and nan are mixed in a
+    column as types cannot be compared).
+    """
+    from pandas.core.internals.construction import to_arrays
+    from pandas.core.sorting import lexsort_indexer
+
+    arrays, _ = to_arrays(values, None)
+    indexer = lexsort_indexer(arrays, orders=True)
+    return values[indexer]
diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py
@@ -4,7 +4,7 @@
 import pytest
 
 import pandas as pd
-from pandas import DataFrame, Index, Series
+from pandas import DataFrame, Index, MultiIndex, Series
 import pandas._testing as tm
 
 
@@ -365,3 +365,32 @@ def test_combine_first_string_dtype_only_na(self):
             {"a": ["962", "85"], "b": [pd.NA] * 2}, dtype="string"
         ).set_index(["a", "b"])
         tm.assert_frame_equal(result, expected)
+
+
+def test_combine_first_with_nan_multiindex():
+    # gh-36562
+
+    mi1 = MultiIndex.from_arrays(
+        [["b", "b", "c", "a", "b", np.nan], [1, 2, 3, 4, 5, 6]], names=["a", "b"]
+    )
+    df = DataFrame({"c": [1, 1, 1, 1, 1, 1]}, index=mi1)
+    mi2 = MultiIndex.from_arrays(
+        [["a", "b", "c", "a", "b", "d"], [1, 1, 1, 1, 1, 1]], names=["a", "b"]
+    )
+    s = Series([1, 2, 3, 4, 5, 6], index=mi2)
+    res = df.combine_first(DataFrame({"d": s}))
+    mi_expected = MultiIndex.from_arrays(
+        [
+            ["a", "a", "a", "b", "b", "b", "b", "c", "c", "d", np.nan],
+            [1, 1, 4, 1, 1, 2, 5, 1, 3, 1, 6],
+        ],
+        names=["a", "b"],
+    )
+    expected = DataFrame(
+        {
+            "c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1],
+            "d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan],
+        },
+        index=mi_expected,
+    )
+    tm.assert_frame_equal(res, expected)
diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py
@@ -453,3 +453,10 @@ def test_extension_array_codes(self, verify, na_sentinel):
         expected_codes = np.array([0, 2, na_sentinel, 1], dtype=np.intp)
         tm.assert_extension_array_equal(result, expected_values)
         tm.assert_numpy_array_equal(codes, expected_codes)
+
+
+def test_mixed_str_nan():
+    values = np.array(["b", np.nan, "a", "b"], dtype=object)
+    result = safe_sort(values)
+    expected = np.array([np.nan, "a", "b", "b"], dtype=object)
+    tm.assert_numpy_array_equal(result, expected)
-Original file line number
+Diff line change
@@ Expand Up / @@ -469,6 +469,7 @@ MultiIndex @@
     - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`)
     - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`)
+    - Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` (:issue:`36562`)
     I/O
     ^^^
@@ Expand Down @@