pandas-dev · jreback · Nov 4, 2020 · Oct 13, 2020 · Oct 13, 2020 · Oct 13, 2020
diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst
@@ -412,6 +412,7 @@ MultiIndex
 
 - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`)
 - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`)
+- Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` with message ``"'<' not supported between instances of 'float' and 'str'"`` (:issue:`36562`)
 -
 
 I/O

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -4,6 +4,7 @@
 """
 from __future__ import annotations
 
+import functools
 import operator
 from textwrap import dedent
 from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union, cast
@@ -2055,20 +2056,63 @@ def sort_mixed(values):
         strs = np.sort(values[str_pos])
         return np.concatenate([nums, np.asarray(strs, dtype=object)])
 
+    def sort_tuples(values):
+        # Lexicographic tuple sort; NA is ordered last rather than compared with '<'.
+        def cmp_func(index_x, index_y):
+            x = values[index_x]
+            y = values[index_y]
+            # identical tuples compare equal; skip the element-wise loop
+            if x == y:
+                return 0
+            # compare element by element (lexicographic order)
+            for i in range(max(len(x), len(y))):
+                # all earlier positions tied, so a tuple that runs out of
+                # elements here is a prefix of the other and sorts first
+                if i >= len(x):
+                    return -1
+                if i >= len(y):
+                    return +1
+                x_is_na = isna(x[i])
+                y_is_na = isna(y[i])
+                # both NA or equal -> tie at this position, move to the next one
+                if (x_is_na and y_is_na) or (x[i] == y[i]):
+                    continue
+                # exactly one side is NA here: the NA side sorts last
+                if x_is_na and not y_is_na:
+                    return +1
+                if not x_is_na and y_is_na:
+                    return -1
+                # neither side is NA: fall back to the ordinary '<' comparison
+                if x[i] < y[i]:
+                    return -1
+                return +1
+            # every position tied, e.g. NA pairs that ``x == y`` did not equate
+            return 0
+
+        ixs = np.arange(len(values))
+        ixs = sorted(ixs, key=functools.cmp_to_key(cmp_func))
+        return values[ixs]
+
     sorter = None
+
     if (
         not is_extension_array_dtype(values)
         and lib.infer_dtype(values, skipna=False) == "mixed-integer"
     ):
-        # unorderable in py3 if mixed str/int
         ordered = sort_mixed(values)
     else:
         try:
             sorter = values.argsort()
             ordered = values.take(sorter)
         except TypeError:
             # try this anyway
-            ordered = sort_mixed(values)
+            try:
+                ordered = sort_mixed(values)
+            except TypeError:
+                if values.size and isinstance(values[0], tuple):
+                    ordered = sort_tuples(values)
+                else:
+                    raise
 
     # codes:
 

diff --git a/pandas/tests/indexing/multiindex/test_multiindex.py b/pandas/tests/indexing/multiindex/test_multiindex.py
@@ -91,3 +91,30 @@ def test_multiindex_get_loc_list_raises(self):
         msg = "unhashable type"
         with pytest.raises(TypeError, match=msg):
             idx.get_loc([])
+
+
+def test_combine_first_with_nan_multiindex():  # GH 36562
+    mi1 = pd.MultiIndex.from_arrays(
+        [["b", "b", "c", "a", "b", np.nan], [1, 2, 3, 4, 5, 6]], names=["a", "b"]
+    )
+    df = pd.DataFrame({"c": [1, 1, 1, 1, 1, 1]}, index=mi1)
+    mi2 = pd.MultiIndex.from_arrays(
+        [["a", "b", "c", "a", "b", "d"], [1, 1, 1, 1, 1, 1]], names=["a", "b"]
+    )
+    s = pd.Series([1, 2, 3, 4, 5, 6], index=mi2)
+    res = df.combine_first(pd.DataFrame({"d": s}))
+    mi_expected = pd.MultiIndex.from_arrays(
+        [
+            ["a", "a", "a", "b", "b", "b", "b", "c", "c", "d", np.nan],  # NaN last
+            [1, 1, 4, 1, 1, 2, 5, 1, 3, 1, 6],
+        ],
+        names=["a", "b"],
+    )
+    expected = pd.DataFrame(
+        {
+            "c": [np.nan, np.nan, 1, 1, 1, 1, 1, np.nan, 1, np.nan, 1],
+            "d": [1.0, 4.0, np.nan, 2.0, 5.0, np.nan, np.nan, 3.0, np.nan, 6.0, np.nan],
+        },
+        index=mi_expected,
+    )
+    tm.assert_frame_equal(res, expected)
diff --git a/pandas/tests/test_sorting.py b/pandas/tests/test_sorting.py
@@ -453,3 +453,10 @@ def test_extension_array_codes(self, verify, na_sentinel):
         expected_codes = np.array([0, 2, na_sentinel, 1], dtype=np.intp)
         tm.assert_extension_array_equal(result, expected_values)
         tm.assert_numpy_array_equal(codes, expected_codes)
+
+
+def test_mixed_str_nan():
+    values = np.array(["b", np.nan, "a", "b"], dtype=object)
+    result = safe_sort(values)
+    expected = np.array([np.nan, "a", "b", "b"], dtype=object)  # NaN sorts first
+    tm.assert_numpy_array_equal(result, expected)
-Original file line number
+Diff line change
@@ Expand Up / @@ -412,6 +412,7 @@ MultiIndex @@
     - Bug in :meth:`DataFrame.xs` when used with :class:`IndexSlice` raises ``TypeError`` with message ``"Expected label or tuple of labels"`` (:issue:`35301`)
     - Bug in :meth:`DataFrame.reset_index` with ``NaT`` values in index raises ``ValueError`` with message ``"cannot convert float NaN to integer"`` (:issue:`36541`)
+    - Bug in :meth:`DataFrame.combine_first` when used with :class:`MultiIndex` containing string and ``NaN`` values raises ``TypeError`` with message ``"'<' not supported between instances of 'float' and 'str'"`` (:issue:`36562`)
     -
     I/O
@@ Expand Down @@