pandas-dev · mroeschke · Oct 12, 2022 · Oct 8, 2022 · Oct 8, 2022 · Oct 8, 2022
diff --git a/asv_bench/benchmarks/multiindex_object.py b/asv_bench/benchmarks/multiindex_object.py
@@ -239,10 +239,11 @@ class SetOperations:
         ("monotonic", "non_monotonic"),
         ("datetime", "int", "string", "ea_int"),
         ("intersection", "union", "symmetric_difference"),
+        (False, None),
     ]
-    param_names = ["index_structure", "dtype", "method"]
+    param_names = ["index_structure", "dtype", "method", "sort"]
 
-    def setup(self, index_structure, dtype, method):
+    def setup(self, index_structure, dtype, method, sort):
         N = 10**5
         level1 = range(1000)
 
@@ -272,8 +273,8 @@ def setup(self, index_structure, dtype, method):
         self.left = data[dtype]["left"]
         self.right = data[dtype]["right"]
 
-    def time_operation(self, index_structure, dtype, method):
-        getattr(self.left, method)(self.right)
+    def time_operation(self, index_structure, dtype, method, sort):
+        getattr(self.left, method)(self.right, sort=sort)
 
 
 class Difference:

diff --git a/doc/source/whatsnew/v1.6.0.rst b/doc/source/whatsnew/v1.6.0.rst
@@ -140,6 +140,7 @@ Performance improvements
 - Performance improvement in :meth:`MultiIndex.size` (:issue:`48723`)
 - Performance improvement in :meth:`MultiIndex.union` without missing values and without duplicates (:issue:`48505`)
 - Performance improvement in :meth:`MultiIndex.difference` (:issue:`48606`)
+- Performance improvement in :class:`MultiIndex` set operations with sort=None (:issue:`49010`)
 - Performance improvement in :meth:`.DataFrameGroupBy.mean`, :meth:`.SeriesGroupBy.mean`, :meth:`.DataFrameGroupBy.var`, and :meth:`.SeriesGroupBy.var` for extension array dtypes (:issue:`37493`)
 - Performance improvement in :meth:`MultiIndex.isin` when ``level=None`` (:issue:`48622`)
 - Performance improvement in :meth:`Index.union` and :meth:`MultiIndex.union` when index contains duplicates (:issue:`48900`)

diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py
@@ -14,7 +14,6 @@
     Sequence,
     cast,
     final,
-    overload,
 )
 import warnings
 
@@ -1846,10 +1845,8 @@ def safe_sort(
         raise TypeError(
             "Only list-like objects are allowed to be passed to safe_sort as values"
         )
-    original_values = values
-    is_mi = isinstance(original_values, ABCMultiIndex)
 
-    if not isinstance(values, (np.ndarray, ABCExtensionArray)):
+    if not isinstance(values, (np.ndarray, ABCExtensionArray, ABCMultiIndex)):
         # don't convert to string types
         dtype, _ = infer_dtype_from_array(values)
         # error: Argument "dtype" to "asarray" has incompatible type "Union[dtype[Any],
@@ -1869,17 +1866,13 @@ def safe_sort(
     else:
         try:
             sorter = values.argsort()
-            if is_mi:
-                # Operate on original object instead of casted array (MultiIndex)
-                ordered = original_values.take(sorter)
-            else:
-                ordered = values.take(sorter)
+            ordered = values.take(sorter)
         except TypeError:
             # Previous sorters failed or were not applicable, try `_sort_mixed`
             # which would work, but which fails for special case of 1d arrays
             # with tuples.
             if values.size and isinstance(values[0], tuple):
-                ordered = _sort_tuples(values, original_values)
+                ordered = _sort_tuples(values)
             else:
                 ordered = _sort_mixed(values)
 
@@ -1941,33 +1934,19 @@ def _sort_mixed(values) -> np.ndarray:
     )
 
 
-@overload
-def _sort_tuples(values: np.ndarray, original_values: np.ndarray) -> np.ndarray:
-    ...
-
-
-@overload
-def _sort_tuples(values: np.ndarray, original_values: MultiIndex) -> MultiIndex:
-    ...
-
-
-def _sort_tuples(
-    values: np.ndarray, original_values: np.ndarray | MultiIndex
-) -> np.ndarray | MultiIndex:
+def _sort_tuples(values: np.ndarray) -> np.ndarray:
     """
-    Convert array of tuples (1d) to array or array (2d).
+    Convert array of tuples (1d) to array of arrays (2d).
     We need to keep the columns separately as they contain different types and
     nans (can't use `np.sort` as it may fail when str and nan are mixed in a
     column as types cannot be compared).
-    We have to apply the indexer to the original values to keep the dtypes in
-    case of MultiIndexes
     """
     from pandas.core.internals.construction import to_arrays
     from pandas.core.sorting import lexsort_indexer
 
     arrays, _ = to_arrays(values, None)
     indexer = lexsort_indexer(arrays, orders=True)
-    return original_values[indexer]
+    return values[indexer]
 
 
 def union_with_duplicates(

diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py
@@ -555,7 +555,8 @@ def test_union_nan_got_duplicated():
     mi1 = MultiIndex.from_arrays([[1.0, np.nan], [2, 3]])
     mi2 = MultiIndex.from_arrays([[1.0, np.nan, 3.0], [2, 3, 4]])
     result = mi1.union(mi2)
-    tm.assert_index_equal(result, mi2)
+    expected = MultiIndex.from_arrays([[1.0, 3.0, np.nan], [2, 4, 3]])
+    tm.assert_index_equal(result, expected)
 
 
 @pytest.mark.parametrize("val", [4, 1])