pandas-dev · jreback · May 26, 2021 · Jan 4, 2021 · Jan 5, 2021 · Jan 5, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -843,7 +843,7 @@ Interval
 Indexing
 ^^^^^^^^
 
-- Bug in :meth:`Index.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`, :issue:`40862`)
+- Bug in :meth:`Index.union` and :meth:`MultiIndex.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`, :issue:`40862`)
 - Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`)
 - Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`)
 - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when setting multiple values to duplicate columns (:issue:`15695`)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -291,7 +291,7 @@ def item_from_zerodim(val: object) -> object:
 
 @cython.wraparound(False)
 @cython.boundscheck(False)
-def fast_unique_multiple(list arrays, sort: bool = True) -> list:
+def fast_unique_multiple(list arrays, sort: bool = True):
     """
     Generate a list of unique values from a list of arrays.
 

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -3574,14 +3574,20 @@ def equal_levels(self, other: MultiIndex) -> bool:
 
     def _union(self, other, sort) -> MultiIndex:
         other, result_names = self._convert_can_do_setop(other)
+        if (
+            any(-1 in code for code in self.codes)
+            and any(-1 in code for code in other.codes)
+            or self.has_duplicates
+            or other.has_duplicates
+        ):
+            # The generic parent implementation is only needed when both sides
+            # have nans or either side has dups; fast_unique_multiple is faster
+            result = super()._union(other, sort)
+        else:
+            rvals = other._values.astype(object, copy=False)
+            result = lib.fast_unique_multiple([self._values, rvals], sort=sort)
 
-        # We could get here with CategoricalIndex other
-        rvals = other._values.astype(object, copy=False)
-        uniq_tuples = lib.fast_unique_multiple([self._values, rvals], sort=sort)
-
-        return MultiIndex.from_arrays(
-            zip(*uniq_tuples), sortorder=0, names=result_names
-        )
+        return MultiIndex.from_arrays(zip(*result), sortorder=0, names=result_names)
 
     def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
         return is_object_dtype(dtype)

diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py
@@ -3,7 +3,9 @@
 
 import pandas as pd
 from pandas import (
+    CategoricalIndex,
     Index,
+    IntervalIndex,
     MultiIndex,
     Series,
 )
@@ -508,3 +510,26 @@ def test_intersection_with_missing_values_on_both_sides(nulls_fixture):
     result = mi1.intersection(mi2)
     expected = MultiIndex.from_arrays([[3.0, nulls_fixture], [1, 2]])
     tm.assert_index_equal(result, expected)
+
+
+def test_union_nan_got_duplicated():
+    # GH#38977: a NaN row shared by both operands must not be duplicated
+    left = MultiIndex.from_arrays([[1.0, np.nan], [2, 3]])
+    right = MultiIndex.from_arrays([[1.0, np.nan, 3.0], [2, 3, 4]])
+    union = left.union(right)
+    tm.assert_index_equal(union, right)
+
+
+def test_union_duplicates(index):
+    # GH#38977: union must keep duplicates regardless of operand order
+    if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)):
+        # Nothing to duplicate in an empty index; Interval/Categorical skipped
+        return
+    vals = index.unique().values.tolist()
+    left = MultiIndex.from_arrays([vals, [1] * len(vals)])
+    right = MultiIndex.from_arrays([[vals[0]] + vals, [1] * (len(vals) + 1)])
+
+    forward = left.union(right)
+    tm.assert_index_equal(forward, right.sort_values())
+    backward = right.union(left)
+    tm.assert_index_equal(backward, right.sort_values())
diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py
@@ -2,7 +2,6 @@
 import pytest
 
 from pandas._libs import (
-    Timestamp,
     lib,
     writers as libwriters,
 )
@@ -43,11 +42,6 @@ def test_fast_unique_multiple_list_gen_sort(self):
         out = lib.fast_unique_multiple_list_gen(gen, sort=False)
         tm.assert_numpy_array_equal(np.array(out), expected)
 
-    def test_fast_unique_multiple_unsortable_runtimewarning(self):
-        arr = [np.array(["foo", Timestamp("2000")])]
-        with tm.assert_produces_warning(RuntimeWarning):
-            lib.fast_unique_multiple(arr, sort=None)
-
 
 class TestIndexing:
     def test_maybe_indices_to_slice_left_edge(self):