pandas-dev · jreback · May 26, 2021 · Jan 4, 2021 · Jan 5, 2021 · Jan 5, 2021
diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst
@@ -678,7 +678,7 @@ Interval
 Indexing
 ^^^^^^^^
 
-- Bug in :meth:`Index.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`)
+- Bug in :meth:`Index.union` and :meth:`MultiIndex.union` dropping duplicate ``Index`` values when ``Index`` was not monotonic or ``sort`` was set to ``False`` (:issue:`36289`, :issue:`31326`, :issue:`38745`)
 - Bug in :meth:`CategoricalIndex.get_indexer` failing to raise ``InvalidIndexError`` when non-unique (:issue:`38372`)
 - Bug in inserting many new columns into a :class:`DataFrame` causing incorrect subsequent indexing behavior (:issue:`38380`)
 - Bug in :meth:`DataFrame.__setitem__` raising ``ValueError`` when setting multiple values to duplicate columns (:issue:`15695`)

diff --git a/pandas/_libs/lib.pyx b/pandas/_libs/lib.pyx
@@ -268,54 +268,6 @@ def item_from_zerodim(val: object) -> object:
     return val
 
 
-@cython.wraparound(False)
-@cython.boundscheck(False)
-def fast_unique_multiple(list arrays, sort: bool = True) -> list:
-    """
-    Generate a list of unique values from a list of arrays.
-
-    Parameters
-    ----------
-    list : array-like
-        List of array-like objects.
-    sort : bool
-        Whether or not to sort the resulting unique list.
-
-    Returns
-    -------
-    list of unique values
-    """
-    cdef:
-        ndarray[object] buf
-        Py_ssize_t k = len(arrays)
-        Py_ssize_t i, j, n
-        list uniques = []
-        dict table = {}
-        object val, stub = 0
-
-    for i in range(k):
-        buf = arrays[i]
-        n = len(buf)
-        for j in range(n):
-            val = buf[j]
-            if val not in table:
-                table[val] = stub
-                uniques.append(val)
-
-    if sort is None:
-        try:
-            uniques.sort()
-        except TypeError:
-            warnings.warn(
-                "The values in the array are unorderable. "
-                "Pass `sort=False` to suppress this warning.",
-                RuntimeWarning,
-            )
-            pass
-
-    return uniques
-
-
 @cython.wraparound(False)
 @cython.boundscheck(False)
 def fast_unique_multiple_list(lists: list, sort: bool = True) -> list:

diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py
@@ -2979,8 +2979,8 @@ def _union(self, other: Index, sort):
                 value_list.extend([x for x in rvals if x not in value_set])
                 return Index(value_list)._values  # do type inference here
 
-        elif not other.is_unique and not self.is_unique:
-            # self and other both have duplicates
+        elif not other.is_unique:
+            # other has duplicates
 
             # error: Argument 1 to "union_with_duplicates" has incompatible type
             # "Union[ExtensionArray, ndarray]"; expected "ndarray"
@@ -2989,7 +2989,7 @@ def _union(self, other: Index, sort):
             result = algos.union_with_duplicates(lvals, rvals)  # type: ignore[arg-type]
             return _maybe_try_sort(result, sort)
 
-        # Either other or self is not unique
+        # Self may have duplicates
         # find indexes of things in "other" that are not in "self"
         if self.is_unique:
             indexer = self.get_indexer(other)

diff --git a/pandas/core/indexes/multi.py b/pandas/core/indexes/multi.py
@@ -3568,14 +3568,9 @@ def equal_levels(self, other: MultiIndex) -> bool:
 
     def _union(self, other, sort) -> MultiIndex:
         other, result_names = self._convert_can_do_setop(other)
+        result = super()._union(other, sort)
 
-        # We could get here with CategoricalIndex other
-        rvals = other._values.astype(object, copy=False)
-        uniq_tuples = lib.fast_unique_multiple([self._values, rvals], sort=sort)
-
-        return MultiIndex.from_arrays(
-            zip(*uniq_tuples), sortorder=0, names=result_names
-        )
+        return MultiIndex.from_tuples(result, sortorder=0, names=result_names)
 
     def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
         return is_object_dtype(dtype)

diff --git a/pandas/tests/indexes/multi/test_setops.py b/pandas/tests/indexes/multi/test_setops.py
@@ -3,7 +3,9 @@
 
 import pandas as pd
 from pandas import (
+    CategoricalIndex,
     Index,
+    IntervalIndex,
     MultiIndex,
     Series,
 )
@@ -496,3 +498,23 @@ def test_intersection_with_missing_values_on_both_sides(nulls_fixture):
     result = mi1.intersection(mi2)
     expected = MultiIndex.from_arrays([[3.0, nulls_fixture], [1, 2]])
     tm.assert_index_equal(result, expected)
+
+
+def test_union_nan_got_duplicated():
+    # GH#38977: (nan, 3) is in both operands and must appear once in the union
+    mi1 = MultiIndex.from_arrays([[1.0, np.nan], [2, 3]])
+    mi2 = MultiIndex.from_arrays([[1.0, np.nan, 3.0], [2, 3, 4]])
+    result = mi1.union(mi2)
+    tm.assert_index_equal(result, mi2)
+
+
+def test_union_duplicates(index):
+    # GH#38977: duplicates present in ``other`` must survive MultiIndex.union
+    if index.empty or isinstance(index, (IntervalIndex, CategoricalIndex)):
+        # Empty indexes cannot hold duplicates; Interval/Categorical are skipped too
+        return
+    values = index.unique().values.tolist()
+    mi1 = MultiIndex.from_arrays([values, [1] * len(values)])
+    mi2 = MultiIndex.from_arrays([[values[0]] + values, [1] * (len(values) + 1)])
+    result = mi1.union(mi2)
+    tm.assert_index_equal(result, mi2.sort_values())
diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
@@ -579,6 +579,29 @@ def test_union_nan_in_both(dup):
     tm.assert_index_equal(result, expected)
 
 
+@pytest.mark.parametrize(
+    "cls",
+    [
+        Int64Index,
+        Float64Index,
+        DatetimeIndex,
+        TimedeltaIndex,
+        lambda x: Index(x, dtype=object),
+    ],
+)
+def test_union_with_duplicate_index_not_subset_and_non_monotonic(cls):
+    # GH#36289: the union must not depend on which operand holds the duplicates
+    a = cls([1, 0, 0, 2])
+    b = cls([0, 1])
+    expected = cls([0, 0, 1, 2])
+
+    result = a.union(b)
+    tm.assert_index_equal(result, expected)
+
+    result = b.union(a)
+    tm.assert_index_equal(result, expected)
+
+
 class TestSetOpsUnsorted:
     # These may eventually belong in a dtype-specific test_setops, or
     #  parametrized over a more general fixture

diff --git a/pandas/tests/libs/test_lib.py b/pandas/tests/libs/test_lib.py
@@ -2,7 +2,6 @@
 import pytest
 
 from pandas._libs import (
-    Timestamp,
     lib,
     writers as libwriters,
 )
@@ -43,11 +42,6 @@ def test_fast_unique_multiple_list_gen_sort(self):
         out = lib.fast_unique_multiple_list_gen(gen, sort=False)
         tm.assert_numpy_array_equal(np.array(out), expected)
 
-    def test_fast_unique_multiple_unsortable_runtimewarning(self):
-        arr = [np.array(["foo", Timestamp("2000")])]
-        with tm.assert_produces_warning(RuntimeWarning):
-            lib.fast_unique_multiple(arr, sort=None)
-
 
 class TestIndexing:
     def test_maybe_indices_to_slice_left_edge(self):