pandas-dev · jreback · Jun 27, 2019 · Jun 24, 2019 · Jun 25, 2019 · Jun 25, 2019
diff --git a/doc/source/user_guide/indexing.rst b/doc/source/user_guide/indexing.rst
@@ -1559,11 +1559,11 @@ See :ref:`Advanced Indexing <advanced>` for usage of MultiIndexes.
   index.levels[1]
   index.set_levels(["a", "b"], level=1)
 
+.. _indexing.set_ops:
+
 Set operations on Index objects
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-.. _indexing.set_ops:
-
 The two main operations are ``union (|)`` and ``intersection (&)``.
 These can be directly called as instance methods or used via overloaded
 operators. Difference is provided via the ``.difference()`` method.
@@ -1592,11 +1592,22 @@ with duplicates dropped.
 
    The resulting index from a set operation will be sorted in ascending order.
 
-Missing values
-~~~~~~~~~~~~~~
+When performing :meth:`Index.union` between indexes with different dtypes, the indexes
+must be cast to a common dtype. Typically, though not always, this is object dtype. The
+exception is when performing a union between integer and float data. In this case, the
+integer values are converted to float:
+
+.. ipython:: python
+
+   idx1 = pd.Index([0, 1, 2])
+   idx2 = pd.Index([0.5, 1.5])
+   idx1 | idx2
 
 .. _indexing.missing:
 
+Missing values
+~~~~~~~~~~~~~~
+
 .. important::
 
    Even though ``Index`` can hold missing values (``NaN``), it should be avoided
@@ -1624,11 +1635,11 @@ Occasionally you will load or create a data set into a DataFrame and want to
 add an index after you've already done so. There are a couple of different
 ways.
 
+.. _indexing.set_index:
+
 Set an index
 ~~~~~~~~~~~~
 
-.. _indexing.set_index:
-
 DataFrame has a :meth:`~DataFrame.set_index` method which takes a column name
 (for a regular ``Index``) or a list of column names (for a ``MultiIndex``).
 To create a new, re-indexed DataFrame:

diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst
@@ -347,6 +347,11 @@ considered commutative, such that ``A.union(B) == B.union(A)`` (:issue:`23525`).
     pd.period_range('19910905', periods=2).union(pd.Int64Index([1, 2, 3]))
     pd.Index([], dtype=object).union(pd.Index([1, 2, 3]))
 
+Note that integer- and floating-dtype indexes are considered "compatible". The integer
+values are coerced to floating point, which may result in loss of precision. See
+:ref:`indexing.set_ops` for more.
+
+
 ``DataFrame`` groupby ffill/bfill no longer return group labels
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 

diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
@@ -7,9 +7,11 @@
 
 from pandas.core.dtypes.common import (
     is_bool, is_bool_dtype, is_dtype_equal, is_extension_array_dtype, is_float,
-    is_integer_dtype, is_scalar, needs_i8_conversion, pandas_dtype)
+    is_float_dtype, is_integer_dtype, is_scalar, needs_i8_conversion,
+    pandas_dtype)
 import pandas.core.dtypes.concat as _concat
-from pandas.core.dtypes.generic import ABCInt64Index, ABCRangeIndex
+from pandas.core.dtypes.generic import (
+    ABCFloat64Index, ABCInt64Index, ABCRangeIndex, ABCUInt64Index)
 from pandas.core.dtypes.missing import isna
 
 from pandas.core import algorithms
@@ -123,6 +125,24 @@ def insert(self, loc, item):
             item = self._na_value
         return super().insert(loc, item)
 
+    def _union(self, other, sort):
+        """
+        Compute the union with ``other``, upcasting int/float pairs to float.
+
+        Right now union(int, float) is treated a bit specially; see
+        https://github.com/pandas-dev/pandas/issues/26778 for discussion.
+        We may change union(int, float) to go to object.
+
+        * float | [u]int -> float  (the special case)
+        * <T>   | <T>    -> T
+        * <T>   | <U>    -> object
+        """
+        int_with_float = (
+            is_integer_dtype(self.dtype) and is_float_dtype(other.dtype))
+        float_with_int = (
+            is_float_dtype(self.dtype) and is_integer_dtype(other.dtype))
+
+        if not (int_with_float or float_with_int):
+            # Not the int/float mix: defer to the parent implementation.
+            return super()._union(other, sort)
+
+        # Cast both operands to float, then union the two float indexes.
+        left = self.astype("float")
+        right = other.astype("float")
+        return left._union(right, sort)
+
 
 _num_index_shared_docs['class_descr'] = """
     Immutable ndarray implementing an ordered, sliceable set. The basic object
@@ -225,7 +245,9 @@ def _assert_safe_casting(cls, data, subarr):
     def _is_compatible_with_other(self, other):
+        # Compatible when the parent-class check accepts ``other``, or when
+        # both ``self`` and ``other`` are matched by one of the ABC index
+        # checks listed below.
         return (
             super()._is_compatible_with_other(other)
-            or all(isinstance(type(obj), (ABCInt64Index, ABCRangeIndex))
+            or all(isinstance(type(obj), (ABCInt64Index,
+                                          ABCFloat64Index,
+                                          ABCRangeIndex))
                    for obj in [self, other])
         )
 
@@ -301,6 +323,14 @@ def _assert_safe_casting(cls, data, subarr):
                 raise TypeError('Unsafe NumPy casting, you must '
                                 'explicitly cast')
 
+    def _is_compatible_with_other(self, other):
+        """
+        Check whether ``self`` and ``other`` count as compatible.
+
+        The parent-class check is tried first and its result is returned
+        when truthy. Otherwise the pair is compatible only if both indexes
+        are matched by the unsigned-integer or float ABC checks.
+        """
+        base_result = super()._is_compatible_with_other(other)
+        if base_result:
+            return base_result
+
+        compatible_kinds = (ABCUInt64Index, ABCFloat64Index)
+        return all(isinstance(type(idx), compatible_kinds)
+                   for idx in (self, other))
+
 
 UInt64Index._add_numeric_methods()
 UInt64Index._add_logical_methods()
@@ -447,6 +477,16 @@ def isin(self, values, level=None):
             self._validate_index_level(level)
         return algorithms.isin(np.array(self), values)
 
+    def _is_compatible_with_other(self, other):
+        """
+        Check whether ``self`` and ``other`` count as compatible.
+
+        The parent-class check is tried first and its result is returned
+        when truthy. Otherwise the pair is compatible only if both indexes
+        are matched by the integer, float, unsigned-integer or range ABC
+        checks.
+        """
+        base_result = super()._is_compatible_with_other(other)
+        if base_result:
+            return base_result
+
+        compatible_kinds = (
+            ABCInt64Index,
+            ABCFloat64Index,
+            ABCUInt64Index,
+            ABCRangeIndex,
+        )
+        return all(isinstance(type(idx), compatible_kinds)
+                   for idx in (self, other))
+
 
 Float64Index._add_numeric_methods()
 Float64Index._add_logical_methods_disabled()
diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
@@ -1118,3 +1118,29 @@ def test_join_outer(self):
         tm.assert_index_equal(res, eres)
         tm.assert_numpy_array_equal(lidx, elidx)
         tm.assert_numpy_array_equal(ridx, eridx)
+
+
+@pytest.mark.parametrize("dtype", ['int64', 'uint64'])
+def test_int_float_union_dtype(dtype):
+    # https://github.com/pandas-dev/pandas/issues/26778
+    # [u]int | float -> float, whichever operand is on the left
+    index = pd.Index([0, 2, 3], dtype=dtype)
+    other = pd.Float64Index([0.5, 1.5])
+    expected = pd.Float64Index([0.0, 0.5, 1.5, 2.0, 3.0])
+
+    # Integer index on the left-hand side.
+    result = index.union(other)
+    tm.assert_index_equal(result, expected)
+
+    # Float index on the left-hand side.
+    result = other.union(index)
+    tm.assert_index_equal(result, expected)
+
+
+def test_range_float_union_dtype():
+    # https://github.com/pandas-dev/pandas/issues/26778
+    # The union of a RangeIndex and a float index is checked in both orders.
+    range_index = pd.RangeIndex(start=0, stop=3)
+    float_index = pd.Float64Index([0.5, 1.5])
+    expected = pd.Float64Index([0.0, 0.5, 1.0, 1.5, 2.0])
+
+    operand_orders = [(range_index, float_index), (float_index, range_index)]
+    for left, right in operand_orders:
+        tm.assert_index_equal(left.union(right), expected)
diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
@@ -2,6 +2,7 @@
 The tests in this package are to ensure the proper resultant dtypes of
 set operations.
 '''
+from collections import OrderedDict
 import itertools as it
 
 import numpy as np
@@ -10,13 +11,17 @@
 from pandas.core.dtypes.common import is_dtype_equal
 
 import pandas as pd
-from pandas import Int64Index, RangeIndex
+from pandas import Float64Index, Int64Index, RangeIndex, UInt64Index
+from pandas.api.types import pandas_dtype
 from pandas.tests.indexes.conftest import indices_list
 import pandas.util.testing as tm
 
-COMPATIBLE_INCONSISTENT_PAIRS = {
-    (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex)
-}
+# Each key is a pair of index classes; each value holds the factories that
+# build an index of the matching class, in the same order as the key.
+COMPATIBLE_INCONSISTENT_PAIRS = OrderedDict([
+    ((Int64Index, RangeIndex), (tm.makeIntIndex, tm.makeRangeIndex)),
+    ((Float64Index, Int64Index), (tm.makeFloatIndex, tm.makeIntIndex)),
+    # The RangeIndex key needs the RangeIndex factory; makeIntIndex here
+    # would only repeat the (Float64Index, Int64Index) case above.
+    ((Float64Index, RangeIndex), (tm.makeFloatIndex, tm.makeRangeIndex)),
+    ((Float64Index, UInt64Index), (tm.makeFloatIndex, tm.makeUIntIndex)),
+])
 
 
 @pytest.fixture(params=list(it.combinations(indices_list, 2)),
@@ -74,3 +79,29 @@ def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2):
 
     assert res1.dtype in (idx1.dtype, idx2.dtype)
     assert res2.dtype in (idx1.dtype, idx2.dtype)
+
+
+@pytest.mark.parametrize('left, right, expected', [
+    ('int64', 'int64', 'int64'),
+    ('int64', 'uint64', 'object'),
+    ('int64', 'float64', 'float64'),
+    ('uint64', 'float64', 'float64'),
+    ('uint64', 'uint64', 'uint64'),
+    ('float64', 'float64', 'float64'),
+    ('datetime64[ns]', 'int64', 'object'),
+    ('datetime64[ns]', 'uint64', 'object'),
+    ('datetime64[ns]', 'float64', 'object'),
+    ('datetime64[ns, CET]', 'int64', 'object'),
+    ('datetime64[ns, CET]', 'uint64', 'object'),
+    ('datetime64[ns, CET]', 'float64', 'object'),
+    ('Period[D]', 'int64', 'object'),
+    ('Period[D]', 'uint64', 'object'),
+    ('Period[D]', 'float64', 'object'),
+])
+def test_union_dtypes(left, right, expected):
+    # Two empty indexes are built from the dtype names, and only the dtype
+    # of their ``|`` result is compared against ``expected``.
+    left_index = pd.Index([], dtype=pandas_dtype(left))
+    right_index = pd.Index([], dtype=pandas_dtype(right))
+    assert (left_index | right_index).dtype == expected