API/REGR: Convert to float for index union

TomAugspurger · TomAugspurger · commit 2fd1fe72af7f · 2019-06-25T06:51:27.000-05:00
This restores the 0.24.x behavior of Index.union(other) between Float and (U)Int indexes. The result of such a union is now a floating dtype again.

| left  | right | output of left.union(right) |
| ----- | ----- | --------------------------- |
| int   | float | float64                     |
| int   | uint  | object                      |
| float | uint  | float64                     |

See pandas-dev#26778 (comment). Closes pandas-dev#26778.
diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py
@@ -7,9 +7,11 @@
 
 from pandas.core.dtypes.common import (
     is_bool, is_bool_dtype, is_dtype_equal, is_extension_array_dtype, is_float,
-    is_integer_dtype, is_scalar, needs_i8_conversion, pandas_dtype)
+    is_float_dtype, is_integer_dtype, is_scalar, needs_i8_conversion,
+    pandas_dtype)
 import pandas.core.dtypes.concat as _concat
-from pandas.core.dtypes.generic import ABCInt64Index, ABCRangeIndex
+from pandas.core.dtypes.generic import (
+    ABCFloat64Index, ABCInt64Index, ABCRangeIndex, ABCUInt64Index)
 from pandas.core.dtypes.missing import isna
 
 from pandas.core import algorithms
@@ -123,6 +125,21 @@ def insert(self, loc, item):
             item = self._na_value
         return super().insert(loc, item)
 
+    def _union(self, other, sort):
+        # float | [u]int -> float
+        # <T>   | <T>    -> T
+        # <T>   | <U>    -> object
+        needs_cast = (
+            (is_integer_dtype(self.dtype) and is_float_dtype(other.dtype)) or
+            (is_integer_dtype(other.dtype) and is_float_dtype(self.dtype))
+        )
+        if needs_cast:
+            first = self.astype("float")
+            second = other.astype("float")
+            return first._union(second, sort)
+        else:
+            return super()._union(other, sort)
+
 
 _num_index_shared_docs['class_descr'] = """
     Immutable ndarray implementing an ordered, sliceable set. The basic object
@@ -225,10 +242,24 @@ def _assert_safe_casting(cls, data, subarr):
     def _is_compatible_with_other(self, other):
         return (
             super()._is_compatible_with_other(other)
-            or all(isinstance(type(obj), (ABCInt64Index, ABCRangeIndex))
+            or all(isinstance(type(obj), (ABCInt64Index,
+                                          ABCFloat64Index,
+                                          ABCRangeIndex))
                    for obj in [self, other])
         )
 
+    def _union(self, other, sort):
+        needs_cast = (
+            (is_integer_dtype(self.dtype) and is_float_dtype(other.dtype)) or
+            (is_integer_dtype(other.dtype) and is_float_dtype(self.dtype))
+        )
+        if needs_cast:
+            first = self.astype("float")
+            second = other.astype("float")
+            return first._union(second, sort)
+        else:
+            return super()._union(other, sort)
+
 
 Int64Index._add_numeric_methods()
 Int64Index._add_logical_methods()
@@ -301,6 +332,29 @@ def _assert_safe_casting(cls, data, subarr):
                 raise TypeError('Unsafe NumPy casting, you must '
                                 'explicitly cast')
 
+    def _is_compatible_with_other(self, other):
+        # not ABCInt64Index
+        # TODO: deduplicate with Int64Index.
+        # TODO: who all needs this? Int, UInt, Float? Range?
+        return (
+            super()._is_compatible_with_other(other)
+            or all(isinstance(type(obj), (ABCUInt64Index,
+                                          ABCFloat64Index))
+                   for obj in [self, other])
+        )
+
+    def _union(self, other, sort):
+        needs_cast = (
+            (is_integer_dtype(self.dtype) and is_float_dtype(other.dtype)) or
+            (is_integer_dtype(other.dtype) and is_float_dtype(self.dtype))
+        )
+        if needs_cast:
+            first = self.astype("float")
+            second = other.astype("float")
+            return first._union(second, sort)
+        else:
+            return super()._union(other, sort)
+
 
 UInt64Index._add_numeric_methods()
 UInt64Index._add_logical_methods()
@@ -447,6 +501,16 @@ def isin(self, values, level=None):
             self._validate_index_level(level)
         return algorithms.isin(np.array(self), values)
 
+    def _is_compatible_with_other(self, other):
+        return (
+            super()._is_compatible_with_other(other)
+            or all(isinstance(type(obj), (ABCInt64Index,
+                                          ABCFloat64Index,
+                                          ABCUInt64Index,
+                                          ABCRangeIndex))
+                   for obj in [self, other])
+        )
+
 
 Float64Index._add_numeric_methods()
 Float64Index._add_logical_methods_disabled()
diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py
@@ -1118,3 +1118,27 @@ def test_join_outer(self):
         tm.assert_index_equal(res, eres)
         tm.assert_numpy_array_equal(lidx, elidx)
         tm.assert_numpy_array_equal(ridx, eridx)
+
+
+@pytest.mark.parametrize("dtype", ['int64', 'uint64'])
+def test_int_float_union_dtype(dtype):
+    # [u]int | float -> float
+    index = pd.Index([0, 2, 3], dtype=dtype)
+    other = pd.Float64Index([0.5, 1.5])
+    expected = pd.Float64Index([0.0, 0.5, 1.5, 2.0, 3.0])
+    # result = index.union(other)
+    # tm.assert_index_equal(result, expected)
+
+    result = other.union(index)
+    tm.assert_index_equal(result, expected)
+
+
+def test_range_float_union_dtype():
+    index = pd.RangeIndex(start=0, stop=3)
+    other = pd.Float64Index([0.5, 1.5])
+    result = index.union(other)
+    expected = pd.Float64Index([0.0, 0.5, 1, 1.5, 2.0])
+    tm.assert_index_equal(result, expected)
+
+    result = other.union(index)
+    tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/indexes/test_setops.py b/pandas/tests/indexes/test_setops.py
@@ -10,12 +10,16 @@
 from pandas.core.dtypes.common import is_dtype_equal
 
 import pandas as pd
-from pandas import Int64Index, RangeIndex
+from pandas import Float64Index, Int64Index, RangeIndex, UInt64Index
+from pandas.api.types import pandas_dtype
 from pandas.tests.indexes.conftest import indices_list
 import pandas.util.testing as tm
 
 COMPATIBLE_INCONSISTENT_PAIRS = {
-    (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex)
+    (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex),
+    (Float64Index, Int64Index): (tm.makeFloatIndex, tm.makeIntIndex),
+    (Float64Index, RangeIndex): (tm.makeFloatIndex, tm.makeIntIndex),
+    (Float64Index, UInt64Index): (tm.makeFloatIndex, tm.makeUIntIndex),
 }
 
 
@@ -74,3 +78,29 @@ def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2):
 
     assert res1.dtype in (idx1.dtype, idx2.dtype)
     assert res2.dtype in (idx1.dtype, idx2.dtype)
+
+
+@pytest.mark.parametrize('left, right, expected', [
+    ('int64', 'int64', 'int64'),
+    ('int64', 'uint64', 'object'),
+    ('int64', 'float64', 'float64'),
+    ('uint64', 'float64', 'float64'),
+    ('uint64', 'uint64', 'uint64'),
+    ('float64', 'float64', 'float64'),
+    ('datetime64[ns]', 'int64', 'object'),
+    ('datetime64[ns]', 'uint64', 'object'),
+    ('datetime64[ns]', 'float64', 'object'),
+    ('datetime64[ns, CET]', 'int64', 'object'),
+    ('datetime64[ns, CET]', 'uint64', 'object'),
+    ('datetime64[ns, CET]', 'float64', 'object'),
+    ('Period[D]', 'int64', 'object'),
+    ('Period[D]', 'uint64', 'object'),
+    ('Period[D]', 'float64', 'object'),
+])
+def test_union_dtypes(left, right, expected):
+    left = pandas_dtype(left)
+    right = pandas_dtype(right)
+    a = pd.Index([], dtype=left)
+    b = pd.Index([], dtype=right)
+    result = (a | b).dtype
+    assert result == expected