Skip to content

API/REGR: Convert to float for index union #27034

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Jun 27, 2019
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 64 additions & 3 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@

from pandas.core.dtypes.common import (
is_bool, is_bool_dtype, is_dtype_equal, is_extension_array_dtype, is_float,
is_integer_dtype, is_scalar, needs_i8_conversion, pandas_dtype)
is_float_dtype, is_integer_dtype, is_scalar, needs_i8_conversion,
pandas_dtype)
import pandas.core.dtypes.concat as _concat
from pandas.core.dtypes.generic import ABCInt64Index, ABCRangeIndex
from pandas.core.dtypes.generic import (
ABCFloat64Index, ABCInt64Index, ABCRangeIndex, ABCUInt64Index)
from pandas.core.dtypes.missing import isna

from pandas.core import algorithms
Expand Down Expand Up @@ -123,6 +125,21 @@ def insert(self, loc, item):
item = self._na_value
return super().insert(loc, item)

def _union(self, other, sort):
    """
    Compute the union with ``other``, upcasting int/float mixes to float.

    Dtype resolution as noted by the original author:

    * float | [u]int -> float
    * <T> | <T> -> T
    * <T> | <U> -> object

    Only the first rule is implemented in this method; every other
    pairing is handed to the parent class's ``_union``.
    """
    own_dtype, other_dtype = self.dtype, other.dtype
    int_meets_float = (
        (is_integer_dtype(own_dtype) and is_float_dtype(other_dtype)) or
        (is_integer_dtype(other_dtype) and is_float_dtype(own_dtype))
    )
    if not int_meets_float:
        return super()._union(other, sort)

    # Bring both operands to a common float dtype, then redo the union.
    left_float = self.astype("float")
    right_float = other.astype("float")
    return left_float._union(right_float, sort)


_num_index_shared_docs['class_descr'] = """
Immutable ndarray implementing an ordered, sliceable set. The basic object
Expand Down Expand Up @@ -225,10 +242,24 @@ def _assert_safe_casting(cls, data, subarr):
def _is_compatible_with_other(self, other):
    """
    Report whether ``self`` and ``other`` can be combined directly.

    Returns the parent class's answer when it is truthy; otherwise
    returns whether both operands match one of the Int64, Float64 or
    Range index ABCs.
    """
    # NOTE(review): the check is applied to ``type(obj)`` rather than
    # ``obj``, exactly as in the pre-existing code -- confirm the ABC
    # helpers are meant to be used with classes.
    return (
        super()._is_compatible_with_other(other)
        or all(isinstance(type(obj), (ABCInt64Index,
                                      ABCFloat64Index,
                                      ABCRangeIndex))
               for obj in [self, other])
    )

def _union(self, other, sort):
    """
    Union with ``other``; an integer/float mix is computed in float.

    When one operand has an integer dtype and the other a float dtype,
    both are cast with ``astype("float")`` and the union is retried on
    the cast operands. Any other combination is delegated to the parent
    class's ``_union``.
    """
    # Check both orderings: (int, float) and (float, int).
    orderings = ((self, other), (other, self))
    if any(is_integer_dtype(a.dtype) and is_float_dtype(b.dtype)
           for a, b in orderings):
        return self.astype("float")._union(other.astype("float"), sort)
    return super()._union(other, sort)


Int64Index._add_numeric_methods()
Int64Index._add_logical_methods()
Expand Down Expand Up @@ -301,6 +332,26 @@ def _assert_safe_casting(cls, data, subarr):
raise TypeError('Unsafe NumPy casting, you must '
'explicitly cast')

def _is_compatible_with_other(self, other):
    """
    Report whether ``self`` and ``other`` can be combined directly.

    A truthy answer from the parent class is returned as-is; otherwise
    the result is whether both operands match the UInt64 or Float64
    index ABCs.
    """
    parent_verdict = super()._is_compatible_with_other(other)
    if parent_verdict:
        return parent_verdict

    accepted = (ABCUInt64Index, ABCFloat64Index)
    return all(isinstance(type(obj), accepted) for obj in [self, other])

def _union(self, other, sort):
    """
    Union with ``other``, promoting an integer/float pairing to float.

    If exactly the integer-vs-float situation is detected (in either
    operand order), both sides are converted via ``astype("float")``
    before the union is recomputed; otherwise the parent class's
    ``_union`` handles the request unchanged.
    """
    def _is_int_float_pair(maybe_int, maybe_float):
        # True when the first argument is integer-typed and the second
        # is float-typed.
        return (is_integer_dtype(maybe_int.dtype)
                and is_float_dtype(maybe_float.dtype))

    if _is_int_float_pair(self, other) or _is_int_float_pair(other, self):
        casted_self = self.astype("float")
        casted_other = other.astype("float")
        return casted_self._union(casted_other, sort)

    return super()._union(other, sort)


UInt64Index._add_numeric_methods()
UInt64Index._add_logical_methods()
Expand Down Expand Up @@ -447,6 +498,16 @@ def isin(self, values, level=None):
self._validate_index_level(level)
return algorithms.isin(np.array(self), values)

def _is_compatible_with_other(self, other):
    """
    Report whether ``self`` and ``other`` can be combined directly.

    A truthy answer from the parent class is returned as-is; otherwise
    the result is whether both operands match one of the Int64, Float64,
    UInt64 or Range index ABCs.
    """
    parent_verdict = super()._is_compatible_with_other(other)
    if parent_verdict:
        return parent_verdict

    numeric_abcs = (
        ABCInt64Index,
        ABCFloat64Index,
        ABCUInt64Index,
        ABCRangeIndex,
    )
    return all(isinstance(type(obj), numeric_abcs)
               for obj in [self, other])


Float64Index._add_numeric_methods()
Float64Index._add_logical_methods_disabled()
24 changes: 24 additions & 0 deletions pandas/tests/indexes/test_numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -1118,3 +1118,27 @@ def test_join_outer(self):
tm.assert_index_equal(res, eres)
tm.assert_numpy_array_equal(lidx, elidx)
tm.assert_numpy_array_equal(ridx, eridx)


@pytest.mark.parametrize("dtype", ['int64', 'uint64'])
def test_int_float_union_dtype(dtype):
    """Union of a [u]int index and a float index is float, either way round."""
    # [u]int | float -> float
    index = pd.Index([0, 2, 3], dtype=dtype)
    other = pd.Float64Index([0.5, 1.5])
    expected = pd.Float64Index([0.0, 0.5, 1.5, 2.0, 3.0])

    # Integer index on the left: previously commented out, which left
    # this operand order untested.
    result = index.union(other)
    tm.assert_index_equal(result, expected)

    # Float index on the left.
    result = other.union(index)
    tm.assert_index_equal(result, expected)


def test_range_float_union_dtype():
    """Union of a RangeIndex and a float index is float in both orders."""
    range_idx = pd.RangeIndex(start=0, stop=3)
    float_idx = pd.Float64Index([0.5, 1.5])
    expected = pd.Float64Index([0.0, 0.5, 1, 1.5, 2.0])

    # Same expectation regardless of which operand is on the left.
    for left, right in ((range_idx, float_idx), (float_idx, range_idx)):
        tm.assert_index_equal(left.union(right), expected)
34 changes: 32 additions & 2 deletions pandas/tests/indexes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,16 @@
from pandas.core.dtypes.common import is_dtype_equal

import pandas as pd
from pandas import Int64Index, RangeIndex
from pandas import Float64Index, Int64Index, RangeIndex, UInt64Index
from pandas.api.types import pandas_dtype
from pandas.tests.indexes.conftest import indices_list
import pandas.util.testing as tm

# Maps a pair of index classes to the pair of factory helpers used to
# build one index of each class, in the same order as the key.
COMPATIBLE_INCONSISTENT_PAIRS = {
    (Int64Index, RangeIndex): (tm.makeIntIndex, tm.makeRangeIndex),
    (Float64Index, Int64Index): (tm.makeFloatIndex, tm.makeIntIndex),
    # The RangeIndex key must use the RangeIndex factory; it previously
    # used tm.makeIntIndex, so this pairing never built a RangeIndex.
    (Float64Index, RangeIndex): (tm.makeFloatIndex, tm.makeRangeIndex),
    (Float64Index, UInt64Index): (tm.makeFloatIndex, tm.makeUIntIndex),
}


Expand Down Expand Up @@ -74,3 +78,29 @@ def test_compatible_inconsistent_pairs(idx_fact1, idx_fact2):

assert res1.dtype in (idx1.dtype, idx2.dtype)
assert res2.dtype in (idx1.dtype, idx2.dtype)


@pytest.mark.parametrize('left, right, expected', [
    ('int64', 'int64', 'int64'),
    ('int64', 'uint64', 'object'),
    ('int64', 'float64', 'float64'),
    ('uint64', 'float64', 'float64'),
    ('uint64', 'uint64', 'uint64'),
    ('float64', 'float64', 'float64'),
    ('datetime64[ns]', 'int64', 'object'),
    ('datetime64[ns]', 'uint64', 'object'),
    ('datetime64[ns]', 'float64', 'object'),
    ('datetime64[ns, CET]', 'int64', 'object'),
    ('datetime64[ns, CET]', 'uint64', 'object'),
    ('datetime64[ns, CET]', 'float64', 'object'),
    ('Period[D]', 'int64', 'object'),
    ('Period[D]', 'uint64', 'object'),
    ('Period[D]', 'float64', 'object'),
])
def test_union_dtypes(left, right, expected):
    """The ``|`` of two empty indexes has the expected resulting dtype."""
    # Build one empty index per requested dtype name.
    empties = [pd.Index([], dtype=pandas_dtype(name))
               for name in (left, right)]
    combined = empties[0] | empties[1]
    assert combined.dtype == expected