Skip to content

REF: _is_compatible_with_other -> _can_union_without_object_cast #36384

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits into from
Sep 18, 2020
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.2.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ Other
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was a dictionary (:issue:`35811`)
-
- Bug in :meth:`Index.union` behaving differently depending on whether operand is an :class:`Index` or other list-like (:issue:`36384`)

.. ---------------------------------------------------------------------------

Expand Down
12 changes: 10 additions & 2 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2507,7 +2507,7 @@ def _union_incompatible_dtypes(self, other, sort):
other = Index(other).astype(object, copy=False)
return Index.union(this, other, sort=sort).astype(object, copy=False)

def _is_compatible_with_other(self, other) -> bool:
def _can_union_without_object_cast(self, other) -> bool:
"""
Check whether this and the other dtype are compatible with each other.
Meaning a union can be formed between them without needing to be cast
Expand Down Expand Up @@ -2583,8 +2583,9 @@ def union(self, other, sort=None):
"""
self._validate_sort_keyword(sort)
self._assert_can_do_setop(other)
other = ensure_index(other)

if not self._is_compatible_with_other(other):
if not self._can_union_without_object_cast(other):
return self._union_incompatible_dtypes(other, sort=sort)

return self._union(other, sort=sort)
Expand Down Expand Up @@ -5655,6 +5656,13 @@ def ensure_index(

return MultiIndex.from_arrays(converted)
else:
if isinstance(converted, np.ndarray) and converted.dtype == np.int64:
# Check for overflows if we should actually be uint64
# xref GH#35481
alt = np.asarray(index_like)
if alt.dtype == np.uint64:
converted = alt

index_like = converted
else:
# clean_index_list does the equivalent of copying
Expand Down
3 changes: 3 additions & 0 deletions pandas/core/indexes/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -622,6 +622,9 @@ def insert(self, loc: int, item):
# --------------------------------------------------------------------
# Join/Set Methods

def _can_union_without_object_cast(self, other) -> bool:
return is_dtype_equal(self.dtype, other.dtype)

def _wrap_joined_index(self, joined: np.ndarray, other):
assert other.dtype == self.dtype, (other.dtype, self.dtype)
name = get_op_result_name(self, other)
Expand Down
34 changes: 11 additions & 23 deletions pandas/core/indexes/numeric.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,19 +15,14 @@
is_float,
is_float_dtype,
is_integer_dtype,
is_numeric_dtype,
is_scalar,
is_signed_integer_dtype,
is_unsigned_integer_dtype,
needs_i8_conversion,
pandas_dtype,
)
from pandas.core.dtypes.generic import (
ABCFloat64Index,
ABCInt64Index,
ABCRangeIndex,
ABCSeries,
ABCUInt64Index,
)
from pandas.core.dtypes.generic import ABCSeries
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna

from pandas.core import algorithms
Expand Down Expand Up @@ -275,11 +270,9 @@ def _assert_safe_casting(cls, data, subarr):
if not np.array_equal(data, subarr):
raise TypeError("Unsafe NumPy casting, you must explicitly cast")

def _is_compatible_with_other(self, other) -> bool:
return super()._is_compatible_with_other(other) or all(
isinstance(obj, (ABCInt64Index, ABCFloat64Index, ABCRangeIndex))
for obj in [self, other]
)
def _can_union_without_object_cast(self, other) -> bool:
# See GH#26778, further casting may occur in NumericIndex._union
return other.dtype == "f8" or other.dtype == self.dtype


Int64Index._add_numeric_methods()
Expand Down Expand Up @@ -324,10 +317,9 @@ def _assert_safe_casting(cls, data, subarr):
if not np.array_equal(data, subarr):
raise TypeError("Unsafe NumPy casting, you must explicitly cast")

def _is_compatible_with_other(self, other) -> bool:
return super()._is_compatible_with_other(other) or all(
isinstance(obj, (ABCUInt64Index, ABCFloat64Index)) for obj in [self, other]
)
def _can_union_without_object_cast(self, other) -> bool:
# See GH#26778, further casting may occur in NumericIndex._union
return other.dtype == "f8" or other.dtype == self.dtype


UInt64Index._add_numeric_methods()
Expand Down Expand Up @@ -432,13 +424,9 @@ def isin(self, values, level=None):
self._validate_index_level(level)
return algorithms.isin(np.array(self), values)

def _is_compatible_with_other(self, other) -> bool:
return super()._is_compatible_with_other(other) or all(
isinstance(
obj, (ABCInt64Index, ABCFloat64Index, ABCUInt64Index, ABCRangeIndex)
)
for obj in [self, other]
)
def _can_union_without_object_cast(self, other) -> bool:
# See GH#26778, further casting may occur in NumericIndex._union
return is_numeric_dtype(other.dtype)


Float64Index._add_numeric_methods()
Expand Down
18 changes: 6 additions & 12 deletions pandas/tests/indexes/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import pytest

from pandas._libs import iNaT
from pandas.compat.numpy import is_numpy_dev
from pandas.errors import InvalidIndexError

from pandas.core.dtypes.common import is_datetime64tz_dtype
Expand Down Expand Up @@ -456,7 +455,7 @@ def test_set_ops_error_cases(self, case, method, index):
with pytest.raises(TypeError, match=msg):
getattr(index, method)(case)

def test_intersection_base(self, index, request):
def test_intersection_base(self, index):
if isinstance(index, CategoricalIndex):
return

Expand All @@ -473,15 +472,6 @@ def test_intersection_base(self, index, request):
# GH 10149
cases = [klass(second.values) for klass in [np.array, Series, list]]
for case in cases:
# https://github.com/pandas-dev/pandas/issues/35481
if (
is_numpy_dev
and isinstance(case, Series)
and isinstance(index, UInt64Index)
):
mark = pytest.mark.xfail(reason="gh-35481")
request.node.add_marker(mark)

result = first.intersection(case)
assert tm.equalContents(result, second)

Expand All @@ -507,7 +497,11 @@ def test_union_base(self, index):
for case in cases:
if not isinstance(index, CategoricalIndex):
result = first.union(case)
assert tm.equalContents(result, everything)
assert tm.equalContents(result, everything), (
result,
everything,
type(case),
)

if isinstance(index, MultiIndex):
msg = "other must be a MultiIndex or a list of tuples"
Expand Down
6 changes: 2 additions & 4 deletions pandas/tests/indexes/datetimes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,8 @@ def test_union3(self, sort, box):
first = everything[:5]
second = everything[5:]

# GH 10149
expected = (
first.astype("O").union(pd.Index(second.values, dtype="O")).astype("O")
)
# GH 10149 support listlike inputs other than Index objects
expected = first.union(second, sort=sort)
case = box(second.values)
result = first.union(case, sort=sort)
tm.assert_index_equal(result, expected)
Expand Down