Skip to content

REF: Simplify Index.union #41773

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jun 2, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 14 additions & 15 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,6 @@
is_float_dtype,
is_hashable,
is_integer,
is_integer_dtype,
is_interval_dtype,
is_iterator,
is_list_like,
Expand Down Expand Up @@ -2963,20 +2962,7 @@ def union(self, other, sort=None):
stacklevel=2,
)

dtype = find_common_type([self.dtype, other.dtype])
if self._is_numeric_dtype and other._is_numeric_dtype:
# Right now, we treat union(int, float) a bit special.
# See https://github.com/pandas-dev/pandas/issues/26778 for discussion
# We may change union(int, float) to go to object.
# float | [u]int -> float (the special case)
# <T> | <T> -> T
# <T> | <U> -> object
if not (is_integer_dtype(self.dtype) and is_integer_dtype(other.dtype)):
dtype = np.dtype("float64")
else:
# one is int64 other is uint64
dtype = np.dtype("object")

dtype = self._find_common_type_compat(other)
left = self.astype(dtype, copy=False)
right = other.astype(dtype, copy=False)
return left.union(right, sort=sort)
Expand Down Expand Up @@ -5410,6 +5396,19 @@ def _find_common_type_compat(self, target) -> DtypeObj:
return IntervalDtype(np.float64, closed=self.closed)

target_dtype, _ = infer_dtype_from(target, pandas_dtype=True)

# special case: if one dtype is uint64 and the other a signed int, return object
# See https://github.com/pandas-dev/pandas/issues/26778 for discussion
# Now it's:
# * float | [u]int -> float
# * uint64 | signed int -> object
# We may change union(float | [u]int) to go to object.
if self.dtype == "uint64" or target_dtype == "uint64":
if is_signed_integer_dtype(self.dtype) or is_signed_integer_dtype(
target_dtype
):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In principle, we could check signed_one.min() and, if it's non-negative, avoid the object-dtype cast. Not for this PR.

return np.dtype("object")

dtype = find_common_type([self.dtype, target_dtype])
if dtype.kind in ["i", "u"]:
# TODO: what about reversed with self being categorical?
Expand Down
26 changes: 10 additions & 16 deletions pandas/tests/indexes/test_setops.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
import numpy as np
import pytest

from pandas.core.dtypes.common import is_dtype_equal
from pandas.core.dtypes.cast import find_common_type

from pandas import (
CategoricalIndex,
Expand All @@ -25,6 +25,7 @@
import pandas._testing as tm
from pandas.api.types import (
is_datetime64tz_dtype,
is_signed_integer_dtype,
pandas_dtype,
)

Expand All @@ -48,7 +49,11 @@ def test_union_different_types(index_flat, index_flat2):
idx1 = index_flat
idx2 = index_flat2

type_pair = tuple(sorted([idx1.dtype.type, idx2.dtype.type], key=lambda x: str(x)))
common_dtype = find_common_type([idx1.dtype, idx2.dtype])

any_uint64 = idx1.dtype == np.uint64 or idx2.dtype == np.uint64
idx1_signed = is_signed_integer_dtype(idx1.dtype)
idx2_signed = is_signed_integer_dtype(idx2.dtype)

# Union with a non-unique, non-monotonic index raises error
# This applies to the boolean index
Expand All @@ -58,23 +63,12 @@ def test_union_different_types(index_flat, index_flat2):
res1 = idx1.union(idx2)
res2 = idx2.union(idx1)

if is_dtype_equal(idx1.dtype, idx2.dtype):
assert res1.dtype == idx1.dtype
assert res2.dtype == idx1.dtype

elif type_pair not in COMPATIBLE_INCONSISTENT_PAIRS:
# A union with a CategoricalIndex (even as dtype('O')) and a
# non-CategoricalIndex can only be made if both indices are monotonic.
# This is true before this PR as well.
if any_uint64 and (idx1_signed or idx2_signed):
assert res1.dtype == np.dtype("O")
assert res2.dtype == np.dtype("O")

elif idx1.dtype.kind in ["f", "i", "u"] and idx2.dtype.kind in ["f", "i", "u"]:
assert res1.dtype == np.dtype("f8")
assert res2.dtype == np.dtype("f8")

else:
raise NotImplementedError
assert res1.dtype == common_dtype
assert res2.dtype == common_dtype


@pytest.mark.parametrize(
Expand Down