Skip to content

Commit 56eb167

Browse files
authored
REF: _is_compatible_with_other -> _can_union_without_object_cast (#36384)
1 parent d2e958b commit 56eb167

File tree

6 files changed

+33
-42
lines changed

6 files changed

+33
-42
lines changed

doc/source/whatsnew/v1.2.0.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -367,7 +367,7 @@ Other
367367
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
368368
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
369369
- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was a dictionary (:issue:`35811`)
370-
-
370+
- Bug in :meth:`Index.union` behaving differently depending on whether the operand is an :class:`Index` or another list-like (:issue:`36384`)
371371

372372
.. ---------------------------------------------------------------------------
373373

pandas/core/indexes/base.py

+10-2
Original file line numberDiff line numberDiff line change
@@ -2511,7 +2511,7 @@ def _union_incompatible_dtypes(self, other, sort):
25112511
other = Index(other).astype(object, copy=False)
25122512
return Index.union(this, other, sort=sort).astype(object, copy=False)
25132513

2514-
def _is_compatible_with_other(self, other) -> bool:
2514+
def _can_union_without_object_cast(self, other) -> bool:
25152515
"""
25162516
Check whether this and the other dtype are compatible with each other.
25172517
Meaning a union can be formed between them without needing to be cast
@@ -2587,8 +2587,9 @@ def union(self, other, sort=None):
25872587
"""
25882588
self._validate_sort_keyword(sort)
25892589
self._assert_can_do_setop(other)
2590+
other = ensure_index(other)
25902591

2591-
if not self._is_compatible_with_other(other):
2592+
if not self._can_union_without_object_cast(other):
25922593
return self._union_incompatible_dtypes(other, sort=sort)
25932594

25942595
return self._union(other, sort=sort)
@@ -5657,6 +5658,13 @@ def ensure_index(
56575658

56585659
return MultiIndex.from_arrays(converted)
56595660
else:
5661+
if isinstance(converted, np.ndarray) and converted.dtype == np.int64:
5662+
# Check for overflows if we should actually be uint64
5663+
# xref GH#35481
5664+
alt = np.asarray(index_like)
5665+
if alt.dtype == np.uint64:
5666+
converted = alt
5667+
56605668
index_like = converted
56615669
else:
56625670
# clean_index_list does the equivalent of copying

pandas/core/indexes/datetimelike.py

+3
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,9 @@ def insert(self, loc: int, item):
622622
# --------------------------------------------------------------------
623623
# Join/Set Methods
624624

625+
def _can_union_without_object_cast(self, other) -> bool:
626+
return is_dtype_equal(self.dtype, other.dtype)
627+
625628
def _wrap_joined_index(self, joined: np.ndarray, other):
626629
assert other.dtype == self.dtype, (other.dtype, self.dtype)
627630
name = get_op_result_name(self, other)

pandas/core/indexes/numeric.py

+11-23
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,14 @@
1515
is_float,
1616
is_float_dtype,
1717
is_integer_dtype,
18+
is_numeric_dtype,
1819
is_scalar,
1920
is_signed_integer_dtype,
2021
is_unsigned_integer_dtype,
2122
needs_i8_conversion,
2223
pandas_dtype,
2324
)
24-
from pandas.core.dtypes.generic import (
25-
ABCFloat64Index,
26-
ABCInt64Index,
27-
ABCRangeIndex,
28-
ABCSeries,
29-
ABCUInt64Index,
30-
)
25+
from pandas.core.dtypes.generic import ABCSeries
3126
from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna
3227

3328
from pandas.core import algorithms
@@ -275,11 +270,9 @@ def _assert_safe_casting(cls, data, subarr):
275270
if not np.array_equal(data, subarr):
276271
raise TypeError("Unsafe NumPy casting, you must explicitly cast")
277272

278-
def _is_compatible_with_other(self, other) -> bool:
279-
return super()._is_compatible_with_other(other) or all(
280-
isinstance(obj, (ABCInt64Index, ABCFloat64Index, ABCRangeIndex))
281-
for obj in [self, other]
282-
)
273+
def _can_union_without_object_cast(self, other) -> bool:
274+
# See GH#26778, further casting may occur in NumericIndex._union
275+
return other.dtype == "f8" or other.dtype == self.dtype
283276

284277

285278
Int64Index._add_numeric_methods()
@@ -324,10 +317,9 @@ def _assert_safe_casting(cls, data, subarr):
324317
if not np.array_equal(data, subarr):
325318
raise TypeError("Unsafe NumPy casting, you must explicitly cast")
326319

327-
def _is_compatible_with_other(self, other) -> bool:
328-
return super()._is_compatible_with_other(other) or all(
329-
isinstance(obj, (ABCUInt64Index, ABCFloat64Index)) for obj in [self, other]
330-
)
320+
def _can_union_without_object_cast(self, other) -> bool:
321+
# See GH#26778, further casting may occur in NumericIndex._union
322+
return other.dtype == "f8" or other.dtype == self.dtype
331323

332324

333325
UInt64Index._add_numeric_methods()
@@ -432,13 +424,9 @@ def isin(self, values, level=None):
432424
self._validate_index_level(level)
433425
return algorithms.isin(np.array(self), values)
434426

435-
def _is_compatible_with_other(self, other) -> bool:
436-
return super()._is_compatible_with_other(other) or all(
437-
isinstance(
438-
obj, (ABCInt64Index, ABCFloat64Index, ABCUInt64Index, ABCRangeIndex)
439-
)
440-
for obj in [self, other]
441-
)
427+
def _can_union_without_object_cast(self, other) -> bool:
428+
# See GH#26778, further casting may occur in NumericIndex._union
429+
return is_numeric_dtype(other.dtype)
442430

443431

444432
Float64Index._add_numeric_methods()

pandas/tests/indexes/common.py

+6-12
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
import pytest
66

77
from pandas._libs import iNaT
8-
from pandas.compat.numpy import is_numpy_dev
98
from pandas.errors import InvalidIndexError
109

1110
from pandas.core.dtypes.common import is_datetime64tz_dtype
@@ -456,7 +455,7 @@ def test_set_ops_error_cases(self, case, method, index):
456455
with pytest.raises(TypeError, match=msg):
457456
getattr(index, method)(case)
458457

459-
def test_intersection_base(self, index, request):
458+
def test_intersection_base(self, index):
460459
if isinstance(index, CategoricalIndex):
461460
return
462461

@@ -473,15 +472,6 @@ def test_intersection_base(self, index, request):
473472
# GH 10149
474473
cases = [klass(second.values) for klass in [np.array, Series, list]]
475474
for case in cases:
476-
# https://github.com/pandas-dev/pandas/issues/35481
477-
if (
478-
is_numpy_dev
479-
and isinstance(case, Series)
480-
and isinstance(index, UInt64Index)
481-
):
482-
mark = pytest.mark.xfail(reason="gh-35481")
483-
request.node.add_marker(mark)
484-
485475
result = first.intersection(case)
486476
assert tm.equalContents(result, second)
487477

@@ -507,7 +497,11 @@ def test_union_base(self, index):
507497
for case in cases:
508498
if not isinstance(index, CategoricalIndex):
509499
result = first.union(case)
510-
assert tm.equalContents(result, everything)
500+
assert tm.equalContents(result, everything), (
501+
result,
502+
everything,
503+
type(case),
504+
)
511505

512506
if isinstance(index, MultiIndex):
513507
msg = "other must be a MultiIndex or a list of tuples"

pandas/tests/indexes/datetimes/test_setops.py

+2-4
Original file line numberDiff line numberDiff line change
@@ -46,10 +46,8 @@ def test_union3(self, sort, box):
4646
first = everything[:5]
4747
second = everything[5:]
4848

49-
# GH 10149
50-
expected = (
51-
first.astype("O").union(pd.Index(second.values, dtype="O")).astype("O")
52-
)
49+
# GH 10149 support listlike inputs other than Index objects
50+
expected = first.union(second, sort=sort)
5351
case = box(second.values)
5452
result = first.union(case, sort=sort)
5553
tm.assert_index_equal(result, expected)

0 commit comments

Comments
 (0)