Skip to content

Commit 5af0b4a

Browse files
jbrockmendel authored and luckyvs1 committed
REF: unify Index union methods (pandas-dev#38382)
1 parent 1a7a892 commit 5af0b4a

File tree

5 files changed

+31
-74
lines changed

5 files changed

+31
-74
lines changed

pandas/core/indexes/base.py

+18-43
Original file line number | Diff line number | Diff line change
@@ -2592,47 +2592,6 @@ def _get_reconciled_name_object(self, other):
25922592
return self.rename(name)
25932593
return self
25942594

2595-
@final
2596-
def _union_incompatible_dtypes(self, other, sort):
2597-
"""
2598-
Casts this and other index to object dtype to allow the formation
2599-
of a union between incompatible types.
2600-
2601-
Parameters
2602-
----------
2603-
other : Index or array-like
2604-
sort : False or None, default False
2605-
Whether to sort the resulting index.
2606-
2607-
* False : do not sort the result.
2608-
* None : sort the result, except when `self` and `other` are equal
2609-
or when the values cannot be compared.
2610-
2611-
Returns
2612-
-------
2613-
Index
2614-
"""
2615-
this = self.astype(object, copy=False)
2616-
# cast to Index for when `other` is list-like
2617-
other = Index(other).astype(object, copy=False)
2618-
return Index.union(this, other, sort=sort).astype(object, copy=False)
2619-
2620-
def _can_union_without_object_cast(self, other) -> bool:
2621-
"""
2622-
Check whether this and the other dtype are compatible with each other.
2623-
Meaning a union can be formed between them without needing to be cast
2624-
to dtype object.
2625-
2626-
Parameters
2627-
----------
2628-
other : Index or array-like
2629-
2630-
Returns
2631-
-------
2632-
bool
2633-
"""
2634-
return type(self) is type(other) and is_dtype_equal(self.dtype, other.dtype)
2635-
26362595
@final
26372596
def _validate_sort_keyword(self, sort):
26382597
if sort not in [None, False]:
@@ -2696,8 +2655,24 @@ def union(self, other, sort=None):
26962655
self._assert_can_do_setop(other)
26972656
other, result_name = self._convert_can_do_setop(other)
26982657

2699-
if not self._can_union_without_object_cast(other):
2700-
return self._union_incompatible_dtypes(other, sort=sort)
2658+
if not is_dtype_equal(self.dtype, other.dtype):
2659+
dtype = find_common_type([self.dtype, other.dtype])
2660+
if self._is_numeric_dtype and other._is_numeric_dtype:
2661+
# Right now, we treat union(int, float) a bit special.
2662+
# See https://github.com/pandas-dev/pandas/issues/26778 for discussion
2663+
# We may change union(int, float) to go to object.
2664+
# float | [u]int -> float (the special case)
2665+
# <T> | <T> -> T
2666+
# <T> | <U> -> object
2667+
if not (is_integer_dtype(self.dtype) and is_integer_dtype(other.dtype)):
2668+
dtype = "float64"
2669+
else:
2670+
# one is int64 other is uint64
2671+
dtype = object
2672+
2673+
left = self.astype(dtype, copy=False)
2674+
right = other.astype(dtype, copy=False)
2675+
return left.union(right, sort=sort)
27012676

27022677
result = self._union(other, sort=sort)
27032678

pandas/core/indexes/datetimelike.py

-3
Original file line number | Diff line number | Diff line change
@@ -597,9 +597,6 @@ def insert(self, loc: int, item):
597597
# --------------------------------------------------------------------
598598
# Join/Set Methods
599599

600-
def _can_union_without_object_cast(self, other) -> bool:
601-
return is_dtype_equal(self.dtype, other.dtype)
602-
603600
def _get_join_freq(self, other):
604601
"""
605602
Get the freq to attach to the result of a join operation.

pandas/core/indexes/numeric.py

-25
Original file line number | Diff line number | Diff line change
@@ -182,23 +182,6 @@ def _is_all_dates(self) -> bool:
182182
"""
183183
return False
184184

185-
def _union(self, other, sort):
186-
# Right now, we treat union(int, float) a bit special.
187-
# See https://github.com/pandas-dev/pandas/issues/26778 for discussion
188-
# We may change union(int, float) to go to object.
189-
# float | [u]int -> float (the special case)
190-
# <T> | <T> -> T
191-
# <T> | <U> -> object
192-
needs_cast = (is_integer_dtype(self.dtype) and is_float_dtype(other.dtype)) or (
193-
is_integer_dtype(other.dtype) and is_float_dtype(self.dtype)
194-
)
195-
if needs_cast:
196-
first = self.astype("float")
197-
second = other.astype("float")
198-
return first._union(second, sort)
199-
else:
200-
return super()._union(other, sort)
201-
202185

203186
_num_index_shared_docs[
204187
"class_descr"
@@ -258,10 +241,6 @@ def _assert_safe_casting(cls, data, subarr):
258241
if not np.array_equal(data, subarr):
259242
raise TypeError("Unsafe NumPy casting, you must explicitly cast")
260243

261-
def _can_union_without_object_cast(self, other) -> bool:
262-
# See GH#26778, further casting may occur in NumericIndex._union
263-
return other.dtype == "f8" or other.dtype == self.dtype
264-
265244
def __contains__(self, key) -> bool:
266245
"""
267246
Check if key is a float and has a decimal. If it has, return False.
@@ -422,7 +401,3 @@ def __contains__(self, other: Any) -> bool:
422401
return True
423402

424403
return is_float(other) and np.isnan(other) and self.hasnans
425-
426-
def _can_union_without_object_cast(self, other) -> bool:
427-
# See GH#26778, further casting may occur in NumericIndex._union
428-
return is_numeric_dtype(other.dtype)

pandas/core/indexes/timedeltas.py

+1-1
Original file line number | Diff line number | Diff line change
@@ -108,7 +108,7 @@ class TimedeltaIndex(DatetimeTimedeltaMixin):
108108

109109
_comparables = ["name", "freq"]
110110
_attributes = ["name", "freq"]
111-
_is_numeric_dtype = True
111+
_is_numeric_dtype = False
112112

113113
_data: TimedeltaArray
114114

pandas/tests/indexes/interval/test_setops.py

+12-2
Original file line number | Diff line number | Diff line change
@@ -38,10 +38,20 @@ def test_union_empty_result(self, closed, sort):
3838
result = index.union(index, sort=sort)
3939
tm.assert_index_equal(result, index)
4040

41-
# GH 19101: empty result, different dtypes -> common dtype is object
41+
# GH 19101: empty result, different numeric dtypes -> common dtype is f8
4242
other = empty_index(dtype="float64", closed=closed)
4343
result = index.union(other, sort=sort)
44-
expected = Index([], dtype=object)
44+
expected = other
45+
tm.assert_index_equal(result, expected)
46+
47+
other = index.union(index, sort=sort)
48+
tm.assert_index_equal(result, expected)
49+
50+
other = empty_index(dtype="uint64", closed=closed)
51+
result = index.union(other, sort=sort)
52+
tm.assert_index_equal(result, expected)
53+
54+
result = other.union(index, sort=sort)
4555
tm.assert_index_equal(result, expected)
4656

4757
def test_intersection(self, closed, sort):

0 commit comments

Comments
 (0)