diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6923b42d3340b..33a5b016a293f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -367,7 +367,7 @@ Other - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`) - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`) - Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`) -- +- Bug in :meth:`Index.union` behaving differently depending on whether operand is a :class:`Index` or other list-like (:issue:`36384`) .. --------------------------------------------------------------------------- diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 222ae589ea7fc..bdab943c5700c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2507,7 +2507,7 @@ def _union_incompatible_dtypes(self, other, sort): other = Index(other).astype(object, copy=False) return Index.union(this, other, sort=sort).astype(object, copy=False) - def _is_compatible_with_other(self, other) -> bool: + def _can_union_without_object_cast(self, other) -> bool: """ Check whether this and the other dtype are compatible with each other. Meaning a union can be formed between them without needing to be cast @@ -2583,8 +2583,9 @@ def union(self, other, sort=None): """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) + other = ensure_index(other) - if not self._is_compatible_with_other(other): + if not self._can_union_without_object_cast(other): return self._union_incompatible_dtypes(other, sort=sort) return self._union(other, sort=sort) @@ -5655,6 +5656,13 @@ def ensure_index( return MultiIndex.from_arrays(converted) else: + if isinstance(converted, np.ndarray) and converted.dtype == np.int64: + # Check for overflows if we should actually be uint64 + # xref GH#35481 + alt = np.asarray(index_like) + if alt.dtype == np.uint64: + converted = alt + index_like = converted else: # clean_index_list does the equivalent of copying diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 1ab40a76b30ff..0b7856f55ba97 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -622,6 +622,9 @@ def insert(self, loc: int, item): # -------------------------------------------------------------------- # Join/Set Methods + def _can_union_without_object_cast(self, other) -> bool: + return is_dtype_equal(self.dtype, other.dtype) + def _wrap_joined_index(self, joined: np.ndarray, other): assert other.dtype == self.dtype, (other.dtype, self.dtype) name = get_op_result_name(self, other) diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index 49a70600c09fa..574c9adc31808 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -15,19 +15,14 @@ is_float, is_float_dtype, is_integer_dtype, + is_numeric_dtype, is_scalar, is_signed_integer_dtype, is_unsigned_integer_dtype, needs_i8_conversion, pandas_dtype, ) -from pandas.core.dtypes.generic import ( - ABCFloat64Index, - ABCInt64Index, - ABCRangeIndex, - ABCSeries, - ABCUInt64Index, -) +from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import is_valid_nat_for_dtype, isna from pandas.core import algorithms @@ -275,11 +270,9 @@ def _assert_safe_casting(cls, data, subarr): if not np.array_equal(data, subarr): raise TypeError("Unsafe NumPy casting, you must explicitly cast") - def _is_compatible_with_other(self, other) -> bool: - return super()._is_compatible_with_other(other) or all( - isinstance(obj, (ABCInt64Index, ABCFloat64Index, ABCRangeIndex)) - for obj in [self, other] - ) + def _can_union_without_object_cast(self, other) -> bool: + # See GH#26778, further casting may occur in NumericIndex._union + return other.dtype == "f8" or other.dtype == self.dtype Int64Index._add_numeric_methods() @@ -324,10 +317,9 @@ def _assert_safe_casting(cls, data, subarr): if not np.array_equal(data, subarr): raise TypeError("Unsafe NumPy casting, you must explicitly cast") - def _is_compatible_with_other(self, other) -> bool: - return super()._is_compatible_with_other(other) or all( - isinstance(obj, (ABCUInt64Index, ABCFloat64Index)) for obj in [self, other] - ) + def _can_union_without_object_cast(self, other) -> bool: + # See GH#26778, further casting may occur in NumericIndex._union + return other.dtype == "f8" or other.dtype == self.dtype UInt64Index._add_numeric_methods() @@ -432,13 +424,9 @@ def isin(self, values, level=None): self._validate_index_level(level) return algorithms.isin(np.array(self), values) - def _is_compatible_with_other(self, other) -> bool: - return super()._is_compatible_with_other(other) or all( - isinstance( - obj, (ABCInt64Index, ABCFloat64Index, ABCUInt64Index, ABCRangeIndex) - ) - for obj in [self, other] - ) + def _can_union_without_object_cast(self, other) -> bool: + # See GH#26778, further casting may occur in NumericIndex._union + return is_numeric_dtype(other.dtype) Float64Index._add_numeric_methods() diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index b01cafc9b0d5c..c40f7b1bc2120 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -5,7 +5,6 @@ import pytest from pandas._libs import iNaT -from pandas.compat.numpy import is_numpy_dev from pandas.errors import InvalidIndexError from pandas.core.dtypes.common import is_datetime64tz_dtype @@ -456,7 +455,7 @@ def test_set_ops_error_cases(self, case, method, index): with pytest.raises(TypeError, match=msg): getattr(index, method)(case) - def test_intersection_base(self, index, request): + def test_intersection_base(self, index): if isinstance(index, CategoricalIndex): return @@ -473,15 +472,6 @@ def test_intersection_base(self, index, request): # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - # https://github.com/pandas-dev/pandas/issues/35481 - if ( - is_numpy_dev - and isinstance(case, Series) - and isinstance(index, UInt64Index) - ): - mark = pytest.mark.xfail(reason="gh-35481") - request.node.add_marker(mark) - result = first.intersection(case) assert tm.equalContents(result, second) @@ -507,7 +497,11 @@ def test_union_base(self, index): for case in cases: if not isinstance(index, CategoricalIndex): result = first.union(case) - assert tm.equalContents(result, everything) + assert tm.equalContents(result, everything), ( + result, + everything, + type(case), + ) if isinstance(index, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index f19e78323ab23..102c8f97a8a6b 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -46,10 +46,8 @@ def test_union3(self, sort, box): first = everything[:5] second = everything[5:] - # GH 10149 - expected = ( - first.astype("O").union(pd.Index(second.values, dtype="O")).astype("O") - ) + # GH 10149 support listlike inputs other than Index objects + expected = first.union(second, sort=sort) case = box(second.values) result = first.union(case, sort=sort) tm.assert_index_equal(result, expected)