From fdf4b7fbd4991a850e120b5509294c29200d2107 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Sep 2020 08:06:13 -0700 Subject: [PATCH 1/5] REF: _is_compatible_with_other -> _can_union_without_object_cast --- pandas/core/indexes/base.py | 5 +-- pandas/core/indexes/datetimelike.py | 1 + pandas/core/indexes/numeric.py | 34 ++++++------------- pandas/tests/indexes/datetimes/test_setops.py | 6 ++-- 4 files changed, 17 insertions(+), 29 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 3d177e08bb0f5..17e4a39c4a84c 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -2507,7 +2507,7 @@ def _union_incompatible_dtypes(self, other, sort): other = Index(other).astype(object, copy=False) return Index.union(this, other, sort=sort).astype(object, copy=False) - def _is_compatible_with_other(self, other) -> bool: + def _can_union_without_object_cast(self, other) -> bool: """ Check whether this and the other dtype are compatible with each other. 
Meaning a union can be formed between them without needing to be cast @@ -2583,8 +2583,9 @@ def union(self, other, sort=None): """ self._validate_sort_keyword(sort) self._assert_can_do_setop(other) + other = ensure_index(other) - if not self._is_compatible_with_other(other): + if not self._can_union_without_object_cast(other): return self._union_incompatible_dtypes(other, sort=sort) return self._union(other, sort=sort) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 5ba5732c710f7..a23c3d1fba4b2 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -98,6 +98,7 @@ class DatetimeIndexOpsMixin(ExtensionIndex): DatetimeLikeArrayMixin._hasnans.fget # type: ignore[attr-defined] ) _hasnans = hasnans # for index / array -agnostic code + _can_union_without_object_cast = Index._can_union_without_object_cast @property def is_all_dates(self) -> bool: diff --git a/pandas/core/indexes/numeric.py b/pandas/core/indexes/numeric.py index f6859cbc4c0a2..6a02790258c25 100644 --- a/pandas/core/indexes/numeric.py +++ b/pandas/core/indexes/numeric.py @@ -15,19 +15,14 @@ is_float, is_float_dtype, is_integer_dtype, + is_numeric_dtype, is_scalar, is_signed_integer_dtype, is_unsigned_integer_dtype, needs_i8_conversion, pandas_dtype, ) -from pandas.core.dtypes.generic import ( - ABCFloat64Index, - ABCInt64Index, - ABCRangeIndex, - ABCSeries, - ABCUInt64Index, -) +from pandas.core.dtypes.generic import ABCSeries from pandas.core.dtypes.missing import isna from pandas.core import algorithms @@ -270,11 +265,9 @@ def _assert_safe_casting(cls, data, subarr): if not np.array_equal(data, subarr): raise TypeError("Unsafe NumPy casting, you must explicitly cast") - def _is_compatible_with_other(self, other) -> bool: - return super()._is_compatible_with_other(other) or all( - isinstance(obj, (ABCInt64Index, ABCFloat64Index, ABCRangeIndex)) - for obj in [self, other] - ) + def _can_union_without_object_cast(self, other) 
-> bool: + # See GH#26778, further casting may occur in NumericIndex._union + return other.dtype == "f8" or other.dtype == self.dtype Int64Index._add_numeric_methods() @@ -328,10 +321,9 @@ def _assert_safe_casting(cls, data, subarr): if not np.array_equal(data, subarr): raise TypeError("Unsafe NumPy casting, you must explicitly cast") - def _is_compatible_with_other(self, other) -> bool: - return super()._is_compatible_with_other(other) or all( - isinstance(obj, (ABCUInt64Index, ABCFloat64Index)) for obj in [self, other] - ) + def _can_union_without_object_cast(self, other) -> bool: + # See GH#26778, further casting may occur in NumericIndex._union + return other.dtype == "f8" or other.dtype == self.dtype UInt64Index._add_numeric_methods() @@ -436,13 +428,9 @@ def isin(self, values, level=None): self._validate_index_level(level) return algorithms.isin(np.array(self), values) - def _is_compatible_with_other(self, other) -> bool: - return super()._is_compatible_with_other(other) or all( - isinstance( - obj, (ABCInt64Index, ABCFloat64Index, ABCUInt64Index, ABCRangeIndex) - ) - for obj in [self, other] - ) + def _can_union_without_object_cast(self, other) -> bool: + # See GH#26778, further casting may occur in NumericIndex._union + return is_numeric_dtype(other.dtype) Float64Index._add_numeric_methods() diff --git a/pandas/tests/indexes/datetimes/test_setops.py b/pandas/tests/indexes/datetimes/test_setops.py index f19e78323ab23..102c8f97a8a6b 100644 --- a/pandas/tests/indexes/datetimes/test_setops.py +++ b/pandas/tests/indexes/datetimes/test_setops.py @@ -46,10 +46,8 @@ def test_union3(self, sort, box): first = everything[:5] second = everything[5:] - # GH 10149 - expected = ( - first.astype("O").union(pd.Index(second.values, dtype="O")).astype("O") - ) + # GH 10149 support listlike inputs other than Index objects + expected = first.union(second, sort=sort) case = box(second.values) result = first.union(case, sort=sort) tm.assert_index_equal(result, expected) From 
5a5016a9f18a2cc41d90dae0774998c78a72ae30 Mon Sep 17 00:00:00 2001 From: Brock Date: Tue, 15 Sep 2020 08:31:40 -0700 Subject: [PATCH 2/5] mypy fixup --- pandas/core/indexes/datetimelike.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index a23c3d1fba4b2..6b6a50b819fa4 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -98,7 +98,6 @@ class DatetimeIndexOpsMixin(ExtensionIndex): DatetimeLikeArrayMixin._hasnans.fget # type: ignore[attr-defined] ) _hasnans = hasnans # for index / array -agnostic code - _can_union_without_object_cast = Index._can_union_without_object_cast @property def is_all_dates(self) -> bool: @@ -578,6 +577,9 @@ def delete(self, loc): # -------------------------------------------------------------------- # Join/Set Methods + def _can_union_without_object_cast(self, other) -> bool: + return is_dtype_equal(self.dtype, other.dtype) + def _wrap_joined_index(self, joined: np.ndarray, other): assert other.dtype == self.dtype, (other.dtype, self.dtype) name = get_op_result_name(self, other) From 09d3792853d5780840fcc9d8ec98143be77e2d83 Mon Sep 17 00:00:00 2001 From: Brock Date: Wed, 16 Sep 2020 20:09:14 -0700 Subject: [PATCH 3/5] troubleshoot --- pandas/tests/indexes/common.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 11dc232af8de4..9d39e0730c244 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -507,7 +507,11 @@ def test_union_base(self, index): for case in cases: if not isinstance(index, CategoricalIndex): result = first.union(case) - assert tm.equalContents(result, everything) + assert tm.equalContents(result, everything), ( + result, + everything, + type(case), + ) if isinstance(index, MultiIndex): msg = "other must be a MultiIndex or a list of tuples" From 
4b372e8ccd190448d696a858e45cab53b56a1051 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 17 Sep 2020 10:23:35 -0700 Subject: [PATCH 4/5] Get uint64 right in ensure_index --- pandas/core/indexes/base.py | 7 +++++++ pandas/tests/indexes/common.py | 12 +----------- 2 files changed, 8 insertions(+), 11 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index c873bd022ef75..5dc7ae88b180e 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -5665,6 +5665,13 @@ def ensure_index( return MultiIndex.from_arrays(converted) else: + if isinstance(converted, np.ndarray) and converted.dtype == np.int64: + # Check for overflows if we should actually be uint64 + # xref GH#35481 + alt = np.asarray(index_like) + if alt.dtype == np.uint64: + converted = alt + index_like = converted else: # clean_index_list does the equivalent of copying diff --git a/pandas/tests/indexes/common.py b/pandas/tests/indexes/common.py index 9d39e0730c244..815a4c41f20d2 100644 --- a/pandas/tests/indexes/common.py +++ b/pandas/tests/indexes/common.py @@ -5,7 +5,6 @@ import pytest from pandas._libs import iNaT -from pandas.compat.numpy import is_numpy_dev from pandas.errors import InvalidIndexError from pandas.core.dtypes.common import is_datetime64tz_dtype @@ -456,7 +455,7 @@ def test_set_ops_error_cases(self, case, method, index): with pytest.raises(TypeError, match=msg): getattr(index, method)(case) - def test_intersection_base(self, index, request): + def test_intersection_base(self, index): if isinstance(index, CategoricalIndex): return @@ -473,15 +472,6 @@ def test_intersection_base(self, index, request): # GH 10149 cases = [klass(second.values) for klass in [np.array, Series, list]] for case in cases: - # https://github.com/pandas-dev/pandas/issues/35481 - if ( - is_numpy_dev - and isinstance(case, Series) - and isinstance(index, UInt64Index) - ): - mark = pytest.mark.xfail(reason="gh-35481") - request.node.add_marker(mark) - result = 
first.intersection(case) assert tm.equalContents(result, second) From 4f0f2303a99b1827db980145b56e2b4ec2133377 Mon Sep 17 00:00:00 2001 From: Brock Date: Fri, 18 Sep 2020 09:37:24 -0700 Subject: [PATCH 5/5] whatsnew --- doc/source/whatsnew/v1.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.2.0.rst b/doc/source/whatsnew/v1.2.0.rst index 6923b42d3340b..33a5b016a293f 100644 --- a/doc/source/whatsnew/v1.2.0.rst +++ b/doc/source/whatsnew/v1.2.0.rst @@ -367,7 +367,7 @@ Other - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`) - Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`) - Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was dictionary (:issue:`35811`) -- +- Bug in :meth:`Index.union` behaving differently depending on whether the operand is an :class:`Index` or other list-like (:issue:`36384`) .. ---------------------------------------------------------------------------