diff --git a/pandas/tests/indexes/base_class/test_setops.py b/pandas/tests/indexes/base_class/test_setops.py index b571ff7f63f58..9a6a892307da8 100644 --- a/pandas/tests/indexes/base_class/test_setops.py +++ b/pandas/tests/indexes/base_class/test_setops.py @@ -1,3 +1,5 @@ +from datetime import datetime + import numpy as np import pytest @@ -83,7 +85,7 @@ def test_union_sort_other_incomparable(self): result = idx.union(idx[:1], sort=False) tm.assert_index_equal(result, idx) - @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.xfail(reason="GH#25151 need to decide on True behavior") def test_union_sort_other_incomparable_true(self): # TODO decide on True behaviour # sort=True @@ -91,6 +93,13 @@ def test_union_sort_other_incomparable_true(self): with pytest.raises(TypeError, match=".*"): idx.union(idx[:1], sort=True) + @pytest.mark.xfail(reason="GH#25151 need to decide on True behavior") + def test_intersection_equal_sort_true(self): + # TODO decide on True behaviour + idx = Index(["c", "a", "b"]) + sorted_ = Index(["a", "b", "c"]) + tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) + def test_intersection_base(self, sort): # (same results for py2 and py3 but sortedness not tested elsewhere) index = Index([0, "a", 1, "b", 2, "c"]) @@ -111,7 +120,7 @@ def test_intersection_different_type_base(self, klass, sort): result = first.intersection(klass(second.values), sort=sort) assert tm.equalContents(result, second) - def test_intersect_nosort(self): + def test_intersection_nosort(self): result = Index(["c", "b", "a"]).intersection(["b", "a"]) expected = Index(["b", "a"]) tm.assert_index_equal(result, expected) @@ -121,6 +130,28 @@ def test_intersection_equal_sort(self): tm.assert_index_equal(idx.intersection(idx, sort=False), idx) tm.assert_index_equal(idx.intersection(idx, sort=None), idx) + def test_intersection_str_dates(self, sort): + dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] + + i1 = Index(dt_dates, dtype=object) + i2 = 
Index(["aa"], dtype=object) + result = i2.intersection(i1, sort=sort) + + assert len(result) == 0 + + @pytest.mark.parametrize( + "index2,expected_arr", + [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B", "A"])], + ) + def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort): + # non-monotonic non-unique + index1 = Index(["A", "B", "A", "C"]) + expected = Index(expected_arr, dtype="object") + result = index1.intersection(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + def test_difference_base(self, sort): # (same results for py2 and py3 but sortedness not tested elsewhere) index = Index([0, "a", 1, "b", 2, "c"]) @@ -142,3 +173,74 @@ def test_symmetric_difference(self): result = first.symmetric_difference(second) expected = Index([0, 1, 2, "a", "c"]) tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "method,expected,sort", + [ + ( + "intersection", + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B")], + dtype=[("num", int), ("let", "a1")], + ), + False, + ), + ( + "intersection", + np.array( + [(1, "A"), (1, "B"), (2, "A"), (2, "B")], + dtype=[("num", int), ("let", "a1")], + ), + None, + ), + ( + "union", + np.array( + [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")], + dtype=[("num", int), ("let", "a1")], + ), + None, + ), + ], + ) + def test_tuple_union_bug(self, method, expected, sort): + index1 = Index( + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B")], + dtype=[("num", int), ("let", "a1")], + ) + ) + index2 = Index( + np.array( + [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")], + dtype=[("num", int), ("let", "a1")], + ) + ) + + result = getattr(index1, method)(index2, sort=sort) + assert result.ndim == 1 + + expected = Index(expected) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("first_list", [list("ba"), list()]) + @pytest.mark.parametrize("second_list", [list("ab"), 
list()]) + @pytest.mark.parametrize( + "first_name, second_name, expected_name", + [("A", "B", None), (None, "B", None), ("A", None, None)], + ) + def test_union_name_preservation( + self, first_list, second_list, first_name, second_name, expected_name, sort + ): + first = Index(first_list, name=first_name) + second = Index(second_list, name=second_name) + union = first.union(second, sort=sort) + + vals = set(first_list).union(second_list) + + if sort is None and len(first_list) > 0 and len(second_list) > 0: + expected = Index(sorted(vals), name=expected_name) + tm.assert_index_equal(union, expected) + else: + expected = Index(vals, name=expected_name) + assert tm.equalContents(union, expected) diff --git a/pandas/tests/indexes/numeric/test_setops.py b/pandas/tests/indexes/numeric/test_setops.py new file mode 100644 index 0000000000000..6cde3e2366062 --- /dev/null +++ b/pandas/tests/indexes/numeric/test_setops.py @@ -0,0 +1,139 @@ +from datetime import datetime, timedelta + +import numpy as np +import pytest + +from pandas import Float64Index, Index, Int64Index, RangeIndex, UInt64Index +import pandas._testing as tm + + +@pytest.fixture +def index_large(): + # large values used in TestUInt64Index where no compat needed with Int64/Float64 + large = [2 ** 63, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20, 2 ** 63 + 25] + return UInt64Index(large) + + +class TestSetOps: + @pytest.mark.parametrize("dtype", ["f8", "u8", "i8"]) + def test_union_non_numeric(self, dtype): + # corner case, non-numeric + index = Index(np.arange(5, dtype=dtype), dtype=dtype) + assert index.dtype == dtype + + other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) + result = index.union(other) + expected = Index(np.concatenate((index, other))) + tm.assert_index_equal(result, expected) + + result = other.union(index) + expected = Index(np.concatenate((other, index))) + tm.assert_index_equal(result, expected) + + def test_intersection(self): + index = Int64Index(range(5)) + + other = 
Index([1, 2, 3, 4, 5]) + result = index.intersection(other) + expected = Index(np.sort(np.intersect1d(index.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index) + expected = Index( + np.sort(np.asarray(np.intersect1d(index.values, other.values))) + ) + tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize("dtype", ["int64", "uint64"]) + def test_int_float_union_dtype(self, dtype): + # https://github.com/pandas-dev/pandas/issues/26778 + # [u]int | float -> float + index = Index([0, 2, 3], dtype=dtype) + other = Float64Index([0.5, 1.5]) + expected = Float64Index([0.0, 0.5, 1.5, 2.0, 3.0]) + result = index.union(other) + tm.assert_index_equal(result, expected) + + result = other.union(index) + tm.assert_index_equal(result, expected) + + def test_range_float_union_dtype(self): + # https://github.com/pandas-dev/pandas/issues/26778 + index = RangeIndex(start=0, stop=3) + other = Float64Index([0.5, 1.5]) + result = index.union(other) + expected = Float64Index([0.0, 0.5, 1, 1.5, 2.0]) + tm.assert_index_equal(result, expected) + + result = other.union(index) + tm.assert_index_equal(result, expected) + + def test_float64_index_difference(self): + # https://github.com/pandas-dev/pandas/issues/35217 + float_index = Index([1.0, 2, 3]) + string_index = Index(["1", "2", "3"]) + + result = float_index.difference(string_index) + tm.assert_index_equal(result, float_index) + + result = string_index.difference(float_index) + tm.assert_index_equal(result, string_index) + + def test_intersection_uint64_outside_int64_range(self, index_large): + other = Index([2 ** 63, 2 ** 63 + 5, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20]) + result = index_large.intersection(other) + expected = Index(np.sort(np.intersect1d(index_large.values, other.values))) + tm.assert_index_equal(result, expected) + + result = other.intersection(index_large) + expected = Index( + np.sort(np.asarray(np.intersect1d(index_large.values, other.values))) + ) 
+ tm.assert_index_equal(result, expected) + + @pytest.mark.parametrize( + "index2,keeps_name", + [ + (Index([4, 7, 6, 5, 3], name="index"), True), + (Index([4, 7, 6, 5, 3], name="other"), False), + ], + ) + def test_intersection_monotonic(self, index2, keeps_name, sort): + index1 = Index([5, 3, 2, 4, 1], name="index") + expected = Index([5, 3, 4]) + + if keeps_name: + expected.name = "index" + + result = index1.intersection(index2, sort=sort) + if sort is None: + expected = expected.sort_values() + tm.assert_index_equal(result, expected) + + +class TestSetOpsSort: + @pytest.mark.parametrize("slice_", [slice(None), slice(0)]) + def test_union_sort_other_special(self, slice_): + # https://github.com/pandas-dev/pandas/issues/24959 + + idx = Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + tm.assert_index_equal(idx.union(other), idx) + tm.assert_index_equal(other.union(idx), idx) + + # sort=False + tm.assert_index_equal(idx.union(other, sort=False), idx) + + @pytest.mark.xfail(reason="Not implemented") + @pytest.mark.parametrize("slice_", [slice(None), slice(0)]) + def test_union_sort_special_true(self, slice_): + # TODO: decide on True behaviour + # sort=True + idx = Index([1, 0, 2]) + # default, sort=None + other = idx[slice_] + + result = idx.union(other, sort=True) + expected = Index([0, 1, 2]) + tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/test_base.py b/pandas/tests/indexes/test_base.py index ec03d5466d1f0..2e3a70e8c2215 100644 --- a/pandas/tests/indexes/test_base.py +++ b/pandas/tests/indexes/test_base.py @@ -660,54 +660,6 @@ def test_intersection_name_preservation2( intersect = first.intersection(second, sort=sort) assert intersect.name == expected_name - @pytest.mark.parametrize( - "index2,keeps_name", - [ - (Index([4, 7, 6, 5, 3], name="index"), True), - (Index([4, 7, 6, 5, 3], name="other"), False), - ], - ) - def test_intersection_monotonic(self, index2, keeps_name, sort): - index1 = Index([5, 3, 2, 4, 1], 
name="index") - expected = Index([5, 3, 4]) - - if keeps_name: - expected.name = "index" - - result = index1.intersection(index2, sort=sort) - if sort is None: - expected = expected.sort_values() - tm.assert_index_equal(result, expected) - - @pytest.mark.parametrize( - "index2,expected_arr", - [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B", "A"])], - ) - def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort): - # non-monotonic non-unique - index1 = Index(["A", "B", "A", "C"]) - expected = Index(expected_arr, dtype="object") - result = index1.intersection(index2, sort=sort) - if sort is None: - expected = expected.sort_values() - tm.assert_index_equal(result, expected) - - def test_intersect_str_dates(self, sort): - dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)] - - i1 = Index(dt_dates, dtype=object) - i2 = Index(["aa"], dtype=object) - result = i2.intersection(i1, sort=sort) - - assert len(result) == 0 - - @pytest.mark.xfail(reason="Not implemented") - def test_intersection_equal_sort_true(self): - # TODO decide on True behaviour - idx = Index(["c", "a", "b"]) - sorted_ = Index(["a", "b", "c"]) - tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_) - def test_chained_union(self, sort): # Chained unions handles names correctly i1 = Index([1, 2], name="i1") @@ -735,32 +687,6 @@ def test_union(self, index, sort): tm.assert_index_equal(union, everything.sort_values()) assert tm.equalContents(union, everything) - @pytest.mark.parametrize("slice_", [slice(None), slice(0)]) - def test_union_sort_other_special(self, slice_): - # https://github.com/pandas-dev/pandas/issues/24959 - - idx = Index([1, 0, 2]) - # default, sort=None - other = idx[slice_] - tm.assert_index_equal(idx.union(other), idx) - tm.assert_index_equal(other.union(idx), idx) - - # sort=False - tm.assert_index_equal(idx.union(other, sort=False), idx) - - @pytest.mark.xfail(reason="Not implemented") - @pytest.mark.parametrize("slice_", 
[slice(None), slice(0)]) - def test_union_sort_special_true(self, slice_): - # TODO decide on True behaviour - # sort=True - idx = Index([1, 0, 2]) - # default, sort=None - other = idx[slice_] - - result = idx.union(other, sort=True) - expected = Index([0, 1, 2]) - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize("klass", [np.array, Series, list]) @pytest.mark.parametrize("index", ["string"], indirect=True) def test_union_from_iterables(self, index, klass, sort): @@ -791,28 +717,6 @@ def test_union_identity(self, index, sort): union = Index([]).union(first, sort=sort) assert (union is first) is (not sort) - @pytest.mark.parametrize("first_list", [list("ba"), list()]) - @pytest.mark.parametrize("second_list", [list("ab"), list()]) - @pytest.mark.parametrize( - "first_name, second_name, expected_name", - [("A", "B", None), (None, "B", None), ("A", None, None)], - ) - def test_union_name_preservation( - self, first_list, second_list, first_name, second_name, expected_name, sort - ): - first = Index(first_list, name=first_name) - second = Index(second_list, name=second_name) - union = first.union(second, sort=sort) - - vals = set(first_list).union(second_list) - - if sort is None and len(first_list) > 0 and len(second_list) > 0: - expected = Index(sorted(vals), name=expected_name) - tm.assert_index_equal(union, expected) - else: - expected = Index(vals, name=expected_name) - assert tm.equalContents(union, expected) - def test_union_dt_as_obj(self, sort): # TODO: Replace with fixturesult index = self.create_index() @@ -820,10 +724,7 @@ def test_union_dt_as_obj(self, sort): first_cat = index.union(date_index) second_cat = index.union(index) - if date_index.dtype == np.object_: - appended = np.append(index, date_index) - else: - appended = np.append(index, date_index.astype("O")) + appended = np.append(index, date_index.astype("O")) assert tm.equalContents(first_cat, appended) assert tm.equalContents(second_cat, index) @@ -1595,55 +1496,6 @@ def 
test_drop_tuple(self, values, to_drop): with pytest.raises(KeyError, match=msg): removed.drop(drop_me) - @pytest.mark.parametrize( - "method,expected,sort", - [ - ( - "intersection", - np.array( - [(1, "A"), (2, "A"), (1, "B"), (2, "B")], - dtype=[("num", int), ("let", "a1")], - ), - False, - ), - ( - "intersection", - np.array( - [(1, "A"), (1, "B"), (2, "A"), (2, "B")], - dtype=[("num", int), ("let", "a1")], - ), - None, - ), - ( - "union", - np.array( - [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")], - dtype=[("num", int), ("let", "a1")], - ), - None, - ), - ], - ) - def test_tuple_union_bug(self, method, expected, sort): - index1 = Index( - np.array( - [(1, "A"), (2, "A"), (1, "B"), (2, "B")], - dtype=[("num", int), ("let", "a1")], - ) - ) - index2 = Index( - np.array( - [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")], - dtype=[("num", int), ("let", "a1")], - ) - ) - - result = getattr(index1, method)(index2, sort=sort) - assert result.ndim == 1 - - expected = Index(expected) - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize( "attr", [ diff --git a/pandas/tests/indexes/test_numeric.py b/pandas/tests/indexes/test_numeric.py index d69cbeac31a32..11f2a9f07a4c2 100644 --- a/pandas/tests/indexes/test_numeric.py +++ b/pandas/tests/indexes/test_numeric.py @@ -1,4 +1,4 @@ -from datetime import datetime, timedelta +from datetime import datetime import numpy as np import pytest @@ -408,18 +408,6 @@ def test_identical(self): assert not index.astype(dtype=object).identical(index.astype(dtype=self._dtype)) - def test_union_noncomparable(self): - # corner case, non-Int64Index - index = self.create_index() - other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object) - result = index.union(other) - expected = Index(np.concatenate((index, other))) - tm.assert_index_equal(result, expected) - - result = other.union(index) - expected = Index(np.concatenate((other, index))) - tm.assert_index_equal(result, expected) - 
def test_cant_or_shouldnt_cast(self): msg = ( "String dtype not supported, " @@ -535,19 +523,6 @@ def test_coerce_list(self): arr = Index([1, 2, 3, 4], dtype=object) assert isinstance(arr, Index) - def test_intersection(self): - index = self.create_index() - other = Index([1, 2, 3, 4, 5]) - result = index.intersection(other) - expected = Index(np.sort(np.intersect1d(index.values, other.values))) - tm.assert_index_equal(result, expected) - - result = other.intersection(index) - expected = Index( - np.sort(np.asarray(np.intersect1d(index.values, other.values))) - ) - tm.assert_index_equal(result, expected) - class TestUInt64Index(NumericInt): @@ -564,14 +539,8 @@ class TestUInt64Index(NumericInt): def index(self, request): return UInt64Index(request.param) - @pytest.fixture - def index_large(self): - # large values used in TestUInt64Index where no compat needed with Int64/Float64 - large = [2 ** 63, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20, 2 ** 63 + 25] - return UInt64Index(large) - def create_index(self) -> UInt64Index: - # compat with shared Int64/Float64 tests; use index_large for UInt64 only tests + # compat with shared Int64/Float64 tests return UInt64Index(np.arange(5, dtype="uint64")) def test_constructor(self): @@ -596,44 +565,6 @@ def test_constructor(self): res = Index([1, 2 ** 63 + 1], dtype=np.uint64) tm.assert_index_equal(res, idx) - def test_intersection(self, index_large): - other = Index([2 ** 63, 2 ** 63 + 5, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20]) - result = index_large.intersection(other) - expected = Index(np.sort(np.intersect1d(index_large.values, other.values))) - tm.assert_index_equal(result, expected) - - result = other.intersection(index_large) - expected = Index( - np.sort(np.asarray(np.intersect1d(index_large.values, other.values))) - ) - tm.assert_index_equal(result, expected) - - -@pytest.mark.parametrize("dtype", ["int64", "uint64"]) -def test_int_float_union_dtype(dtype): - # https://github.com/pandas-dev/pandas/issues/26778 - # 
[u]int | float -> float - index = Index([0, 2, 3], dtype=dtype) - other = Float64Index([0.5, 1.5]) - expected = Float64Index([0.0, 0.5, 1.5, 2.0, 3.0]) - result = index.union(other) - tm.assert_index_equal(result, expected) - - result = other.union(index) - tm.assert_index_equal(result, expected) - - -def test_range_float_union_dtype(): - # https://github.com/pandas-dev/pandas/issues/26778 - index = pd.RangeIndex(start=0, stop=3) - other = Float64Index([0.5, 1.5]) - result = index.union(other) - expected = Float64Index([0.0, 0.5, 1, 1.5, 2.0]) - tm.assert_index_equal(result, expected) - - result = other.union(index) - tm.assert_index_equal(result, expected) - @pytest.mark.parametrize( "box", @@ -675,15 +606,3 @@ def test_float64_index_equals(): result = string_index.equals(float_index) assert result is False - - -def test_float64_index_difference(): - # https://github.com/pandas-dev/pandas/issues/35217 - float_index = Index([1.0, 2, 3]) - string_index = Index(["1", "2", "3"]) - - result = float_index.difference(string_index) - tm.assert_index_equal(result, float_index) - - result = string_index.difference(float_index) - tm.assert_index_equal(result, string_index)