Skip to content

TST/REF: collect Index setops tests #38019

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 24, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 104 additions & 2 deletions pandas/tests/indexes/base_class/test_setops.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import datetime

import numpy as np
import pytest

Expand Down Expand Up @@ -83,14 +85,21 @@ def test_union_sort_other_incomparable(self):
result = idx.union(idx[:1], sort=False)
tm.assert_index_equal(result, idx)

@pytest.mark.xfail(reason="GH#25151 need to decide on True behavior")
def test_union_sort_other_incomparable_true(self):
    """Union of an index holding incomparable values, with ``sort=True``.

    The index mixes an integer and a ``Timestamp``.  The test expects
    ``union(..., sort=True)`` to raise ``TypeError``; it is marked xfail
    until the ``sort=True`` behaviour is settled (GH#25151).
    """
    # TODO decide on True behaviour
    # sort=True
    idx = Index([1, pd.Timestamp("2000")])
    with pytest.raises(TypeError, match=".*"):
        idx.union(idx[:1], sort=True)

@pytest.mark.xfail(reason="GH#25151 need to decide on True behavior")
def test_intersection_equal_sort_true(self):
    """Self-intersection of an unsorted index with ``sort=True``.

    Expects the result to come back in sorted order; xfail until the
    ``sort=True`` behaviour is decided.
    """
    # TODO decide on True behaviour
    unsorted = Index(["c", "a", "b"])
    expected = Index(["a", "b", "c"])

    result = unsorted.intersection(unsorted, sort=True)
    tm.assert_index_equal(result, expected)

def test_intersection_base(self, sort):
# (same results for py2 and py3 but sortedness not tested elsewhere)
index = Index([0, "a", 1, "b", 2, "c"])
Expand All @@ -111,7 +120,7 @@ def test_intersection_different_type_base(self, klass, sort):
result = first.intersection(klass(second.values), sort=sort)
assert tm.equalContents(result, second)

def test_intersection_nosort(self):
    """Default intersection of ``["c", "b", "a"]`` with ``["b", "a"]``.

    Asserts that the result is ``Index(["b", "a"])``, i.e. the common
    elements in the order they appear in the left operand.
    """
    result = Index(["c", "b", "a"]).intersection(["b", "a"])
    expected = Index(["b", "a"])
    tm.assert_index_equal(result, expected)
Expand All @@ -121,6 +130,28 @@ def test_intersection_equal_sort(self):
tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
tm.assert_index_equal(idx.intersection(idx, sort=None), idx)

def test_intersection_str_dates(self, sort):
    """Intersecting an object index of strings with one of datetimes is empty."""
    date_index = Index(
        [datetime(2012, 2, 9), datetime(2012, 2, 22)],
        dtype=object,
    )
    str_index = Index(["aa"], dtype=object)

    common = str_index.intersection(date_index, sort=sort)

    assert len(common) == 0

@pytest.mark.parametrize(
    "index2,expected_arr",
    [(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B", "A"])],
)
def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
    """Intersection where the left index is neither monotonic nor unique.

    ``expected_arr`` is the expected result for the unsorted case; when
    ``sort`` is None the expectation is sorted before comparing.
    """
    # non-monotonic non-unique
    left = Index(["A", "B", "A", "C"])
    result = left.intersection(index2, sort=sort)

    expected = Index(expected_arr, dtype="object")
    if sort is None:
        expected = expected.sort_values()
    tm.assert_index_equal(result, expected)

def test_difference_base(self, sort):
# (same results for py2 and py3 but sortedness not tested elsewhere)
index = Index([0, "a", 1, "b", 2, "c"])
Expand All @@ -142,3 +173,74 @@ def test_symmetric_difference(self):
result = first.symmetric_difference(second)
expected = Index([0, 1, 2, "a", "c"])
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
    "method,expected,sort",
    [
        (
            "intersection",
            np.array(
                [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
                dtype=[("num", int), ("let", "S1")],
            ),
            False,
        ),
        (
            "intersection",
            np.array(
                [(1, "A"), (1, "B"), (2, "A"), (2, "B")],
                dtype=[("num", int), ("let", "S1")],
            ),
            None,
        ),
        (
            "union",
            np.array(
                [(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
                dtype=[("num", int), ("let", "S1")],
            ),
            None,
        ),
    ],
)
def test_tuple_union_bug(self, method, expected, sort):
    """Set operations on indexes built from NumPy structured arrays.

    ``method`` names the Index set operation to call, ``expected`` is the
    structured array the result should equal, and ``sort`` is forwarded to
    the operation.  The result must stay one-dimensional.

    The one-byte string field is spelled ``"S1"``: it is the same dtype as
    the ``"a1"`` alias, which NumPy 2.0 deprecated.
    """
    index1 = Index(
        np.array(
            [(1, "A"), (2, "A"), (1, "B"), (2, "B")],
            dtype=[("num", int), ("let", "S1")],
        )
    )
    index2 = Index(
        np.array(
            [(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
            dtype=[("num", int), ("let", "S1")],
        )
    )

    result = getattr(index1, method)(index2, sort=sort)
    assert result.ndim == 1

    expected = Index(expected)
    tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("first_list", [list("ba"), list()])
@pytest.mark.parametrize("second_list", [list("ab"), list()])
@pytest.mark.parametrize(
    "first_name, second_name, expected_name",
    [("A", "B", None), (None, "B", None), ("A", None, None)],
)
def test_union_name_preservation(
    self, first_list, second_list, first_name, second_name, expected_name, sort
):
    """Union of two named indexes, including empty operands.

    When ``sort`` is None and both operands are non-empty, the result is
    compared exactly (values, order and name) against the sorted union.
    Otherwise only the set of values is compared, because the result
    order is not pinned down in those cases.
    """
    first = Index(first_list, name=first_name)
    second = Index(second_list, name=second_name)
    union = first.union(second, sort=sort)

    vals = set(first_list).union(second_list)

    if sort is None and len(first_list) > 0 and len(second_list) > 0:
        expected = Index(sorted(vals), name=expected_name)
        tm.assert_index_equal(union, expected)
    else:
        expected = Index(vals, name=expected_name)
        # equalContents only returns a bool; without the assert this
        # branch would never fail.
        assert tm.equalContents(union, expected)
139 changes: 139 additions & 0 deletions pandas/tests/indexes/numeric/test_setops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
from datetime import datetime, timedelta

import numpy as np
import pytest

from pandas import Float64Index, Index, Int64Index, RangeIndex, UInt64Index
import pandas._testing as tm


@pytest.fixture
def index_large():
    """Return a UInt64Index whose values all start at ``2 ** 63``."""
    # large values used in TestUInt64Index where no compat needed with Int64/Float64
    base = 2 ** 63
    offsets = (0, 10, 15, 20, 25)
    return UInt64Index([base + offset for offset in offsets])


class TestSetOps:
    """Union / intersection / difference tests for numeric indexes."""

    @pytest.mark.parametrize("dtype", ["f8", "u8", "i8"])
    def test_union_non_numeric(self, dtype):
        """Union of a numeric index with an object index of datetimes."""
        # corner case, non-numeric
        numeric = Index(np.arange(5, dtype=dtype), dtype=dtype)
        assert numeric.dtype == dtype

        dates = [datetime.now() + timedelta(i) for i in range(4)]
        other = Index(dates, dtype=object)

        # Checked in both directions: the expected result is the plain
        # concatenation of the operands in call order.
        for left, right in ((numeric, other), (other, numeric)):
            result = left.union(right)
            expected = Index(np.concatenate((left, right)))
            tm.assert_index_equal(result, expected)

    def test_intersection(self):
        """Intersection of an Int64Index with a plain Index, both directions."""
        index = Int64Index(range(5))
        other = Index([1, 2, 3, 4, 5])

        common = np.intersect1d(index.values, other.values)

        result = index.intersection(other)
        tm.assert_index_equal(result, Index(np.sort(common)))

        result = other.intersection(index)
        tm.assert_index_equal(result, Index(np.sort(np.asarray(common))))

    @pytest.mark.parametrize("dtype", ["int64", "uint64"])
    def test_int_float_union_dtype(self, dtype):
        """Union of an integer index with a float index gives Float64Index."""
        # https://github.com/pandas-dev/pandas/issues/26778
        # [u]int | float -> float
        ints = Index([0, 2, 3], dtype=dtype)
        floats = Float64Index([0.5, 1.5])
        expected = Float64Index([0.0, 0.5, 1.5, 2.0, 3.0])

        tm.assert_index_equal(ints.union(floats), expected)
        tm.assert_index_equal(floats.union(ints), expected)

    def test_range_float_union_dtype(self):
        """Union of a RangeIndex with a float index gives Float64Index."""
        # https://github.com/pandas-dev/pandas/issues/26778
        rng = RangeIndex(start=0, stop=3)
        floats = Float64Index([0.5, 1.5])
        expected = Float64Index([0.0, 0.5, 1, 1.5, 2.0])

        tm.assert_index_equal(rng.union(floats), expected)
        tm.assert_index_equal(floats.union(rng), expected)

    def test_float64_index_difference(self):
        """Difference between a float index and a string index, both ways."""
        # https://github.com/pandas-dev/pandas/issues/35217
        float_index = Index([1.0, 2, 3])
        string_index = Index(["1", "2", "3"])

        # Each side is expected to come back unchanged.
        tm.assert_index_equal(float_index.difference(string_index), float_index)
        tm.assert_index_equal(string_index.difference(float_index), string_index)

    def test_intersection_uint64_outside_int64_range(self, index_large):
        """Intersection of indexes whose values start at ``2 ** 63``."""
        base = 2 ** 63
        other = Index([base, base + 5, base + 10, base + 15, base + 20])

        common = np.intersect1d(index_large.values, other.values)

        result = index_large.intersection(other)
        tm.assert_index_equal(result, Index(np.sort(common)))

        result = other.intersection(index_large)
        tm.assert_index_equal(result, Index(np.sort(np.asarray(common))))

    @pytest.mark.parametrize(
        "index2,keeps_name",
        [
            (Index([4, 7, 6, 5, 3], name="index"), True),
            (Index([4, 7, 6, 5, 3], name="other"), False),
        ],
    )
    def test_intersection_monotonic(self, index2, keeps_name, sort):
        """Intersection result values and name handling.

        The expected name is ``"index"`` only when ``keeps_name`` is true;
        when ``sort`` is None the expectation is sorted before comparing.
        """
        index1 = Index([5, 3, 2, 4, 1], name="index")
        result = index1.intersection(index2, sort=sort)

        expected = Index([5, 3, 4], name="index" if keeps_name else None)
        if sort is None:
            expected = expected.sort_values()
        tm.assert_index_equal(result, expected)


class TestSetOpsSort:
    """Tests for the ``sort`` keyword of ``Index.union`` on an unsorted index."""

    @pytest.mark.parametrize("slice_", [slice(None), slice(0)])
    def test_union_sort_other_special(self, slice_):
        """Union with a full or empty slice of itself leaves the index as is.

        ``slice(None)`` selects the whole index and ``slice(0)`` selects
        nothing, so ``other`` is either equal to ``idx`` or empty.
        """
        # https://github.com/pandas-dev/pandas/issues/24959

        idx = Index([1, 0, 2])
        # default, sort=None
        other = idx[slice_]
        tm.assert_index_equal(idx.union(other), idx)
        tm.assert_index_equal(other.union(idx), idx)

        # sort=False
        tm.assert_index_equal(idx.union(other, sort=False), idx)

    @pytest.mark.xfail(reason="GH#25151 need to decide on True behavior")
    @pytest.mark.parametrize("slice_", [slice(None), slice(0)])
    def test_union_sort_special_true(self, slice_):
        """Union with a full or empty slice of itself under ``sort=True``.

        Expects a sorted result; marked xfail until the ``sort=True``
        behaviour is decided.
        """
        # TODO: decide on True behaviour
        # sort=True
        idx = Index([1, 0, 2])
        other = idx[slice_]

        result = idx.union(other, sort=True)
        expected = Index([0, 1, 2])
        tm.assert_index_equal(result, expected)
Loading