Skip to content

TST/REF: collect Index setops tests #38019

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 24, 2020
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
104 changes: 103 additions & 1 deletion pandas/tests/indexes/base_class/test_setops.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from datetime import datetime

import numpy as np
import pytest

Expand Down Expand Up @@ -91,6 +93,13 @@ def test_union_sort_other_incomparable_true(self):
with pytest.raises(TypeError, match=".*"):
idx.union(idx[:1], sort=True)

@pytest.mark.xfail(reason="Not implemented")
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do we have an issue number for this? pls add here and xfail msg

def test_intersection_equal_sort_true(self):
# TODO decide on True behaviour
idx = Index(["c", "a", "b"])
sorted_ = Index(["a", "b", "c"])
tm.assert_index_equal(idx.intersection(idx, sort=True), sorted_)

def test_intersection_base(self, sort):
# (same results for py2 and py3 but sortedness not tested elsewhere)
index = Index([0, "a", 1, "b", 2, "c"])
Expand All @@ -111,7 +120,7 @@ def test_intersection_different_type_base(self, klass, sort):
result = first.intersection(klass(second.values), sort=sort)
assert tm.equalContents(result, second)

def test_intersect_nosort(self):
def test_intersection_nosort(self):
result = Index(["c", "b", "a"]).intersection(["b", "a"])
expected = Index(["b", "a"])
tm.assert_index_equal(result, expected)
Expand All @@ -121,6 +130,28 @@ def test_intersection_equal_sort(self):
tm.assert_index_equal(idx.intersection(idx, sort=False), idx)
tm.assert_index_equal(idx.intersection(idx, sort=None), idx)

def test_intersection_str_dates(self, sort):
dt_dates = [datetime(2012, 2, 9), datetime(2012, 2, 22)]

i1 = Index(dt_dates, dtype=object)
i2 = Index(["aa"], dtype=object)
result = i2.intersection(i1, sort=sort)

assert len(result) == 0

@pytest.mark.parametrize(
"index2,expected_arr",
[(Index(["B", "D"]), ["B"]), (Index(["B", "D", "A"]), ["A", "B", "A"])],
)
def test_intersection_non_monotonic_non_unique(self, index2, expected_arr, sort):
# non-monotonic non-unique
index1 = Index(["A", "B", "A", "C"])
expected = Index(expected_arr, dtype="object")
result = index1.intersection(index2, sort=sort)
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)

def test_difference_base(self, sort):
# (same results for py2 and py3 but sortedness not tested elsewhere)
index = Index([0, "a", 1, "b", 2, "c"])
Expand All @@ -142,3 +173,74 @@ def test_symmetric_difference(self):
result = first.symmetric_difference(second)
expected = Index([0, 1, 2, "a", "c"])
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
"method,expected,sort",
[
(
"intersection",
np.array(
[(1, "A"), (2, "A"), (1, "B"), (2, "B")],
dtype=[("num", int), ("let", "a1")],
),
False,
),
(
"intersection",
np.array(
[(1, "A"), (1, "B"), (2, "A"), (2, "B")],
dtype=[("num", int), ("let", "a1")],
),
None,
),
(
"union",
np.array(
[(1, "A"), (1, "B"), (1, "C"), (2, "A"), (2, "B"), (2, "C")],
dtype=[("num", int), ("let", "a1")],
),
None,
),
],
)
def test_tuple_union_bug(self, method, expected, sort):
index1 = Index(
np.array(
[(1, "A"), (2, "A"), (1, "B"), (2, "B")],
dtype=[("num", int), ("let", "a1")],
)
)
index2 = Index(
np.array(
[(1, "A"), (2, "A"), (1, "B"), (2, "B"), (1, "C"), (2, "C")],
dtype=[("num", int), ("let", "a1")],
)
)

result = getattr(index1, method)(index2, sort=sort)
assert result.ndim == 1

expected = Index(expected)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("first_list", [list("ba"), list()])
@pytest.mark.parametrize("second_list", [list("ab"), list()])
@pytest.mark.parametrize(
"first_name, second_name, expected_name",
[("A", "B", None), (None, "B", None), ("A", None, None)],
)
def test_union_name_preservation(
self, first_list, second_list, first_name, second_name, expected_name, sort
):
first = Index(first_list, name=first_name)
second = Index(second_list, name=second_name)
union = first.union(second, sort=sort)

vals = set(first_list).union(second_list)

if sort is None and len(first_list) > 0 and len(second_list) > 0:
expected = Index(sorted(vals), name=expected_name)
tm.assert_index_equal(union, expected)
else:
expected = Index(vals, name=expected_name)
tm.equalContents(union, expected)
139 changes: 139 additions & 0 deletions pandas/tests/indexes/numeric/test_setops.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
from datetime import datetime, timedelta

import numpy as np
import pytest

from pandas import Float64Index, Index, Int64Index, RangeIndex, UInt64Index
import pandas._testing as tm


@pytest.fixture
def index_large():
    """Return a UInt64Index whose values start at ``2 ** 63``.

    Large values used in TestUInt64Index where no compat is needed with
    Int64/Float64.
    """
    base = 2 ** 63
    return UInt64Index([base + offset for offset in (0, 10, 15, 20, 25)])


class TestSetOps:
@pytest.mark.parametrize("dtype", ["f8", "u8", "i8"])
def test_union_non_numeric(self, dtype):
# corner case, non-numeric
index = Index(np.arange(5, dtype=dtype), dtype=dtype)
assert index.dtype == dtype

other = Index([datetime.now() + timedelta(i) for i in range(4)], dtype=object)
result = index.union(other)
expected = Index(np.concatenate((index, other)))
tm.assert_index_equal(result, expected)

result = other.union(index)
expected = Index(np.concatenate((other, index)))
tm.assert_index_equal(result, expected)

def test_intersection(self):
index = Int64Index(range(5))

other = Index([1, 2, 3, 4, 5])
result = index.intersection(other)
expected = Index(np.sort(np.intersect1d(index.values, other.values)))
tm.assert_index_equal(result, expected)

result = other.intersection(index)
expected = Index(
np.sort(np.asarray(np.intersect1d(index.values, other.values)))
)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize("dtype", ["int64", "uint64"])
def test_int_float_union_dtype(self, dtype):
# https://github.com/pandas-dev/pandas/issues/26778
# [u]int | float -> float
index = Index([0, 2, 3], dtype=dtype)
other = Float64Index([0.5, 1.5])
expected = Float64Index([0.0, 0.5, 1.5, 2.0, 3.0])
result = index.union(other)
tm.assert_index_equal(result, expected)

result = other.union(index)
tm.assert_index_equal(result, expected)

def test_range_float_union_dtype(self):
# https://github.com/pandas-dev/pandas/issues/26778
index = RangeIndex(start=0, stop=3)
other = Float64Index([0.5, 1.5])
result = index.union(other)
expected = Float64Index([0.0, 0.5, 1, 1.5, 2.0])
tm.assert_index_equal(result, expected)

result = other.union(index)
tm.assert_index_equal(result, expected)

def test_float64_index_difference(self):
# https://github.com/pandas-dev/pandas/issues/35217
float_index = Index([1.0, 2, 3])
string_index = Index(["1", "2", "3"])

result = float_index.difference(string_index)
tm.assert_index_equal(result, float_index)

result = string_index.difference(float_index)
tm.assert_index_equal(result, string_index)

def test_intersection_uint64_outside_int64_range(self, index_large):
other = Index([2 ** 63, 2 ** 63 + 5, 2 ** 63 + 10, 2 ** 63 + 15, 2 ** 63 + 20])
result = index_large.intersection(other)
expected = Index(np.sort(np.intersect1d(index_large.values, other.values)))
tm.assert_index_equal(result, expected)

result = other.intersection(index_large)
expected = Index(
np.sort(np.asarray(np.intersect1d(index_large.values, other.values)))
)
tm.assert_index_equal(result, expected)

@pytest.mark.parametrize(
"index2,keeps_name",
[
(Index([4, 7, 6, 5, 3], name="index"), True),
(Index([4, 7, 6, 5, 3], name="other"), False),
],
)
def test_intersection_monotonic(self, index2, keeps_name, sort):
index1 = Index([5, 3, 2, 4, 1], name="index")
expected = Index([5, 3, 4])

if keeps_name:
expected.name = "index"

result = index1.intersection(index2, sort=sort)
if sort is None:
expected = expected.sort_values()
tm.assert_index_equal(result, expected)


class TestSetOpsSort:
    """``union`` sorting when the other operand is the same index or empty."""

    @pytest.mark.parametrize("slice_", [slice(None), slice(0)])
    def test_union_sort_other_special(self, slice_):
        """Union with a full or empty slice of itself returns the index as-is."""
        # https://github.com/pandas-dev/pandas/issues/24959
        idx = Index([1, 0, 2])
        other = idx[slice_]

        # default, sort=None
        for left, right in ((idx, other), (other, idx)):
            tm.assert_index_equal(left.union(right), idx)

        # sort=False
        tm.assert_index_equal(idx.union(other, sort=False), idx)

    @pytest.mark.xfail(reason="Not implemented")
    @pytest.mark.parametrize("slice_", [slice(None), slice(0)])
    def test_union_sort_special_true(self, slice_):
        """Union with a full or empty slice of itself under ``sort=True``."""
        # TODO: decide on True behaviour
        idx = Index([1, 0, 2])
        other = idx[slice_]

        # sort=True
        outcome = idx.union(other, sort=True)
        tm.assert_index_equal(outcome, Index([0, 1, 2]))
Loading