Skip to content

Commit 9130da9

Browse files
authored
REF: MultiIndex Indexing tests (#33053)
1 parent 845c50c commit 9130da9

File tree

5 files changed

+139
-138
lines changed

5 files changed

+139
-138
lines changed

pandas/tests/indexes/multi/test_analytics.py

-17
Original file line number | Diff line number | Diff line change
@@ -57,23 +57,6 @@ def test_truncate():
5757
index.truncate(3, 1)
5858

5959

60-
def test_where():
61-
i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
62-
63-
msg = r"\.where is not supported for MultiIndex operations"
64-
with pytest.raises(NotImplementedError, match=msg):
65-
i.where(True)
66-
67-
68-
@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
69-
def test_where_array_like(klass):
70-
i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
71-
cond = [False, True]
72-
msg = r"\.where is not supported for MultiIndex operations"
73-
with pytest.raises(NotImplementedError, match=msg):
74-
i.where(klass(cond))
75-
76-
7760
# TODO: reshape
7861

7962

pandas/tests/indexes/multi/test_get_level_values.py

+79 -1
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,8 @@
1-
from pandas import MultiIndex, Timestamp, date_range
1+
import numpy as np
2+
3+
import pandas as pd
4+
from pandas import CategoricalIndex, Index, MultiIndex, Timestamp, date_range
5+
import pandas._testing as tm
26

37

48
class TestGetLevelValues:
@@ -11,3 +15,77 @@ def test_get_level_values_box_datetime64(self):
1115
index = MultiIndex(levels=levels, codes=codes)
1216

1317
assert isinstance(index.get_level_values(0)[0], Timestamp)
18+
19+
20+
def test_get_level_values(idx):
21+
result = idx.get_level_values(0)
22+
expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first")
23+
tm.assert_index_equal(result, expected)
24+
assert result.name == "first"
25+
26+
result = idx.get_level_values("first")
27+
expected = idx.get_level_values(0)
28+
tm.assert_index_equal(result, expected)
29+
30+
# GH 10460
31+
index = MultiIndex(
32+
levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])],
33+
codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])],
34+
)
35+
36+
exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"])
37+
tm.assert_index_equal(index.get_level_values(0), exp)
38+
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
39+
tm.assert_index_equal(index.get_level_values(1), exp)
40+
41+
42+
def test_get_level_values_all_na():
43+
# GH#17924 when level entirely consists of nan
44+
arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
45+
index = pd.MultiIndex.from_arrays(arrays)
46+
result = index.get_level_values(0)
47+
expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64)
48+
tm.assert_index_equal(result, expected)
49+
50+
result = index.get_level_values(1)
51+
expected = pd.Index(["a", np.nan, 1], dtype=object)
52+
tm.assert_index_equal(result, expected)
53+
54+
55+
def test_get_level_values_int_with_na():
56+
# GH#17924
57+
arrays = [["a", "b", "b"], [1, np.nan, 2]]
58+
index = pd.MultiIndex.from_arrays(arrays)
59+
result = index.get_level_values(1)
60+
expected = Index([1, np.nan, 2])
61+
tm.assert_index_equal(result, expected)
62+
63+
arrays = [["a", "b", "b"], [np.nan, np.nan, 2]]
64+
index = pd.MultiIndex.from_arrays(arrays)
65+
result = index.get_level_values(1)
66+
expected = Index([np.nan, np.nan, 2])
67+
tm.assert_index_equal(result, expected)
68+
69+
70+
def test_get_level_values_na():
71+
arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
72+
index = pd.MultiIndex.from_arrays(arrays)
73+
result = index.get_level_values(0)
74+
expected = pd.Index([np.nan, np.nan, np.nan])
75+
tm.assert_index_equal(result, expected)
76+
77+
result = index.get_level_values(1)
78+
expected = pd.Index(["a", np.nan, 1])
79+
tm.assert_index_equal(result, expected)
80+
81+
arrays = [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])]
82+
index = pd.MultiIndex.from_arrays(arrays)
83+
result = index.get_level_values(1)
84+
expected = pd.DatetimeIndex([0, 1, pd.NaT])
85+
tm.assert_index_equal(result, expected)
86+
87+
arrays = [[], []]
88+
index = pd.MultiIndex.from_arrays(arrays)
89+
result = index.get_level_values(0)
90+
expected = pd.Index([], dtype=object)
91+
tm.assert_index_equal(result, expected)

pandas/tests/indexes/multi/test_get_set.py

+1 -85
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@
22
import pytest
33

44
import pandas as pd
5-
from pandas import CategoricalIndex, Index, MultiIndex
5+
from pandas import CategoricalIndex, MultiIndex
66
import pandas._testing as tm
77

88

@@ -27,90 +27,6 @@ def test_get_level_number_integer(idx):
2727
idx._get_level_number("fourth")
2828

2929

30-
def test_get_level_values(idx):
31-
result = idx.get_level_values(0)
32-
expected = Index(["foo", "foo", "bar", "baz", "qux", "qux"], name="first")
33-
tm.assert_index_equal(result, expected)
34-
assert result.name == "first"
35-
36-
result = idx.get_level_values("first")
37-
expected = idx.get_level_values(0)
38-
tm.assert_index_equal(result, expected)
39-
40-
# GH 10460
41-
index = MultiIndex(
42-
levels=[CategoricalIndex(["A", "B"]), CategoricalIndex([1, 2, 3])],
43-
codes=[np.array([0, 0, 0, 1, 1, 1]), np.array([0, 1, 2, 0, 1, 2])],
44-
)
45-
46-
exp = CategoricalIndex(["A", "A", "A", "B", "B", "B"])
47-
tm.assert_index_equal(index.get_level_values(0), exp)
48-
exp = CategoricalIndex([1, 2, 3, 1, 2, 3])
49-
tm.assert_index_equal(index.get_level_values(1), exp)
50-
51-
52-
def test_get_value_duplicates():
53-
index = MultiIndex(
54-
levels=[["D", "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
55-
codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
56-
names=["tag", "day"],
57-
)
58-
59-
assert index.get_loc("D") == slice(0, 3)
60-
61-
62-
def test_get_level_values_all_na():
63-
# GH 17924 when level entirely consists of nan
64-
arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
65-
index = pd.MultiIndex.from_arrays(arrays)
66-
result = index.get_level_values(0)
67-
expected = pd.Index([np.nan, np.nan, np.nan], dtype=np.float64)
68-
tm.assert_index_equal(result, expected)
69-
70-
result = index.get_level_values(1)
71-
expected = pd.Index(["a", np.nan, 1], dtype=object)
72-
tm.assert_index_equal(result, expected)
73-
74-
75-
def test_get_level_values_int_with_na():
76-
# GH 17924
77-
arrays = [["a", "b", "b"], [1, np.nan, 2]]
78-
index = pd.MultiIndex.from_arrays(arrays)
79-
result = index.get_level_values(1)
80-
expected = Index([1, np.nan, 2])
81-
tm.assert_index_equal(result, expected)
82-
83-
arrays = [["a", "b", "b"], [np.nan, np.nan, 2]]
84-
index = pd.MultiIndex.from_arrays(arrays)
85-
result = index.get_level_values(1)
86-
expected = Index([np.nan, np.nan, 2])
87-
tm.assert_index_equal(result, expected)
88-
89-
90-
def test_get_level_values_na():
91-
arrays = [[np.nan, np.nan, np.nan], ["a", np.nan, 1]]
92-
index = pd.MultiIndex.from_arrays(arrays)
93-
result = index.get_level_values(0)
94-
expected = pd.Index([np.nan, np.nan, np.nan])
95-
tm.assert_index_equal(result, expected)
96-
97-
result = index.get_level_values(1)
98-
expected = pd.Index(["a", np.nan, 1])
99-
tm.assert_index_equal(result, expected)
100-
101-
arrays = [["a", "b", "b"], pd.DatetimeIndex([0, 1, pd.NaT])]
102-
index = pd.MultiIndex.from_arrays(arrays)
103-
result = index.get_level_values(1)
104-
expected = pd.DatetimeIndex([0, 1, pd.NaT])
105-
tm.assert_index_equal(result, expected)
106-
107-
arrays = [[], []]
108-
index = pd.MultiIndex.from_arrays(arrays)
109-
result = index.get_level_values(0)
110-
expected = pd.Index([], dtype=object)
111-
tm.assert_index_equal(result, expected)
112-
113-
11430
def test_set_name_methods(idx, index_names):
11531
# so long as these are synonyms, we don't need to test set_names
11632
assert idx.rename == idx.set_names

pandas/tests/indexes/multi/test_indexing.py

+59
Original file line number | Diff line number | Diff line change
@@ -441,6 +441,65 @@ def test_get_loc_with_values_including_missing_values(self):
441441
expected = slice(2, 4, None)
442442
assert idx.get_loc((np.nan, 1)) == expected
443443

444+
def test_get_loc_duplicates2(self):
445+
# TODO: de-duplicate with test_get_loc_duplicates above?
446+
index = MultiIndex(
447+
levels=[["D", "B", "C"], [0, 26, 27, 37, 57, 67, 75, 82]],
448+
codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
449+
names=["tag", "day"],
450+
)
451+
452+
assert index.get_loc("D") == slice(0, 3)
453+
454+
455+
class TestWhere:
456+
def test_where(self):
457+
i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
458+
459+
msg = r"\.where is not supported for MultiIndex operations"
460+
with pytest.raises(NotImplementedError, match=msg):
461+
i.where(True)
462+
463+
@pytest.mark.parametrize("klass", [list, tuple, np.array, pd.Series])
464+
def test_where_array_like(self, klass):
465+
i = MultiIndex.from_tuples([("A", 1), ("A", 2)])
466+
cond = [False, True]
467+
msg = r"\.where is not supported for MultiIndex operations"
468+
with pytest.raises(NotImplementedError, match=msg):
469+
i.where(klass(cond))
470+
471+
472+
class TestContains:
473+
def test_contains_top_level(self):
474+
midx = MultiIndex.from_product([["A", "B"], [1, 2]])
475+
assert "A" in midx
476+
assert "A" not in midx._engine
477+
478+
def test_contains_with_nat(self):
479+
# MI with a NaT
480+
mi = MultiIndex(
481+
levels=[["C"], pd.date_range("2012-01-01", periods=5)],
482+
codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
483+
names=[None, "B"],
484+
)
485+
assert ("C", pd.Timestamp("2012-01-01")) in mi
486+
for val in mi.values:
487+
assert val in mi
488+
489+
def test_contains(self, idx):
490+
assert ("foo", "two") in idx
491+
assert ("bar", "two") not in idx
492+
assert None not in idx
493+
494+
def test_contains_with_missing_value(self):
495+
# GH#19132
496+
idx = MultiIndex.from_arrays([[1, np.nan, 2]])
497+
assert np.nan in idx
498+
499+
idx = MultiIndex.from_arrays([[1, 2], [np.nan, 3]])
500+
assert np.nan not in idx
501+
assert (1, np.nan) in idx
502+
444503

445504
def test_timestamp_multiindex_indexer():
446505
# https://github.com/pandas-dev/pandas/issues/26944

pandas/tests/indexes/multi/test_contains.py renamed to pandas/tests/indexes/multi/test_isin.py

-35
Original file line number | Diff line number | Diff line change
@@ -3,35 +3,10 @@
33

44
from pandas.compat import PYPY
55

6-
import pandas as pd
76
from pandas import MultiIndex
87
import pandas._testing as tm
98

109

11-
def test_contains_top_level():
12-
midx = MultiIndex.from_product([["A", "B"], [1, 2]])
13-
assert "A" in midx
14-
assert "A" not in midx._engine
15-
16-
17-
def test_contains_with_nat():
18-
# MI with a NaT
19-
mi = MultiIndex(
20-
levels=[["C"], pd.date_range("2012-01-01", periods=5)],
21-
codes=[[0, 0, 0, 0, 0, 0], [-1, 0, 1, 2, 3, 4]],
22-
names=[None, "B"],
23-
)
24-
assert ("C", pd.Timestamp("2012-01-01")) in mi
25-
for val in mi.values:
26-
assert val in mi
27-
28-
29-
def test_contains(idx):
30-
assert ("foo", "two") in idx
31-
assert ("bar", "two") not in idx
32-
assert None not in idx
33-
34-
3510
@pytest.mark.skipif(not PYPY, reason="tuples cmp recursively on PyPy")
3611
def test_isin_nan_pypy():
3712
idx = MultiIndex.from_arrays([["foo", "bar"], [1.0, np.nan]])
@@ -100,16 +75,6 @@ def test_isin_level_kwarg():
10075
idx.isin(vals_1, level="C")
10176

10277

103-
def test_contains_with_missing_value():
104-
# issue 19132
105-
idx = MultiIndex.from_arrays([[1, np.nan, 2]])
106-
assert np.nan in idx
107-
108-
idx = MultiIndex.from_arrays([[1, 2], [np.nan, 3]])
109-
assert np.nan not in idx
110-
assert (1, np.nan) in idx
111-
112-
11378
@pytest.mark.parametrize(
11479
"labels,expected,level",
11580
[

0 commit comments

Comments
 (0)