Skip to content

Commit 77ee2bf

Browse files
authored
REF/TST: misplaced MultiIndex tests (#32314)
1 parent c20528e commit 77ee2bf

8 files changed

+258
-244
lines changed

pandas/tests/indexes/multi/test_constructors.py

+74-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
from datetime import date, datetime
2+
import itertools
3+
14
import numpy as np
25
import pytest
36

@@ -6,7 +9,7 @@
69
from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
710

811
import pandas as pd
9-
from pandas import Index, MultiIndex, date_range
12+
from pandas import Index, MultiIndex, Series, date_range
1013
import pandas._testing as tm
1114

1215

@@ -723,3 +726,73 @@ def test_index_equal_empty_iterable():
723726
a = MultiIndex(levels=[[], []], codes=[[], []], names=["a", "b"])
724727
b = MultiIndex.from_arrays(arrays=[[], []], names=["a", "b"])
725728
tm.assert_index_equal(a, b)
729+
730+
731+
def test_raise_invalid_sortorder():
    # The MultiIndex constructor must raise when the requested sortorder
    # is larger than the lexsort depth of the supplied codes.
    # GH#28518
    levels = [[0, 1], [0, 1, 2]]

    sorted_codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
    depth_one_codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]
    depth_zero_codes = [[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]

    # Matching sortorder: construction succeeds without raising
    MultiIndex(levels=levels, codes=sorted_codes, sortorder=2)

    msg = r".* sortorder 2 with lexsort_depth 1.*"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(levels=levels, codes=depth_one_codes, sortorder=2)

    msg = r".* sortorder 1 with lexsort_depth 0.*"
    with pytest.raises(ValueError, match=msg):
        MultiIndex(levels=levels, codes=depth_zero_codes, sortorder=1)
751+
752+
753+
def test_datetimeindex():
754+
idx1 = pd.DatetimeIndex(
755+
["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"] * 2, tz="Asia/Tokyo",
756+
)
757+
idx2 = pd.date_range("2010/01/01", periods=6, freq="M", tz="US/Eastern")
758+
idx = MultiIndex.from_arrays([idx1, idx2])
759+
760+
expected1 = pd.DatetimeIndex(
761+
["2013-04-01 9:00", "2013-04-02 9:00", "2013-04-03 9:00"], tz="Asia/Tokyo"
762+
)
763+
764+
tm.assert_index_equal(idx.levels[0], expected1)
765+
tm.assert_index_equal(idx.levels[1], idx2)
766+
767+
# from datetime combos
768+
# GH 7888
769+
date1 = date.today()
770+
date2 = datetime.today()
771+
date3 = Timestamp.today()
772+
773+
for d1, d2 in itertools.product([date1, date2, date3], [date1, date2, date3]):
774+
index = MultiIndex.from_product([[d1], [d2]])
775+
assert isinstance(index.levels[0], pd.DatetimeIndex)
776+
assert isinstance(index.levels[1], pd.DatetimeIndex)
777+
778+
779+
def test_constructor_with_tz():
    # from_arrays must keep names and tz-aware levels, whether the inputs
    # are DatetimeIndex objects or Series wrapping them.
    index = pd.DatetimeIndex(
        ["2013/01/01 09:00", "2013/01/02 09:00"], name="dt1", tz="US/Pacific"
    )
    columns = pd.DatetimeIndex(
        ["2014/01/01 09:00", "2014/01/02 09:00"], name="dt2", tz="Asia/Tokyo"
    )

    input_variants = (
        [index, columns],
        [Series(index), Series(columns)],
    )
    for arrays in input_variants:
        result = MultiIndex.from_arrays(arrays)

        assert result.names == ["dt1", "dt2"]
        tm.assert_index_equal(result.levels[0], index)
        tm.assert_index_equal(result.levels[1], columns)

pandas/tests/indexes/multi/test_duplicates.py

+26
Original file line numberDiff line numberDiff line change
@@ -274,3 +274,29 @@ def test_duplicated2():
274274
tm.assert_numpy_array_equal(
275275
mi.duplicated(), np.zeros(len(mi), dtype="bool")
276276
)
277+
278+
279+
def test_duplicated_drop_duplicates():
    # GH#4060
    idx = MultiIndex.from_arrays(([1, 2, 3, 1, 2, 3], [1, 1, 1, 1, 2, 2]))

    # Each case: keyword arguments passed to both methods, the expected
    # duplicated() mask, and the arrays of the expected drop_duplicates()
    # result. The first case exercises the default ``keep``.
    cases = [
        (
            {},
            [False, False, False, True, False, False],
            ([1, 2, 3, 2, 3], [1, 1, 1, 2, 2]),
        ),
        (
            {"keep": "last"},
            [True, False, False, False, False, False],
            ([2, 3, 1, 2, 3], [1, 1, 1, 2, 2]),
        ),
        (
            {"keep": False},
            [True, False, False, True, False, False],
            ([2, 3, 2, 3], [1, 1, 2, 2]),
        ),
    ]

    for kwargs, mask, remaining in cases:
        duplicated = idx.duplicated(**kwargs)
        tm.assert_numpy_array_equal(duplicated, np.array(mask, dtype=bool))
        assert duplicated.dtype == bool

        expected = MultiIndex.from_arrays(remaining)
        tm.assert_index_equal(idx.drop_duplicates(**kwargs), expected)

pandas/tests/indexes/multi/test_format.py

+13-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
import warnings
22

3+
import numpy as np
34
import pytest
45

56
import pandas as pd
6-
from pandas import MultiIndex
7+
from pandas import Index, MultiIndex
78
import pandas._testing as tm
89

910

@@ -76,6 +77,17 @@ def test_repr_max_seq_item_setting(idx):
7677

7778

7879
class TestRepr:
80+
def test_unicode_repr_issues(self):
81+
levels = [Index(["a/\u03c3", "b/\u03c3", "c/\u03c3"]), Index([0, 1])]
82+
codes = [np.arange(3).repeat(2), np.tile(np.arange(2), 3)]
83+
index = MultiIndex(levels=levels, codes=codes)
84+
85+
repr(index.levels)
86+
87+
# FIXME: dont leave commented-out
88+
# NumPy bug
89+
# repr(index.get_level_values(1))
90+
7991
def test_repr(self, idx):
8092
result = idx[:1].__repr__()
8193
expected = """\

pandas/tests/indexes/multi/test_indexing.py

+38
Original file line numberDiff line numberDiff line change
@@ -498,3 +498,41 @@ def test_slice_indexer_with_missing_value(index_arr, expected, start_idx, end_id
498498
idx = MultiIndex.from_arrays(index_arr)
499499
result = idx.slice_indexer(start=start_idx, end=end_idx)
500500
assert result == expected
501+
502+
503+
def test_pyint_engine():
    # GH#18519 : when combinations of codes cannot be represented in 64
    # bits, the index underlying the MultiIndex engine works with Python
    # integers, rather than uint64.
    N = 5
    keys = [
        tuple(values)
        for values in [
            [0] * 10 * N,
            [1] * 10 * N,
            [2] * 10 * N,
            [np.nan] * N + [2] * 9 * N,
            [0] * N + [2] * 9 * N,
            [np.nan] * N + [2] * 8 * N + [0] * N,
        ]
    ]
    # Each level contains 4 elements (including NaN), so it is represented
    # in 2 bits, for a total of 2*N*10 = 100 > 64 bits. If we were using a
    # 64 bit engine and truncating the first levels, the fourth and fifth
    # keys would collide; if truncating the last levels, the fifth and
    # sixth; if rotating bits rather than shifting, the third and fifth.

    for position, key in enumerate(keys):
        # NOTE(review): the index is rebuilt on every pass, exactly as
        # before; presumably so each lookup starts from a fresh engine --
        # confirm before hoisting it out of the loop.
        index = MultiIndex.from_tuples(keys)
        assert index.get_loc(key) == position

        # Looking up the first ``position + 1`` keys must return 0..position
        expected = np.arange(position + 1, dtype=np.intp)
        result = index.get_indexer(keys[: position + 1])
        tm.assert_numpy_array_equal(result, expected)

    # With missing key:
    # ``index`` is the one built in the last loop iteration; the key that
    # is absent from it must map to -1 while all real keys keep their place.
    expected = np.array([-1, *range(len(keys))], dtype=np.intp)
    missing = tuple([0, 1] * 5 * N)
    result = index.get_indexer([missing, *keys])
    tm.assert_numpy_array_equal(result, expected)
+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
from pandas import MultiIndex
2+
3+
4+
class TestIsLexsorted:
5+
def test_is_lexsorted(self):
6+
levels = [[0, 1], [0, 1, 2]]
7+
8+
index = MultiIndex(
9+
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
10+
)
11+
assert index.is_lexsorted()
12+
13+
index = MultiIndex(
14+
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]]
15+
)
16+
assert not index.is_lexsorted()
17+
18+
index = MultiIndex(
19+
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]]
20+
)
21+
assert not index.is_lexsorted()
22+
assert index.lexsort_depth == 0
23+
24+
25+
class TestLexsortDepth:
26+
def test_lexsort_depth(self):
27+
# Test that lexsort_depth return the correct sortorder
28+
# when it was given to the MultiIndex const.
29+
# GH#28518
30+
31+
levels = [[0, 1], [0, 1, 2]]
32+
33+
index = MultiIndex(
34+
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]], sortorder=2
35+
)
36+
assert index.lexsort_depth == 2
37+
38+
index = MultiIndex(
39+
levels=levels, codes=[[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 2, 1]], sortorder=1
40+
)
41+
assert index.lexsort_depth == 1
42+
43+
index = MultiIndex(
44+
levels=levels, codes=[[0, 0, 1, 0, 1, 1], [0, 1, 0, 2, 2, 1]], sortorder=0
45+
)
46+
assert index.lexsort_depth == 0

pandas/tests/indexes/multi/test_missing.py

+10
Original file line numberDiff line numberDiff line change
@@ -141,3 +141,13 @@ def test_nan_stays_float():
141141
assert pd.isna(df0.index.get_level_values(1)).all()
142142
# the following failed in 0.14.1
143143
assert pd.isna(dfm.index.get_level_values(1)[:-1]).all()
144+
145+
146+
def test_tuples_have_na():
    # Position 4 of the first level carries code -1; the matching tuple
    # element must be reported as missing by pd.isna.
    first_codes = [1, 1, 1, 1, -1, 0, 0, 0]
    second_codes = [0, 1, 2, 3, 0, 1, 2, 3]
    index = MultiIndex(
        levels=[[1, 0], [0, 1, 2, 3]], codes=[first_codes, second_codes]
    )

    # Check both access paths: positional indexing and the .values array
    via_getitem = index[4]
    assert pd.isna(via_getitem[0])

    via_values = index.values[4]
    assert pd.isna(via_values[0])

pandas/tests/indexes/multi/test_reshape.py

+50
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
1+
from datetime import datetime
2+
13
import numpy as np
24
import pytest
5+
import pytz
36

47
import pandas as pd
58
from pandas import Index, MultiIndex
@@ -95,6 +98,53 @@ def test_append(idx):
9598
assert result.equals(idx)
9699

97100

101+
def test_append_index():
    # Appending between a flat Index and MultiIndexes of two and three levels.
    floats = [1.1, 1.2, 1.3]
    idx1 = Index(floats)
    idx2 = pd.date_range("2011-01-01", freq="D", periods=3, tz="Asia/Tokyo")
    idx3 = Index(["A", "B", "C"])

    midx_lv2 = MultiIndex.from_arrays([idx1, idx2])
    midx_lv3 = MultiIndex.from_arrays([idx1, idx2, idx3])

    result = idx1.append(midx_lv2)

    # see gh-7112
    tz = pytz.timezone("Asia/Tokyo")
    localized = [tz.localize(datetime(2011, 1, day)) for day in (1, 2, 3)]
    expected_tuples = list(zip(floats, localized))

    # flat + two-level
    expected = Index(floats + expected_tuples)
    tm.assert_index_equal(result, expected)

    # two-level + flat
    result = midx_lv2.append(idx1)
    expected = Index(expected_tuples + floats)
    tm.assert_index_equal(result, expected)

    # two-level + two-level
    doubled = MultiIndex.from_arrays([idx1.append(idx1), idx2.append(idx2)])
    result = midx_lv2.append(midx_lv2)
    tm.assert_index_equal(result, doubled)

    # two-level + three-level is compared against the same expectation
    # as the two-level + two-level case above
    result = midx_lv2.append(midx_lv3)
    tm.assert_index_equal(result, doubled)

    # three-level + two-level: expectation is an object array of tuples
    result = midx_lv3.append(midx_lv2)
    three_level_tuples = list(zip(floats, localized, ["A", "B", "C"]))
    values = np.array(three_level_tuples + expected_tuples, dtype=object)
    expected = Index._simple_new(values, None)
    tm.assert_index_equal(result, expected)
146+
147+
98148
def test_repeat():
99149
reps = 2
100150
numbers = [1, 2, 3]

0 commit comments

Comments
 (0)