Skip to content

Commit d64933a

Browse files
simonjayhawkins authored and Pingviinituutti committed
TST: move mi tests from tests/indexing/ to tests/indexing/multiindex/ (pandas-dev#24417)
1 parent 144f888 commit d64933a

14 files changed

+358
-337
lines changed
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,65 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas.compat import lrange, lzip, range
5+
6+
from pandas import DataFrame, MultiIndex, Series
7+
from pandas.core import common as com
8+
import pandas.util.testing as tm
9+
10+
11+
def test_detect_chained_assignment():
    # Inplace ops, originally from:
    # http://stackoverflow.com/questions/20508968/series-fillna-in-a-multiindex-dataframe-does-not-fill-is-this-a-bug
    column_keys = [('eyes', 'left'), ('eyes', 'right'),
                   ('ears', 'left'), ('ears', 'right')]
    column_values = [[12, 23], [123, None], [1234, 2345], [12345, 23456]]

    # one two-row column per (part, side) key
    events = dict(zip(column_keys, column_values))
    multiind = MultiIndex.from_tuples(column_keys, names=['part', 'side'])
    zed = DataFrame(events, index=['a', 'b'], columns=multiind)

    # the in-place fill goes through a chained lookup and is expected to
    # raise SettingWithCopyError
    with pytest.raises(com.SettingWithCopyError):
        zed['eyes']['right'].fillna(value=555, inplace=True)
29+
30+
31+
def test_cache_updating():
    # 5216
    # make sure that we don't try to set a dead cache
    values = np.random.rand(10, 3)
    df = DataFrame(values, columns=['x', 'y', 'z'])
    # 5 outer labels x 2 inner labels -> 10 row keys
    df.index = MultiIndex.from_tuples(
        [(outer, inner) for outer in range(5) for inner in range(2)])

    # setting via chained assignment
    # but actually works, since everything is a view
    df.loc[0]['z'].iloc[0] = 1.
    assert df.loc[(0, 0), 'z'] == 1

    # correct setting
    df.loc[(0, 0), 'z'] = 2
    assert df.loc[(0, 0), 'z'] == 2
50+
51+
52+
def test_indexer_caching():
    # GH5727
    # make sure that indexers are in the _internal_names_set
    n = 1000001
    level_values = lrange(n)
    # both levels carry the same 0..n-1 labels
    index = MultiIndex.from_tuples(lzip(level_values, level_values))
    s = Series(np.zeros(n), index=index)
    str(s)

    # setitem
    expected = Series(np.ones(n), index=index)
    s = Series(np.zeros(n), index=index)
    is_zero = s == 0
    s[is_zero] = 1
    tm.assert_series_equal(s, expected)

pandas/tests/indexing/multiindex/test_getitem.py

+41-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.compat import lrange, range, u, zip
4+
from pandas.compat import StringIO, lrange, range, u, zip
55

66
import pandas as pd
77
from pandas import DataFrame, Index, MultiIndex, Series
@@ -343,3 +343,43 @@ def test_mixed_depth_get(unicode_strings):
343343
expected = df['routine1', 'result1', '']
344344
expected = expected.rename(('routine1', 'result1'))
345345
tm.assert_series_equal(result, expected)
346+
347+
348+
def test_mi_access():
    # Selection from MultiIndex columns built out of parsed text data, both
    # with a full tuple key and by chaining one level at a time.

    # GH 4145
    data = """h1 main h3 sub h5
0 a A 1 A1 1
1 b B 2 B1 2
2 c B 3 A1 3
3 d A 4 B2 4
4 e A 5 B2 5
5 f B 6 A2 6
"""

    df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0)
    # pass ``axis`` by keyword: a bare positional 1 is easy to misread and
    # depends on the parameter order of sort_index
    df2 = df.set_index(['main', 'sub']).T.sort_index(axis=1)
    index = Index(['h1', 'h3', 'h5'])
    columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub'])
    expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T

    # full tuple key through .loc
    result = df2.loc[:, ('A', 'A1')]
    tm.assert_frame_equal(result, expected)

    # full tuple key through plain __getitem__
    result = df2[('A', 'A1')]
    tm.assert_frame_equal(result, expected)

    # GH 4146, not returning a block manager when selecting a unique index
    # from a duplicate index
    # as of 4879, this returns a Series (which is similar to what happens
    # with a non-unique)
    expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1')
    result = df2['A']['A1']
    tm.assert_series_equal(result, expected)

    # selecting a non_unique from the 2nd level
    expected = DataFrame([['d', 4, 4], ['e', 5, 5]],
                         index=Index(['B2', 'B2'], name='sub'),
                         columns=['h1', 'h3', 'h5']).T
    result = df2['A']['B2']
    tm.assert_frame_equal(result, expected)

pandas/tests/indexing/multiindex/test_iloc.py

+27
Original file line numberDiff line numberDiff line change
@@ -115,3 +115,30 @@ def test_iloc_integer_locations():
115115
result = DataFrame([[df.iloc[r, c] for c in range(2)] for r in range(5)])
116116

117117
tm.assert_frame_equal(result, expected)
118+
119+
120+
@pytest.mark.parametrize(
    'data, indexes, values, expected_k', [
        # test without indexer value in first level of MultiIndex
        ([[2, 22, 5], [2, 33, 6]], [0, -1, 1], [2, 3, 1], [7, 10]),
        # test like code sample 1 in the issue
        ([[1, 22, 555], [1, 33, 666]], [0, -1, 1], [200, 300, 100],
         [755, 1066]),
        # test like code sample 2 in the issue
        ([[1, 3, 7], [2, 4, 8]], [0, -1, 1], [10, 10, 1000], [17, 1018]),
        # test like code sample 3 in the issue
        ([[1, 11, 4], [2, 22, 5], [3, 33, 6]], [0, -1, 1], [4, 7, 10],
         [8, 15, 13])
    ])
def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
    # GH17148
    # positional in-place additions on a Series indexed by the (i, j)
    # MultiIndex must land on the requested rows
    df = DataFrame(data=data, columns=['i', 'j', 'k']).set_index(['i', 'j'])

    series = df.k.copy()
    for position, increment in zip(indexes, values):
        series.iloc[position] += increment

    # write the expected totals into the frame so the comparison Series
    # carries the same index and name
    df['k'] = expected_k
    tm.assert_series_equal(series, df.k)
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
# -*- coding: utf-8 -*-
2+
3+
import warnings
4+
5+
import numpy as np
6+
import pytest
7+
8+
import pandas as pd
9+
from pandas import DataFrame, MultiIndex, Series
10+
import pandas.util.testing as tm
11+
12+
13+
@pytest.mark.slow
@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_multiindex_get_loc():  # GH7724, GH2646

    with warnings.catch_warnings(record=True):

        # test indexing into a multi-index before & past the lexsort depth
        from numpy.random import randint, choice, randn
        cols = ['jim', 'joe', 'jolie', 'joline', 'jolia']

        def validate(mi, df, key):
            # compare mi.loc[...] against a boolean-mask selection on the
            # flat frame, for every prefix of ``key``
            mask = np.ones(len(df)).astype('bool')

            # test for all partials of this key
            for i, k in enumerate(key):
                mask &= df.iloc[:, i] == k
                partial = key[:i + 1]

                if not mask.any():
                    assert partial not in mi.index
                    continue

                assert partial in mi.index
                expected = df[mask].copy()

                if i + 1 != len(key):  # partial key
                    expected.drop(cols[:i + 1], axis=1, inplace=True)
                    expected.set_index(cols[i + 1:-1], inplace=True)
                    tm.assert_frame_equal(mi.loc[partial], expected)
                    continue

                # full key
                expected.set_index(cols[:-1], inplace=True)
                if len(expected) == 1:  # single hit
                    expected = Series(expected['jolia'].values,
                                      name=expected.index[0],
                                      index=['jolia'])
                    tm.assert_series_equal(mi.loc[partial], expected)
                else:  # multi hit
                    tm.assert_frame_equal(mi.loc[partial], expected)

        n, m = 1000, 50

        # one random column per entry of ``cols``
        vals = [
            randint(0, 10, n),
            choice(list('abcdefghij'), n),
            choice(pd.date_range('20141009', periods=10).tolist(), n),
            choice(list('ZYXWVUTSRQ'), n),
            randn(n),
        ]
        vals = list(map(tuple, zip(*vals)))

        # bunch of keys for testing
        keys = [
            randint(0, 11, m),
            choice(list('abcdefghijk'), m),
            choice(pd.date_range('20141009', periods=11).tolist(), m),
            choice(list('ZYXWVUTSRQP'), m),
        ]
        keys = list(map(tuple, zip(*keys)))
        # plus keys taken from every (n // m)-th generated row, minus the
        # trailing value column
        keys += [row[:-1] for row in vals[::n // m]]

        # covers both unique index and non-unique index
        df = DataFrame(vals, columns=cols)
        a, b = pd.concat([df, df]), df.drop_duplicates(subset=cols[:-1])

        for frame in a, b:
            for i in range(5):  # lexsort depth
                if i == 0:
                    df = frame.copy()
                else:
                    df = frame.sort_values(by=cols[:i])
                mi = df.set_index(cols[:-1])
                assert mi.index.lexsort_depth >= i
                for key in keys:
                    validate(mi, df, key)
83+
84+
85+
@pytest.mark.slow
def test_large_mi_dataframe_indexing():
    # GH10645
    # a key whose first element is one past the last label must not be
    # reported as contained in a 10 ** 6 entry MultiIndex
    result = MultiIndex.from_arrays([range(10 ** 6), range(10 ** 6)])
    assert (10 ** 6, 0) not in result

pandas/tests/indexing/multiindex/test_ix.py

+29
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,10 @@
33
import pytest
44

55
from pandas.compat import lrange
6+
from pandas.errors import PerformanceWarning
7+
8+
from pandas import DataFrame, MultiIndex
9+
from pandas.util import testing as tm
610

711

812
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
@@ -25,3 +29,28 @@ def test_frame_setitem_ix(self, multiindex_dataframe_random_data):
2529
df.columns = lrange(3)
2630
df.ix[('bar', 'two'), 1] = 7
2731
assert df.loc[('bar', 'two'), 1] == 7
32+
33+
def test_ix_general(self):

    # ix general issues

    # GH 2817
    data = {'amount': {0: 700, 1: 600, 2: 222, 3: 333, 4: 444},
            'col': {0: 3.5, 1: 3.5, 2: 4.0, 3: 4.0, 4: 4.0},
            'year': {0: 2012, 1: 2011, 2: 2012, 3: 2012, 4: 2012}}
    df = DataFrame(data).set_index(keys=['col', 'year'])
    key = (4.0, 2012)

    # emits a PerformanceWarning, ok
    with tm.assert_produces_warning(PerformanceWarning):
        tm.assert_frame_equal(df.loc[key], df.iloc[2:])

    # this is ok
    df = df.sort_index()
    res = df.loc[key]

    # col has float dtype, result should be Float64Index
    expected_index = MultiIndex.from_arrays([[4.] * 3, [2012] * 3],
                                            names=['col', 'year'])
    expected = DataFrame({'amount': [222, 333, 444]}, index=expected_index)
    tm.assert_frame_equal(res, expected)

pandas/tests/indexing/multiindex/test_loc.py

+73-1
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
1+
import itertools
12
from warnings import catch_warnings
23

34
import numpy as np
45
import pytest
56

6-
from pandas import DataFrame, MultiIndex, Series
7+
from pandas import DataFrame, Index, MultiIndex, Series
78
from pandas.util import testing as tm
89

910

@@ -175,3 +176,74 @@ def test_get_loc_single_level(self, single_level_multiindex):
175176
index=single_level)
176177
for k in single_level.values:
177178
s[k]
179+
180+
def test_loc_getitem_int_slice(self):
    # GH 3053
    # loc should treat integer slices like label slices

    # slice bounds equal to the first and last outer labels -> whole frame
    index = MultiIndex.from_tuples(
        list(itertools.product([6, 7, 8], ['a', 'b'])))
    df = DataFrame(np.random.randn(6, 6), index, index)
    tm.assert_frame_equal(df.loc[6:8, :], df)

    # slice starting at the second outer label -> drops the first two rows
    index = MultiIndex.from_tuples(
        list(itertools.product([10, 20, 30], ['a', 'b'])))
    df = DataFrame(np.random.randn(6, 6), index, index)
    tm.assert_frame_equal(df.loc[20:30, :], df.iloc[2:])

    # doc examples
    result = df.loc[10, :]
    expected = df.iloc[0:2]
    expected.index = ['a', 'b']
    tm.assert_frame_equal(result, expected)

    result = df.loc[:, 10]
    # expected = df.ix[:,10] (this fails)
    expected = df[10]
    tm.assert_frame_equal(result, expected)
209+
210+
@pytest.mark.parametrize(
    'indexer_type_1',
    (list, tuple, set, slice, np.ndarray, Series, Index))
@pytest.mark.parametrize(
    'indexer_type_2',
    (list, tuple, set, slice, np.ndarray, Series, Index))
def test_loc_getitem_nested_indexer(self, indexer_type_1, indexer_type_2):
    # GH #19686
    # .loc should work with nested indexers which can be
    # any list-like objects (see `pandas.api.types.is_list_like`) or slices

    def convert_nested_indexer(indexer_type, keys):
        # ndarray and slice cannot be built by calling the type on a list
        if indexer_type is np.ndarray:
            return np.array(keys)
        if indexer_type is slice:
            return slice(*keys)
        return indexer_type(keys)

    outer_labels = [10, 20, 30]
    inner_labels = [1, 2, 3]
    index = MultiIndex.from_product([outer_labels, inner_labels])
    df = DataFrame(
        np.arange(len(index), dtype='int64'),
        index=index, columns=['Data'])

    keys = ([10, 20], [2, 3])

    # check indexers with all the combinations of nested objects
    # of all the valid types
    indexer = (
        convert_nested_indexer(indexer_type_1, keys[0]),
        convert_nested_indexer(indexer_type_2, keys[1]),
    )

    result = df.loc[indexer, 'Data']
    expected = Series(
        [1, 2, 4, 5], name='Data',
        index=MultiIndex.from_product(keys))

    tm.assert_series_equal(result, expected)

pandas/tests/indexing/multiindex/test_multiindex.py

+16-1
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from pandas.errors import PerformanceWarning
77

88
import pandas as pd
9-
from pandas import DataFrame, MultiIndex, Series
9+
from pandas import DataFrame, Index, MultiIndex, Series
1010
from pandas.util import testing as tm
1111

1212

@@ -69,3 +69,18 @@ def test_indexing_over_hashtable_size_cutoff(self):
6969
assert s[("a", 7)] == 7
7070

7171
_index._SIZE_CUTOFF = old_cutoff
72+
73+
def test_multi_nan_indexing(self):

    # GH 3588
    a_values = ['R1', 'R2', np.nan, 'R4']
    b_values = ["C1", "C2", "C3", "C4"]
    columns = {"a": a_values,
               'b': b_values,
               "c": [10, 15, np.nan, 20]}

    df = DataFrame(columns)
    result = df.set_index(['a', 'b'], drop=False)

    # same data, with the two key columns (NaN included) also forming the
    # row index
    expected_index = [Index(a_values, name='a'),
                      Index(b_values, name='b')]
    expected = DataFrame(columns, index=expected_index)
    tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)