Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit 005f44e

Browse files
simonjayhawkins authored and jreback committed Nov 29, 2018
TST: split up pandas/tests/indexing/test_multiindex.py (#23912)
1 parent d887927 commit 005f44e

16 files changed

+2363
-2249
lines changed
 

‎pandas/tests/indexing/multiindex/__init__.py

Whitespace-only changes.
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas import DataFrame, Index, MultiIndex
5+
from pandas.util import testing as tm
6+
7+
8+
@pytest.fixture
def multiindex_dataframe_random_data():
    """Return a 10x3 DataFrame of random values on a 2-level MultiIndex.

    The row index levels are named 'first' and 'second'; the columns are
    'A', 'B' and 'C' on an Index named 'exp'.
    """
    level_values = [['foo', 'bar', 'baz', 'qux'], ['one', 'two', 'three']]
    level_positions = [[0, 0, 0, 1, 1, 2, 2, 3, 3, 3],
                       [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]]
    row_index = MultiIndex(levels=level_values, labels=level_positions,
                           names=['first', 'second'])
    column_index = Index(['A', 'B', 'C'], name='exp')
    values = np.random.randn(10, 3)
    return DataFrame(values, index=row_index, columns=column_index)
18+
19+
20+
@pytest.fixture
def multiindex_year_month_day_dataframe_random_data():
    """DataFrame with 3 level MultiIndex (year, month, day) covering
    first 100 business days from 2000-01-01 with random data"""
    # tm.N is module-level state on the shared testing module (presumably
    # the default row count read by makeTimeDataFrame).  Override it only
    # while the frame is built and always restore it, so the setting does
    # not leak into tests that run afterwards.
    saved_n = tm.N
    tm.N = 100
    try:
        tdf = tm.makeTimeDataFrame()
    finally:
        tm.N = saved_n

    # collapse the date index into (year, month, day) groups
    ymd = tdf.groupby([lambda x: x.year, lambda x: x.month,
                       lambda x: x.day]).sum()
    # use Int64Index, to make sure things work
    ymd.index.set_levels([lev.astype('i8') for lev in ymd.index.levels],
                         inplace=True)
    ymd.index.set_names(['year', 'month', 'day'], inplace=True)
    return ymd
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
from datetime import datetime
2+
3+
import numpy as np
4+
5+
from pandas import Index, Period, Series, period_range
6+
7+
8+
def test_multiindex_period_datetime():
    """Look up a (label, period) MultiIndex entry by Period and by datetime.

    Both lookups must return the first element of the Series.
    """
    # GH4861, using datetime in period of multiindex raises exception
    outer = Index(['a', 'a', 'a', 'b', 'b'])
    n_rows = len(outer)
    inner = period_range('2012-01', periods=n_rows, freq='M')
    ser = Series(np.random.randn(n_rows), [outer, inner])

    first_value = ser.iloc[0]

    # try Period as index
    by_period = ser.loc['a', Period('2012-01')]
    assert by_period == first_value

    # try datetime as index
    by_datetime = ser.loc['a', datetime(2012, 1, 1)]
    assert by_datetime == first_value
Lines changed: 345 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,345 @@
1+
from warnings import catch_warnings, simplefilter
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas.compat import lrange, range, u, zip
7+
8+
import pandas as pd
9+
from pandas import DataFrame, Index, MultiIndex, Series, date_range
10+
import pandas.core.common as com
11+
from pandas.util import testing as tm
12+
13+
14+
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexGetItem(object):
    """Tests for ``[]``-style selection on MultiIndex-ed Series/DataFrames.

    Expected failures are written with the ``with pytest.raises(...)``
    context manager throughout.
    """

    def test_series_getitem_multiindex(self):
        """Select from a MultiIndex-ed Series via ``[]``, ``.loc`` and xs."""
        # GH 6018
        # series regression getitem with a multi-index

        s = Series([1, 2, 3])
        s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)])

        result = s[:, 0]
        expected = Series([1], index=[0])
        tm.assert_series_equal(result, expected)

        result = s.loc[:, 1]
        expected = Series([2, 3], index=[1, 2])
        tm.assert_series_equal(result, expected)

        # xs
        result = s.xs(0, level=0)
        expected = Series([1], index=[0])
        tm.assert_series_equal(result, expected)

        result = s.xs(1, level=1)
        expected = Series([2, 3], index=[1, 2])
        tm.assert_series_equal(result, expected)

        # GH6258
        dt = list(date_range('20130903', periods=3))
        idx = MultiIndex.from_product([list('AB'), dt])
        s = Series([1, 3, 4, 1, 3, 4], index=idx)

        result = s.xs('20130903', level=1)
        expected = Series([1, 1], index=list('AB'))
        tm.assert_series_equal(result, expected)

        # GH5684
        idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'),
                                      ('b', 'two')])
        s = Series([1, 2, 3, 4], index=idx)
        s.index.set_names(['L1', 'L2'], inplace=True)
        result = s.xs('one', level='L2')
        expected = Series([1, 3], index=['a', 'b'])
        expected.index.set_names(['L1'], inplace=True)
        tm.assert_series_equal(result, expected)

    def test_getitem_duplicates_multiindex(self):
        """Top-level lookups, missing keys and empty indexers on a MultiIndex.

        Covers GH 5725, GH 7866, GH 8737 and GH 7914 in sequence.
        """
        # GH 5725: the label 'A' happens to be a valid Timestamp, so the
        # lookup doesn't raise the appropriate error, only in PY3 of course!

        index = MultiIndex(levels=[['D', 'B', 'C'],
                                   [0, 26, 27, 37, 57, 67, 75, 82]],
                           labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
                                   [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
                           names=['tag', 'day'])
        arr = np.random.randn(len(index), 1)
        df = DataFrame(arr, index=index, columns=['val'])
        result = df.val['D']
        expected = Series(arr.ravel()[0:3], name='val', index=Index(
            [26, 37, 57], name='day'))
        tm.assert_series_equal(result, expected)

        # 'A' is not a level value here, so it must raise
        with pytest.raises(KeyError):
            df.val['A']

        with pytest.raises(KeyError):
            df.val['X']

        # A is treated as a special Timestamp
        index = MultiIndex(levels=[['A', 'B', 'C'],
                                   [0, 26, 27, 37, 57, 67, 75, 82]],
                           labels=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2],
                                   [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]],
                           names=['tag', 'day'])
        df = DataFrame(arr, index=index, columns=['val'])
        result = df.val['A']
        expected = Series(arr.ravel()[0:3], name='val', index=Index(
            [26, 37, 57], name='day'))
        tm.assert_series_equal(result, expected)

        with pytest.raises(KeyError):
            df.val['X']

        # GH 7866
        # multi-index slicing with missing indexers
        idx = MultiIndex.from_product([['A', 'B', 'C'],
                                       ['foo', 'bar', 'baz']],
                                      names=['one', 'two'])
        s = Series(np.arange(9, dtype='int64'), index=idx).sort_index()

        exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']],
                                          names=['one', 'two'])
        expected = Series(np.arange(3, dtype='int64'),
                          index=exp_idx).sort_index()

        result = s.loc[['A']]
        tm.assert_series_equal(result, expected)
        result = s.loc[['A', 'D']]
        tm.assert_series_equal(result, expected)

        # not any values found
        with pytest.raises(KeyError):
            s.loc[['D']]

        # empty ok
        result = s.loc[[]]
        expected = s.iloc[[]]
        tm.assert_series_equal(result, expected)

        idx = pd.IndexSlice
        expected = Series([0, 3, 6], index=MultiIndex.from_product(
            [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index()

        result = s.loc[idx[:, ['foo']]]
        tm.assert_series_equal(result, expected)
        result = s.loc[idx[:, ['foo', 'bah']]]
        tm.assert_series_equal(result, expected)

        # GH 8737
        # empty indexer
        multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'],
                                               ['alpha', 'beta']))
        df = DataFrame(
            np.random.randn(5, 6), index=range(5), columns=multi_index)
        df = df.sort_index(level=0, axis=1)

        expected = DataFrame(index=range(5),
                             columns=multi_index.reindex([])[0])
        result1 = df.loc[:, ([], slice(None))]
        result2 = df.loc[:, (['foo'], [])]
        tm.assert_frame_equal(result1, expected)
        tm.assert_frame_equal(result2, expected)

        # regression from < 0.14.0
        # GH 7914
        df = DataFrame([[np.mean, np.median], ['mean', 'median']],
                       columns=MultiIndex.from_tuples([('functs', 'mean'),
                                                       ('functs', 'median')]),
                       index=['function', 'name'])
        result = df.loc['function', ('functs', 'mean')]
        assert result == np.mean

    def test_getitem_simple(self, multiindex_dataframe_random_data):
        """Full-key column lookup works; unknown keys raise KeyError."""
        frame = multiindex_dataframe_random_data
        df = frame.T

        col = df['foo', 'one']
        tm.assert_almost_equal(col.values, df.values[:, 0])
        with pytest.raises(KeyError):
            df[('foo', 'four')]
        with pytest.raises(KeyError):
            df['foobar']

    def test_series_getitem(
            self, multiindex_year_month_day_dataframe_random_data):
        """Partial, full and list-of-tuples keys on a 3-level Series."""
        ymd = multiindex_year_month_day_dataframe_random_data
        s = ymd['A']

        result = s[2000, 3]

        # TODO(wesm): unused?
        # result2 = s.loc[2000, 3]

        expected = s.reindex(s.index[42:65])
        expected.index = expected.index.droplevel(0).droplevel(0)
        tm.assert_series_equal(result, expected)

        result = s[2000, 3, 10]
        expected = s[49]
        assert result == expected

        # fancy
        expected = s.reindex(s.index[49:51])
        result = s.loc[[(2000, 3, 10), (2000, 3, 13)]]
        tm.assert_series_equal(result, expected)

        with catch_warnings(record=True):
            simplefilter("ignore", DeprecationWarning)
            result = s.ix[[(2000, 3, 10), (2000, 3, 13)]]
        tm.assert_series_equal(result, expected)

        # key error
        with pytest.raises(KeyError):
            s[2000, 3, 4]

    def test_series_getitem_corner(
            self, multiindex_year_month_day_dataframe_random_data):
        """Out-of-bounds position raises; a generator works as a mask."""
        ymd = multiindex_year_month_day_dataframe_random_data
        s = ymd['A']

        # don't segfault, GH #495
        # out of bounds access
        with pytest.raises(IndexError):
            s[len(ymd)]

        # generator
        result = s[(x > 0 for x in s)]
        expected = s[s > 0]
        tm.assert_series_equal(result, expected)

    def test_frame_getitem_multicolumn_empty_level(self):
        """Top-level column lookup when a lower level label is empty."""
        f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']})
        f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'],
                     ['level3 item1', 'level3 item2']]

        result = f['level1 item1']
        expected = DataFrame([['1'], ['2'], ['3']], index=f.index,
                             columns=['level3 item1'])
        tm.assert_frame_equal(result, expected)

    def test_getitem_tuple_plus_slice(self):
        """``.loc[(a, b), :]`` matches ``.loc[a, b]``, ``xs`` and ``.ix``."""
        # GH #671
        df = DataFrame({'a': lrange(10),
                        'b': lrange(10),
                        'c': np.random.randn(10),
                        'd': np.random.randn(10)})

        idf = df.set_index(['a', 'b'])

        result = idf.loc[(0, 0), :]
        expected = idf.loc[0, 0]
        expected2 = idf.xs((0, 0))
        with catch_warnings(record=True):
            simplefilter("ignore", DeprecationWarning)
            expected3 = idf.ix[0, 0]

        tm.assert_series_equal(result, expected)
        tm.assert_series_equal(result, expected2)
        tm.assert_series_equal(result, expected3)

    def test_getitem_toplevel(self, multiindex_dataframe_random_data):
        """Selecting a top-level column label drops that level."""
        frame = multiindex_dataframe_random_data
        df = frame.T

        result = df['foo']
        expected = df.reindex(columns=df.columns[:3])
        expected.columns = expected.columns.droplevel(0)
        tm.assert_frame_equal(result, expected)

        result = df['bar']
        result2 = df.loc[:, 'bar']

        expected = df.reindex(columns=df.columns[3:5])
        expected.columns = expected.columns.droplevel(0)
        tm.assert_frame_equal(result, expected)
        tm.assert_frame_equal(result, result2)

    def test_getitem_int(self, multiindex_dataframe_random_data):
        """Integer labels via ``.loc`` versus integer positions via iloc."""
        levels = [[0, 1], [0, 1, 2]]
        labels = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
        index = MultiIndex(levels=levels, labels=labels)

        frame = DataFrame(np.random.randn(6, 2), index=index)

        result = frame.loc[1]
        expected = frame[-3:]
        expected.index = expected.index.droplevel(0)
        tm.assert_frame_equal(result, expected)

        # raises exception
        with pytest.raises(KeyError):
            frame.loc[3]

        # however this will work
        frame = multiindex_dataframe_random_data
        result = frame.iloc[2]
        expected = frame.xs(frame.index[2])
        tm.assert_series_equal(result, expected)

    def test_frame_getitem_view(self, multiindex_dataframe_random_data):
        """Writes through ``df['foo'].values`` and chained assignment."""
        frame = multiindex_dataframe_random_data
        df = frame.T.copy()

        # this works because we are modifying the underlying array
        # really a no-no
        df['foo'].values[:] = 0
        assert (df['foo'].values == 0).all()

        # but not if it's mixed-type
        df['foo', 'four'] = 'foo'
        df = df.sort_index(level=0, axis=1)

        # this will work, but will raise/warn as it's chained assignment
        def f():
            df['foo']['one'] = 2
            return df

        with pytest.raises(com.SettingWithCopyError):
            f()

        # Second, best-effort attempt: a ValueError is tolerated here
        # (presumably SettingWithCopyError is a ValueError subclass --
        # confirm), and the original values must be left untouched.
        try:
            df = f()
        except ValueError:
            pass
        assert (df['foo', 'one'] == 0).all()

    def test_getitem_lowerdim_corner(self, multiindex_dataframe_random_data):
        """A missing (row, column) key raises, then can be set via loc."""
        frame = multiindex_dataframe_random_data
        with pytest.raises(KeyError):
            frame.loc[('bar', 'three'), 'B']

        # in theory should be inserting in a sorted space????
        frame.loc[('bar', 'three'), 'B'] = 0
        assert frame.sort_index().loc[('bar', 'three'), 'B'] == 0

    @pytest.mark.parametrize('unicode_strings', [True, False])
    def test_mixed_depth_get(self, unicode_strings):
        """Partial column keys on a MultiIndex padded with '' labels."""
        # If unicode_strings is True, the column labels in dataframe
        # construction will use unicode strings in Python 2 (pull request
        # #17099).

        arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
                  ['', 'OD', 'OD', 'result1', 'result2', 'result1'],
                  ['', 'wx', 'wy', '', '', '']]

        if unicode_strings:
            arrays = [[u(s) for s in arr] for arr in arrays]

        tuples = sorted(zip(*arrays))
        index = MultiIndex.from_tuples(tuples)
        df = DataFrame(np.random.randn(4, 6), columns=index)

        result = df['a']
        expected = df['a', '', ''].rename('a')
        tm.assert_series_equal(result, expected)

        result = df['routine1', 'result1']
        expected = df['routine1', 'result1', '']
        expected = expected.rename(('routine1', 'result1'))
        tm.assert_series_equal(result, expected)
Lines changed: 129 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,129 @@
1+
from warnings import catch_warnings
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas import DataFrame, MultiIndex, Series
7+
from pandas.util import testing as tm
8+
9+
10+
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexIloc(object):
    """Tests for positional (``.iloc``) access on MultiIndex-ed frames."""

    def test_iloc_getitem_multiindex2(self):
        """Currently skipped; everything after the skip does not run."""
        # TODO(wesm): fix this
        pytest.skip('this test was being suppressed, '
                    'needs to be fixed')

        values = np.random.randn(3, 3)
        df = DataFrame(values, columns=[[2, 2, 4], [6, 8, 10]],
                       index=[[4, 4, 8], [8, 10, 12]])

        result = df.iloc[2]
        expected = Series(values[2], index=df.columns)
        tm.assert_series_equal(result, expected)

        result = df.iloc[:, 2]
        expected = Series(values[:, 2], index=df.index)
        tm.assert_series_equal(result, expected)

        result = df.iloc[2, 2]
        expected = df.values[2, 2]
        assert result == expected

        # for multiple items
        # GH 5528
        result = df.iloc[[0, 1]]
        expected = df.xs(4, drop_level=False)
        tm.assert_frame_equal(result, expected)

        pairs = zip(*[['a', 'a', 'b', 'b'], ['x', 'y', 'x', 'y']])
        index = MultiIndex.from_tuples(pairs)
        df = DataFrame(np.random.randn(4, 4), index=index)
        result = df.iloc[[2, 3]]
        expected = df.xs('b', drop_level=False)
        tm.assert_frame_equal(result, expected)

    def test_iloc_getitem_multiindex(self):
        """Compare ``.iloc`` results with label-based ``.ix``/``.loc``."""
        mi_labels = DataFrame(np.random.randn(4, 3),
                              columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
                              index=[['i', 'i', 'j', 'k'],
                                     ['X', 'X', 'Y', 'Y']])

        mi_int = DataFrame(np.random.randn(3, 3),
                           columns=[[2, 2, 4], [6, 8, 10]],
                           index=[[4, 4, 8], [8, 10, 12]])

        # the first row
        by_position = mi_int.iloc[0]
        with catch_warnings(record=True):
            by_label = mi_int.ix[4].ix[8]
        # names differ: full tuple for iloc, last level only for chained ix
        tm.assert_series_equal(by_position, by_label, check_names=False)
        assert by_position.name == (4, 8)
        assert by_label.name == 8

        # 2nd (last) columns
        by_position = mi_int.iloc[:, 2]
        with catch_warnings(record=True):
            by_label = mi_int.ix[:, 2]
        tm.assert_series_equal(by_position, by_label)

        # corner column
        by_position = mi_int.iloc[2, 2]
        with catch_warnings(record=True):
            # First level is int - so use .loc rather than .ix (GH 21593)
            by_label = mi_int.loc[(8, 12), (4, 10)]
        assert by_position == by_label

        # this is basically regular indexing
        by_position = mi_labels.iloc[2, 2]
        with catch_warnings(record=True):
            by_label = mi_labels.ix['j'].ix[:, 'j'].ix[0, 0]
        assert by_position == by_label

    def test_frame_getitem_setitem_slice(
            self, multiindex_dataframe_random_data):
        """Read and assign the first four rows through ``.iloc[:4]``."""
        frame = multiindex_dataframe_random_data

        # getitem
        tm.assert_frame_equal(frame.iloc[:4], frame[:4])

        # setitem
        modified = frame.copy()
        modified.iloc[:4] = 0

        assert (modified.values[:4] == 0).all()
        assert (modified.values[4:] != 0).all()

    def test_indexing_ambiguity_bug_1678(self):
        """``.iloc[:, 1]`` returns the Series at ('Ohio', 'Red')."""
        columns = MultiIndex.from_tuples(
            [('Ohio', 'Green'), ('Ohio', 'Red'), ('Colorado', 'Green')])
        index = MultiIndex.from_tuples(
            [('a', 1), ('a', 2), ('b', 1), ('b', 2)])

        frame = DataFrame(np.arange(12).reshape((4, 3)), index=index,
                          columns=columns)

        by_position = frame.iloc[:, 1]
        by_label = frame.loc[:, ('Ohio', 'Red')]
        assert isinstance(by_position, Series)
        tm.assert_series_equal(by_position, by_label)

    def test_iloc_mi(self):
        """Scalar ``.iloc[r, c]`` lookups rebuild the original data."""
        # GH 13797
        # Test if iloc can handle integer locations in MultiIndexed DataFrame

        data = [['str00', 'str01'], ['str10', 'str11'], ['str20', 'srt21'],
                ['str30', 'str31'], ['str40', 'str41']]

        mi = MultiIndex.from_tuples(
            [('CC', 'A'), ('CC', 'B'), ('CC', 'B'), ('BB', 'a'), ('BB', 'b')])

        expected = DataFrame(data)
        df_mi = DataFrame(data, index=mi)

        cells = [[df_mi.iloc[r, c] for c in range(2)] for r in range(5)]
        result = DataFrame(cells)

        tm.assert_frame_equal(result, expected)
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
from warnings import catch_warnings, simplefilter
2+
3+
import pytest
4+
5+
from pandas.compat import lrange
6+
7+
8+
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexIx(object):
    """Tests for scalar assignment through ``.loc``/``.ix`` on a MultiIndex."""

    def test_frame_setitem_ix(self, multiindex_dataframe_random_data):
        """Set one cell by (row tuple, column) and read it back."""
        frame = multiindex_dataframe_random_data
        row_key = ('bar', 'two')

        frame.loc[row_key, 'B'] = 5
        assert frame.loc[row_key, 'B'] == 5

        # with integer labels
        int_cols = frame.copy()
        int_cols.columns = lrange(3)
        int_cols.loc[row_key, 1] = 7
        assert int_cols.loc[row_key, 1] == 7

        # same assignment through the deprecated .ix accessor
        with catch_warnings(record=True):
            simplefilter("ignore", DeprecationWarning)
            int_cols = frame.copy()
            int_cols.columns = lrange(3)
            int_cols.ix[row_key, 1] = 7
            assert int_cols.loc[row_key, 1] == 7
Lines changed: 177 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,177 @@
1+
from warnings import catch_warnings
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas import DataFrame, MultiIndex, Series
7+
from pandas.util import testing as tm
8+
9+
10+
@pytest.fixture
def single_level_multiindex():
    """Return a MultiIndex with one level, named 'first', of four labels."""
    level_values = [['foo', 'bar', 'baz', 'qux']]
    level_positions = [[0, 1, 2, 3]]
    return MultiIndex(levels=level_values, labels=level_positions,
                      names=['first'])
15+
16+
17+
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
class TestMultiIndexLoc(object):
    """Tests for label-based (``.loc``) access on MultiIndex-ed objects."""

    def test_loc_getitem_series(self):
        """Use a Series (including an empty one) as the ``.loc`` key."""
        # GH14730
        # passing a series as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = Series([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
            dtype=np.float64)
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        result = x.loc[[1, 3]]
        tm.assert_series_equal(result, expected)

        # GH15424
        y1 = Series([1, 3], index=[1, 2])
        result = x.loc[y1]
        tm.assert_series_equal(result, expected)

        empty = Series(data=[], dtype=np.float64)
        # dtype belongs to the expected Series, not to the MultiIndex
        expected = Series([], index=MultiIndex(
            levels=index.levels, labels=[[], []]), dtype=np.float64)
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

    def test_loc_getitem_array(self):
        """Use an ndarray, an empty ndarray and a numpy scalar as the key."""
        # GH15434
        # passing an array as a key with a MultiIndex
        index = MultiIndex.from_product([[1, 2, 3], ['A', 'B', 'C']])
        x = Series(index=index, data=range(9), dtype=np.float64)
        y = np.array([1, 3])
        expected = Series(
            data=[0, 1, 2, 6, 7, 8],
            index=MultiIndex.from_product([[1, 3], ['A', 'B', 'C']]),
            dtype=np.float64)
        result = x.loc[y]
        tm.assert_series_equal(result, expected)

        # empty array:
        empty = np.array([])
        # dtype belongs to the expected Series, not to the MultiIndex
        expected = Series([], index=MultiIndex(
            levels=index.levels, labels=[[], []]), dtype=np.float64)
        result = x.loc[empty]
        tm.assert_series_equal(result, expected)

        # 0-dim array (scalar):
        scalar = np.int64(1)
        expected = Series(
            data=[0, 1, 2],
            index=['A', 'B', 'C'],
            dtype=np.float64)
        result = x.loc[scalar]
        tm.assert_series_equal(result, expected)

    def test_loc_multiindex(self):
        """``.loc`` agrees with ``.ix``; a missing label raises KeyError."""
        mi_labels = DataFrame(np.random.randn(3, 3),
                              columns=[['i', 'i', 'j'], ['A', 'A', 'B']],
                              index=[['i', 'i', 'j'], ['X', 'X', 'Y']])

        mi_int = DataFrame(np.random.randn(3, 3),
                           columns=[[2, 2, 4], [6, 8, 10]],
                           index=[[4, 4, 8], [8, 10, 12]])

        # the first row
        rs = mi_labels.loc['i']
        with catch_warnings(record=True):
            xp = mi_labels.ix['i']
        tm.assert_frame_equal(rs, xp)

        # 2nd (last) columns
        rs = mi_labels.loc[:, 'j']
        with catch_warnings(record=True):
            xp = mi_labels.ix[:, 'j']
        tm.assert_frame_equal(rs, xp)

        # corner column
        rs = mi_labels.loc['j'].loc[:, 'j']
        with catch_warnings(record=True):
            xp = mi_labels.ix['j'].ix[:, 'j']
        tm.assert_frame_equal(rs, xp)

        # with a tuple
        rs = mi_labels.loc[('i', 'X')]
        with catch_warnings(record=True):
            xp = mi_labels.ix[('i', 'X')]
        tm.assert_frame_equal(rs, xp)

        rs = mi_int.loc[4]
        with catch_warnings(record=True):
            xp = mi_int.ix[4]
        tm.assert_frame_equal(rs, xp)

        # missing label
        with pytest.raises(KeyError):
            mi_int.loc[2]
        with catch_warnings(record=True):
            # GH 21593
            with pytest.raises(KeyError):
                mi_int.ix[2]

    def test_loc_multiindex_indexer_none(self):
        """Selecting every top-level label returns the whole frame."""
        # GH6788
        # multi-index indexer is None (meaning take all)
        attributes = ['Attribute' + str(i) for i in range(1)]
        attribute_values = ['Value' + str(i) for i in range(5)]

        index = MultiIndex.from_product([attributes, attribute_values])
        df = 0.1 * np.random.randn(10, 1 * 5) + 0.5
        df = DataFrame(df, columns=index)
        result = df[attributes]
        tm.assert_frame_equal(result, df)

        # GH 7349
        # loc with a multi-index seems to be doing fallback
        df = DataFrame(np.arange(12).reshape(-1, 1),
                       index=MultiIndex.from_product([[1, 2, 3, 4],
                                                      [1, 2, 3]]))

        expected = df.loc[([1, 2], ), :]
        result = df.loc[[1, 2]]
        tm.assert_frame_equal(result, expected)

    def test_loc_multiindex_incomplete(self):
        """Open-ended and stepped slices on the first level."""
        # GH 7399
        # incomplete indexers
        s = Series(np.arange(15, dtype='int64'),
                   MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
        expected = s.loc[:, 'a':'c']

        result = s.loc[0:4, 'a':'c']
        tm.assert_series_equal(result, expected)

        result = s.loc[:4, 'a':'c']
        tm.assert_series_equal(result, expected)

        result = s.loc[0:, 'a':'c']
        tm.assert_series_equal(result, expected)

        # GH 7400
        # multiindexer getitem with list of indexers skips wrong element
        s = Series(np.arange(15, dtype='int64'),
                   MultiIndex.from_product([range(5), ['a', 'b', 'c']]))
        expected = s.iloc[[6, 7, 8, 12, 13, 14]]
        result = s.loc[2:4:2, 'a':'c']
        tm.assert_series_equal(result, expected)

    def test_get_loc_single_level(self, single_level_multiindex):
        """Smoke test: every key of a single-level MultiIndex is gettable."""
        single_level = single_level_multiindex
        s = Series(np.random.randn(len(single_level)),
                   index=single_level)
        # no value is asserted; the lookup just has to not raise
        for k in single_level.values:
            s[k]
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
2+
import numpy as np
3+
import pytest
4+
5+
import pandas._libs.index as _index
6+
from pandas.errors import PerformanceWarning
7+
8+
import pandas as pd
9+
from pandas import DataFrame, MultiIndex, Series
10+
from pandas.util import testing as tm
11+
12+
13+
class TestMultiIndexBasic(object):
    """Miscellaneous MultiIndex indexing tests."""

    def test_multiindex_perf_warn(self):
        """The two ``.loc`` lookups below must emit PerformanceWarning."""
        df = DataFrame({'jim': [0, 0, 1, 1],
                        'joe': ['x', 'x', 'z', 'y'],
                        'jolie': np.random.rand(4)}).set_index(['jim', 'joe'])

        with tm.assert_produces_warning(PerformanceWarning,
                                        clear=[pd.core.index]):
            df.loc[(1, 'z')]

        # reorder the rows, then look up with a partial key
        df = df.iloc[[2, 1, 3, 0]]
        with tm.assert_produces_warning(PerformanceWarning):
            df.loc[(0, )]

    def test_multiindex_contains_dropped(self):
        """A dropped label stays in ``levels`` but is not ``in`` the index."""
        # GH 19027
        # test that dropped MultiIndex levels are not in the MultiIndex
        # despite continuing to be in the MultiIndex's levels
        idx = MultiIndex.from_product([[1, 2], [3, 4]])
        assert 2 in idx
        idx = idx.drop(2)

        # drop implementation keeps 2 in the levels
        assert 2 in idx.levels[0]
        # but it should no longer be in the index itself
        assert 2 not in idx

        # also applies to strings
        idx = MultiIndex.from_product([['a', 'b'], ['c', 'd']])
        assert 'a' in idx
        idx = idx.drop('a')
        assert 'a' in idx.levels[0]
        assert 'a' not in idx

    @pytest.mark.parametrize("data, expected", [
        (MultiIndex.from_product([(), ()]), True),
        (MultiIndex.from_product([(1, 2), (3, 4)]), True),
        (MultiIndex.from_product([('a', 'b'), (1, 2)]), False),
    ])
    def test_multiindex_is_homogeneous_type(self, data, expected):
        """``_is_homogeneous_type`` matches the parametrized expectation."""
        assert data._is_homogeneous_type is expected

    def test_indexing_over_hashtable_size_cutoff(self):
        """Tuple lookups still work with ``_SIZE_CUTOFF`` raised to 20000."""
        n = 10000

        old_cutoff = _index._SIZE_CUTOFF
        _index._SIZE_CUTOFF = 20000
        try:
            s = Series(np.arange(n),
                       MultiIndex.from_arrays((["a"] * n, np.arange(n))))

            # hai it works!
            assert s[("a", 5)] == 5
            assert s[("a", 6)] == 6
            assert s[("a", 7)] == 7
        finally:
            # restore the module-level cutoff even if an assertion fails,
            # so the override cannot leak into other tests
            _index._SIZE_CUTOFF = old_cutoff
Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas import DataFrame, MultiIndex, Panel, Series
5+
from pandas.util import testing as tm
6+
7+
8+
@pytest.mark.filterwarnings('ignore:\\nPanel:FutureWarning')
class TestMultiIndexPanel(object):
    """Tests for ``.iloc``/``.loc`` on Panels that carry a MultiIndex axis."""

    def test_iloc_getitem_panel_multiindex(self):
        """``Panel.iloc`` matches ``.iloc`` on the selected item's frame."""
        # GH 7199
        # Panel with multi-index
        multi_index = MultiIndex.from_tuples([('ONE', 'one'),
                                              ('TWO', 'two'),
                                              ('THREE', 'three')],
                                             names=['UPPER', 'lower'])
        simple_index = [x[0] for x in multi_index]

        item_labels = ['First', 'Second']
        major_labels = ['a', 'b', 'c', 'd']
        wd1 = Panel(items=item_labels, major_axis=major_labels,
                    minor_axis=multi_index)
        wd2 = Panel(items=item_labels, major_axis=major_labels,
                    minor_axis=simple_index)

        row_mask = [True, True, True, False]
        col_positions = [0, 2]

        expected1 = wd1['First'].iloc[row_mask, col_positions]
        # the original line carried a bare "WRONG" marker; reason not stated
        result1 = wd1.iloc[0, row_mask, col_positions]  # WRONG
        tm.assert_frame_equal(result1, expected1)

        expected2 = wd2['First'].iloc[row_mask, col_positions]
        result2 = wd2.iloc[0, row_mask, col_positions]
        tm.assert_frame_equal(result2, expected2)

        expected1 = DataFrame(index=['a'], columns=multi_index,
                              dtype='float64')
        result1 = wd1.iloc[0, [0], [0, 1, 2]]
        tm.assert_frame_equal(result1, expected1)

        expected2 = DataFrame(index=['a'], columns=simple_index,
                              dtype='float64')
        result2 = wd2.iloc[0, [0], [0, 1, 2]]
        tm.assert_frame_equal(result2, expected2)

        # GH 7516
        mi = MultiIndex.from_tuples([(0, 'x'), (1, 'y'), (2, 'z')])
        values = np.arange(3 * 3 * 3, dtype='int64').reshape(3, 3, 3)
        p = Panel(values, items=['a', 'b', 'c'], major_axis=mi,
                  minor_axis=['u', 'v', 'w'])
        expected = Series([3, 12, 21], index=['a', 'b', 'c'], name='u')

        result = p.iloc[:, 1, 0]
        tm.assert_series_equal(result, expected)

        result = p.loc[:, (1, 'y'), 'u']
        tm.assert_series_equal(result, expected)

    def test_panel_setitem_with_multiindex(self):
        """``iloc`` assignment works as each axis becomes a MultiIndex."""
        # 10360
        # failing with a multi-index
        arr = np.array([[[1, 2, 3], [0, 0, 0]],
                        [[0, 0, 0], [0, 0, 0]]],
                       dtype=np.float64)

        axes = dict(items=['A', 'B'], major_axis=[0, 1],
                    minor_axis=['X', 'Y', 'Z'])

        def check_setitem(key, new_values, expected_values):
            # Build a zero Panel on the *current* axes, assign through
            # iloc, and compare with a Panel built from expected_values.
            panel = Panel(0., **axes)
            panel.iloc[key] = new_values
            tm.assert_panel_equal(panel, Panel(expected_values, **axes))

        first_major_row = (0, 0, slice(None))   # same key as iloc[0, 0, :]
        first_minor_col = (0, slice(None), 0)   # same key as iloc[0, :, 0]

        # reg index
        check_setitem(first_major_row, [1, 2, 3], arr)

        # multi-indexes
        axes['items'] = MultiIndex.from_tuples(
            [('A', 'a'), ('B', 'b')])
        check_setitem(first_major_row, [1, 2, 3], arr)

        axes['major_axis'] = MultiIndex.from_tuples(
            [('A', 1), ('A', 2)])
        check_setitem(first_major_row, [1, 2, 3], arr)

        axes['minor_axis'] = MultiIndex.from_product(
            [['X'], range(3)])
        check_setitem(first_major_row, [1, 2, 3], arr)

        arr = np.array(
            [[[1, 0, 0], [2, 0, 0]], [[0, 0, 0], [0, 0, 0]]],
            dtype=np.float64)
        check_setitem(first_minor_col, [1, 2], arr)
Lines changed: 183 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
from warnings import catch_warnings, simplefilter
2+
3+
import numpy as np
4+
import pytest
5+
6+
from pandas import DataFrame, MultiIndex
7+
from pandas.util import testing as tm
8+
9+
10+
class TestMultiIndexPartial(object):
11+
12+
def test_getitem_partial_int(self):
    """Select an integer top-level column label, singly and as a list."""
    # GH 12416
    outer = [10, 20]
    inner = ['a', 'b']
    df = DataFrame(index=range(2),
                   columns=MultiIndex.from_product([outer, inner]))

    # with single item
    expected = DataFrame(index=range(2), columns=inner)
    tm.assert_frame_equal(df[20], expected)

    # with list
    expected = DataFrame(index=range(2),
                         columns=MultiIndex.from_product([outer[1:], inner]))
    tm.assert_frame_equal(df[[20]], expected)

    # missing item:
    with pytest.raises(KeyError, match='1'):
        df[1]
    with pytest.raises(KeyError, match=r"'\[1\] not in index'"):
        df[[1]]
35+
36+
def test_series_slice_partial(self):
37+
pass
38+
39+
def test_xs_partial(self, multiindex_dataframe_random_data,
                    multiindex_year_month_day_dataframe_random_data):
    """``xs`` with a partial key matches the equivalent ``.loc`` lookup."""
    frame = multiindex_dataframe_random_data
    ymd = multiindex_year_month_day_dataframe_random_data

    via_xs = frame.xs('foo')
    via_loc = frame.loc['foo']
    via_transpose = frame.T['foo'].T
    tm.assert_frame_equal(via_xs, via_transpose)
    tm.assert_frame_equal(via_xs, via_loc)

    tm.assert_frame_equal(ymd.xs((2000, 4)), ymd.loc[2000, 4])

    # ex from #1796
    level_values = [['foo', 'bar'], ['one', 'two'], [-1, 1]]
    level_positions = [[0, 0, 0, 0, 1, 1, 1, 1],
                       [0, 0, 1, 1, 0, 0, 1, 1],
                       [0, 1, 0, 1, 0, 1, 0, 1]]
    index = MultiIndex(levels=level_values, labels=level_positions)
    df = DataFrame(np.random.randn(8, 4), index=index,
                   columns=list('abcd'))

    tm.assert_frame_equal(df.xs(['foo', 'one']), df.loc['foo', 'one'])
64+
65+
def test_getitem_partial(
        self, multiindex_year_month_day_dataframe_random_data):
    """A (year, month) column key returns that month's columns by day."""
    transposed = multiindex_year_month_day_dataframe_random_data.T
    result = transposed[2000, 2]

    # columns whose second-level position is 1
    month_mask = transposed.columns.labels[1] == 1
    expected = transposed.reindex(columns=transposed.columns[month_mask])
    expected.columns = expected.columns.droplevel(0).droplevel(0)
    tm.assert_frame_equal(result, expected)
74+
75+
def test_fancy_slice_partial(
        self, multiindex_dataframe_random_data,
        multiindex_year_month_day_dataframe_random_data):
    """Slice with loc using partial labels and partial tuples."""
    frame = multiindex_dataframe_random_data
    ymd = multiindex_year_month_day_dataframe_random_data

    # label slice on the first level vs. the matching positional rows
    by_label = frame.loc['bar':'baz']
    by_position = frame[3:7]
    tm.assert_frame_equal(by_label, by_position)

    # tuple slice vs. a boolean mask on the second-level label codes
    by_label = ymd.loc[(2000, 2):(2000, 4)]
    second_codes = ymd.index.labels[1]
    in_range = (second_codes >= 1) & (second_codes <= 3)
    tm.assert_frame_equal(by_label, ymd[in_range])
88+
89+
def test_getitem_partial_column_select(self):
    """Partial row key combined with a column selection via loc / ix."""
    idx = MultiIndex(labels=[[0, 0, 0], [0, 1, 1], [1, 0, 1]],
                     levels=[['a', 'b'], ['x', 'y'], ['p', 'q']])
    df = DataFrame(np.random.rand(3, 2), index=idx)

    # an explicit full column slice matches the rows-only lookup
    result = df.loc[('a', 'y'), :]
    expected = df.loc[('a', 'y')]
    tm.assert_frame_equal(result, expected)

    # a reordered column list matches selecting rows, then columns
    result = df.loc[('a', 'y'), [1, 0]]
    expected = df.loc[('a', 'y')][[1, 0]]
    tm.assert_frame_equal(result, expected)

    # same lookup through ``.ix``; DeprecationWarning is ignored here
    with catch_warnings(record=True):
        simplefilter("ignore", DeprecationWarning)
        result = df.ix[('a', 'y'), [1, 0]]
    tm.assert_frame_equal(result, expected)

    # a second-level label absent from the index raises KeyError; written
    # as the indexing expression itself rather than ``__getitem__``
    with pytest.raises(KeyError):
        df.loc[('a', 'foo'), :]
109+
110+
def test_partial_set(
        self, multiindex_year_month_day_dataframe_random_data):
    """Assign scalars through partial MultiIndex keys with loc (GH #397)."""
    source = multiindex_year_month_day_dataframe_random_data
    result = source.copy()
    expected = source.copy()

    # Each step assigns via ``.loc`` on ``result`` and writes the same
    # scalar into ``expected`` through the ``.values`` array of the
    # matching selection; the statement order is kept deliberately.
    result.loc[2000, 4] = 0
    expected.loc[2000, 4].values[:] = 0
    tm.assert_frame_equal(result, expected)

    result['A'].loc[2000, 4] = 1
    expected['A'].loc[2000, 4].values[:] = 1
    tm.assert_frame_equal(result, expected)

    result.loc[2000] = 5
    expected.loc[2000].values[:] = 5
    tm.assert_frame_equal(result, expected)

    # this works...for now
    result['A'].iloc[14] = 5
    assert result['A'][14] == 5
131+
132+
# ---------------------------------------------------------------------
133+
# AMBIGUOUS CASES!
134+
135+
def test_partial_ix_missing(
        self, multiindex_year_month_day_dataframe_random_data):
    """Ambiguous partial indexing with a missing label (currently skipped)."""
    # Skipped unconditionally; nothing below this call is executed.
    pytest.skip("skipping for now")

    ymd = multiindex_year_month_day_dataframe_random_data
    tm.assert_series_equal(ymd.loc[2000, 0], ymd.loc[2000]['A'])

    # need to put in some work here

    # self.ymd.loc[2000, 0] = 0
    # assert (self.ymd.loc[2000]['A'] == 0).all()

    # Pretty sure the second (and maybe even the first) is already wrong.
    getter = ymd.loc.__getitem__
    pytest.raises(Exception, getter, (2000, 6))
    pytest.raises(Exception, getter, (2000, 6), 0)
152+
153+
# ---------------------------------------------------------------------
154+
155+
def test_setitem_multiple_partial(self, multiindex_dataframe_random_data):
    """Setting several first-level labels at once matches one-by-one."""
    frame = multiindex_dataframe_random_data

    # Same check for the frame and for one of its columns, first with a
    # list of labels and then with the equivalent label slice.
    cases = [(frame, tm.assert_frame_equal),
             (frame['A'], tm.assert_series_equal)]
    keys = (['foo', 'bar'], slice('foo', 'bar'))

    for obj, assert_equal in cases:
        for key in keys:
            expected = obj.copy()
            result = obj.copy()
            result.loc[key] = 0
            expected.loc['foo'] = 0
            expected.loc['bar'] = 0
            assert_equal(result, expected)
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
from numpy.random import randn
2+
3+
from pandas import DataFrame, MultiIndex, Series
4+
from pandas.util import testing as tm
5+
6+
7+
class TestMultiIndexSetOps(object):
8+
9+
def test_multiindex_symmetric_difference(self):
10+
# GH 13490
11+
idx = MultiIndex.from_product([['a', 'b'], ['A', 'B']],
12+
names=['a', 'b'])
13+
result = idx ^ idx
14+
assert result.names == idx.names
15+
16+
idx2 = idx.copy().rename(['A', 'B'])
17+
result = idx ^ idx2
18+
assert result.names == [None, None]
19+
20+
def test_mixed_depth_insert(self):
21+
arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'],
22+
['', 'OD', 'OD', 'result1', 'result2', 'result1'],
23+
['', 'wx', 'wy', '', '', '']]
24+
25+
tuples = sorted(zip(*arrays))
26+
index = MultiIndex.from_tuples(tuples)
27+
df = DataFrame(randn(4, 6), columns=index)
28+
29+
result = df.copy()
30+
expected = df.copy()
31+
result['b'] = [1, 2, 3, 4]
32+
expected['b', '', ''] = [1, 2, 3, 4]
33+
tm.assert_frame_equal(result, expected)
34+
35+
def test_dataframe_insert_column_all_na(self):
36+
# GH #1534
37+
mix = MultiIndex.from_tuples([('1a', '2a'), ('1a', '2b'), ('1a', '2c')
38+
])
39+
df = DataFrame([[1, 2], [3, 4], [5, 6]], index=mix)
40+
s = Series({(1, 1): 1, (1, 2): 2})
41+
df['new'] = s
42+
assert df['new'].isna().all()

‎pandas/tests/indexing/multiindex/test_setitem.py

Lines changed: 404 additions & 0 deletions
Large diffs are not rendered by default.

‎pandas/tests/indexing/multiindex/test_slice.py

Lines changed: 572 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
import numpy as np
2+
from numpy.random import randn
3+
4+
from pandas.compat import lzip
5+
6+
from pandas import DataFrame, MultiIndex, Series
7+
from pandas.util import testing as tm
8+
9+
10+
class TestMultiIndexSorted(object):
    """Getitem and sort_index checks involving unsorted MultiIndex data."""

    def test_getitem_multilevel_index_tuple_not_sorted(self):
        """``loc`` with a one-row MultiIndex key returns that single row."""
        key_columns = list("abc")
        df = DataFrame([[0, 1, 0, "x"], [0, 0, 1, "y"]],
                       columns=key_columns + ["data"])
        df = df.set_index(key_columns)
        first_row_key = df.index[:1]
        result = df.loc[first_row_key, "data"]

        expected_index = MultiIndex.from_tuples([(0, 1, 0)],
                                                names=['a', 'b', 'c'])
        expected = Series(['x'], index=expected_index, name='data')
        tm.assert_series_equal(result, expected)

    def test_getitem_slice_not_sorted(self, multiindex_dataframe_random_data):
        """``iloc`` accepts a numpy integer as a slice bound."""
        frame = multiindex_dataframe_random_data
        df = frame.sort_index(level=1).T

        # buglet with int typechecking
        stop = np.int32(3)
        result = df.iloc[:, :stop]
        expected = df.reindex(columns=df.columns[:3])
        tm.assert_frame_equal(result, expected)

    def test_frame_getitem_not_sorted2(self):
        """``sort_index`` restores a lexsorted, monotonic index (13431)."""
        raw = DataFrame({'col1': ['b', 'd', 'b', 'a'],
                         'col2': [3, 1, 1, 2],
                         'data': ['one', 'two', 'three', 'four']})

        df2 = raw.set_index(['col1', 'col2'])
        df2_original = df2.copy()

        # rewrite the first level in place so it is no longer sorted
        df2.index.set_levels(['b', 'd', 'a'], level='col1', inplace=True)
        df2.index.set_labels([0, 1, 0, 2], level='col1', inplace=True)
        assert not df2.index.is_lexsorted()
        assert not df2.index.is_monotonic

        # the rewritten index still compares equal to the original one
        assert df2_original.index.equals(df2.index)
        expected = df2.sort_index()
        assert expected.index.is_lexsorted()
        assert expected.index.is_monotonic

        result = df2.sort_index(level=0)
        assert result.index.is_lexsorted()
        assert result.index.is_monotonic
        tm.assert_frame_equal(result, expected)

    def test_frame_getitem_not_sorted(self, multiindex_dataframe_random_data):
        """First-level selection after appending an out-of-order key."""
        frame = multiindex_dataframe_random_data
        df = frame.T
        df['foo', 'four'] = 'foo'

        level_arrays = [np.array(x) for x in zip(*df.columns.values)]
        is_foo = level_arrays[0] == 'foo'

        # column-wise selection
        result = df['foo']
        result2 = df.loc[:, 'foo']
        expected = df.reindex(columns=df.columns[is_foo])
        expected.columns = expected.columns.droplevel(0)
        tm.assert_frame_equal(result, expected)
        tm.assert_frame_equal(result2, expected)

        # row-wise selection on the transposed frame
        df = df.T
        result = df.xs('foo')
        result2 = df.loc['foo']
        expected = df.reindex(df.index[is_foo])
        expected.index = expected.index.droplevel(0)
        tm.assert_frame_equal(result, expected)
        tm.assert_frame_equal(result2, expected)

    def test_series_getitem_not_sorted(self):
        """First-level selection on a Series whose index is not sorted."""
        labels = [['bar', 'bar', 'baz', 'baz', 'qux', 'qux', 'foo', 'foo'],
                  ['one', 'two', 'one', 'two', 'one', 'two', 'one', 'two']]
        index = MultiIndex.from_tuples(lzip(*labels))
        s = Series(randn(8), index=index)

        level_arrays = [np.array(x) for x in zip(*index.values)]

        result = s['qux']
        result2 = s.loc['qux']
        expected = s[level_arrays[0] == 'qux']
        expected.index = expected.index.droplevel(0)
        tm.assert_series_equal(result, expected)
        tm.assert_series_equal(result2, expected)
Lines changed: 164 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,164 @@
1+
import numpy as np
2+
import pytest
3+
4+
from pandas.compat import StringIO, lrange, product as cart_product
5+
6+
from pandas import DataFrame, Index, MultiIndex, concat, read_csv
7+
import pandas.core.common as com
8+
from pandas.util import testing as tm
9+
10+
11+
class TestMultiIndexXs(object):
    """Cross-section (``xs``) checks on MultiIndex frames and series."""

    def test_xs_multiindex(self):
        """``xs`` along ``axis=1`` selecting by a named column level."""
        # GH2903
        columns = MultiIndex.from_tuples(
            [('a', 'foo'), ('a', 'bar'), ('b', 'hello'),
             ('b', 'world')], names=['lvl0', 'lvl1'])
        df = DataFrame(np.random.randn(4, 4), columns=columns)
        df.sort_index(axis=1, inplace=True)
        result = df.xs('a', level='lvl0', axis=1)
        expected = df.iloc[:, 0:2].loc[:, 'a']
        tm.assert_frame_equal(result, expected)

        result = df.xs('foo', level='lvl1', axis=1)
        expected = df.iloc[:, 1:2].copy()
        expected.columns = expected.columns.droplevel('lvl1')
        tm.assert_frame_equal(result, expected)

    def test_xs(self, multiindex_dataframe_random_data):
        """``xs`` with a full key, and with a level holding a missing value."""
        frame = multiindex_dataframe_random_data
        xs = frame.xs(('bar', 'two'))
        xs2 = frame.loc[('bar', 'two')]

        tm.assert_series_equal(xs, xs2)
        tm.assert_almost_equal(xs.values, frame.values[4])

        # GH 6574
        # missing values in returned index should be preserved
        acc = [
            ('a', 'abcde', 1),
            ('b', 'bbcde', 2),
            ('y', 'yzcde', 25),
            ('z', 'xbcde', 24),
            ('z', None, 26),
            ('z', 'zbcde', 25),
            ('z', 'ybcde', 26),
        ]
        df = DataFrame(acc,
                       columns=['a1', 'a2', 'cnt']).set_index(['a1', 'a2'])
        expected = DataFrame({'cnt': [24, 26, 25, 26]}, index=Index(
            ['xbcde', np.nan, 'zbcde', 'ybcde'], name='a2'))

        result = df.xs('z', level='a1')
        tm.assert_frame_equal(result, expected)

    def test_xs_with_duplicates(self, multiindex_dataframe_random_data):
        """``xs`` with ``level=`` on a non-unique index (Issue #13719)."""
        frame = multiindex_dataframe_random_data
        df_dup = concat([frame] * 2)
        assert df_dup.index.is_unique is False
        expected = concat([frame.xs('one', level='second')] * 2)
        tm.assert_frame_equal(df_dup.xs('one', level='second'), expected)
        tm.assert_frame_equal(df_dup.xs(['one'], level=['second']), expected)

    def test_xs_level(self, multiindex_dataframe_random_data):
        """``xs`` selecting on a single level given by name or position."""
        frame = multiindex_dataframe_random_data
        result = frame.xs('two', level='second')
        expected = frame[frame.index.get_level_values(1) == 'two']
        expected.index = expected.index.droplevel(1)

        tm.assert_frame_equal(result, expected)

        index = MultiIndex.from_tuples([('x', 'y', 'z'), ('a', 'b', 'c'),
                                        ('p', 'q', 'r')])
        df = DataFrame(np.random.randn(3, 5), index=index)
        result = df.xs('c', level=2)
        expected = df[1:2]
        expected.index = expected.index.droplevel(2)
        tm.assert_frame_equal(result, expected)

        # this is a copy in 0.14
        result = frame.xs('two', level='second')

        # setting this will give a SettingWithCopyError
        # as we are trying to write a view
        with pytest.raises(com.SettingWithCopyError):
            result[:] = 10

    def test_xs_level_multiple(self):
        """``xs`` selecting on several levels at once."""
        text = """ A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""

        df = read_csv(StringIO(text), sep=r'\s+', engine='python')

        result = df.xs(('a', 4), level=['one', 'four'])
        expected = df.xs('a').xs(4, level='four')
        tm.assert_frame_equal(result, expected)

        # this is a copy in 0.14
        result = df.xs(('a', 4), level=['one', 'four'])

        # setting this will give a SettingWithCopyError
        # as we are trying to write a view
        with pytest.raises(com.SettingWithCopyError):
            result[:] = 10

        # GH2107
        dates = lrange(20111201, 20111205)
        ids = 'abcde'
        idx = MultiIndex.from_tuples([x for x in cart_product(dates, ids)])
        idx.names = ['date', 'secid']
        df = DataFrame(np.random.randn(len(idx), 3), idx, ['X', 'Y', 'Z'])

        rs = df.xs(20111201, level='date')
        xp = df.loc[20111201, :]
        tm.assert_frame_equal(rs, xp)

    def test_xs_level0(self):
        """``xs`` with ``level=0`` matches the default ``xs`` lookup."""
        text = """ A B C D E
one two three four
a b 10.0032 5 -0.5109 -2.3358 -0.4645 0.05076 0.3640
a q 20 4 0.4473 1.4152 0.2834 1.00661 0.1744
x q 30 3 -0.6662 -0.5243 -0.3580 0.89145 2.5838"""

        df = read_csv(StringIO(text), sep=r'\s+', engine='python')

        result = df.xs('a', level=0)
        expected = df.xs('a')
        assert len(result) == 2
        tm.assert_frame_equal(result, expected)

    def test_xs_level_series(self, multiindex_dataframe_random_data,
                             multiindex_year_month_day_dataframe_random_data):
        """Series getitem with partial tuple keys matches frame lookups."""
        frame = multiindex_dataframe_random_data
        ymd = multiindex_year_month_day_dataframe_random_data
        s = frame['A']
        result = s[:, 'two']
        expected = frame.xs('two', level=1)['A']
        tm.assert_series_equal(result, expected)

        s = ymd['A']
        result = s[2000, 5]
        expected = ymd.loc[2000, 5]['A']
        tm.assert_series_equal(result, expected)

        # not implementing this for now

        with pytest.raises(TypeError):
            s[2000, 3:4]

        # result = s[2000, 3:4]
        # lv =s.index.get_level_values(1)
        # expected = s[(lv == 3) | (lv == 4)]
        # expected.index = expected.index.droplevel(0)
        # tm.assert_series_equal(result, expected)

        # can do this though

‎pandas/tests/indexing/test_multiindex.py

Lines changed: 0 additions & 2249 deletions
This file was deleted.

0 commit comments

Comments
 (0)
Please sign in to comment.