Skip to content

Commit 6329436

Browse files
simonjayhawkins authored and Pingviinituutti committed
CLN/TST: indexing/multiindex/test_getitem.py (pandas-dev#24741)
1 parent 609440e commit 6329436

File tree

4 files changed

+225
-214
lines changed

4 files changed

+225
-214
lines changed

pandas/tests/indexing/multiindex/test_getitem.py

+60-214
Original file line number | Diff line number | Diff line change
@@ -1,35 +1,15 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.compat import range, u, zip
4+
from pandas.compat import u, zip
55

6-
import pandas as pd
76
from pandas import DataFrame, Index, MultiIndex, Series
8-
import pandas.core.common as com
97
from pandas.core.indexing import IndexingError
108
from pandas.util import testing as tm
119

12-
13-
@pytest.fixture
14-
def frame_random_data_integer_multi_index():
15-
levels = [[0, 1], [0, 1, 2]]
16-
codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
17-
index = MultiIndex(levels=levels, codes=codes)
18-
return DataFrame(np.random.randn(6, 2), index=index)
19-
20-
21-
@pytest.fixture
22-
def dataframe_with_duplicate_index():
23-
"""Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
24-
data = [['a', 'd', 'e', 'c', 'f', 'b'],
25-
[1, 4, 5, 3, 6, 2],
26-
[1, 4, 5, 3, 6, 2]]
27-
index = ['h1', 'h3', 'h5']
28-
columns = MultiIndex(
29-
levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']],
30-
codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
31-
names=['main', 'sub'])
32-
return DataFrame(data, index=index, columns=columns)
10+
# ----------------------------------------------------------------------------
11+
# test indexing of Series with multi-level Index
12+
# ----------------------------------------------------------------------------
3313

3414

3515
@pytest.mark.parametrize('access_method', [lambda s, x: s[:, x],
@@ -51,7 +31,7 @@ def test_series_getitem_multiindex(access_method, level1_value, expected):
5131

5232

5333
@pytest.mark.parametrize('level0_value', ['D', 'A'])
54-
def test_getitem_duplicates_multiindex(level0_value):
34+
def test_series_getitem_duplicates_multiindex(level0_value):
5535
# GH 5725 the 'A' happens to be a valid Timestamp so it doesn't raise
5636
# the appropriate error, only in PY3 of course!
5737

@@ -65,12 +45,10 @@ def test_getitem_duplicates_multiindex(level0_value):
6545

6646
# confirm indexing on missing value raises KeyError
6747
if level0_value != 'A':
68-
msg = "'A'"
69-
with pytest.raises(KeyError, match=msg):
48+
with pytest.raises(KeyError, match=r"^'A'$"):
7049
df.val['A']
7150

72-
msg = "'X'"
73-
with pytest.raises(KeyError, match=msg):
51+
with pytest.raises(KeyError, match=r"^'X'$"):
7452
df.val['X']
7553

7654
result = df.val[level0_value]
@@ -79,89 +57,6 @@ def test_getitem_duplicates_multiindex(level0_value):
7957
tm.assert_series_equal(result, expected)
8058

8159

82-
@pytest.mark.parametrize('indexer, is_level1, expected_error', [
83-
([], False, None), # empty ok
84-
(['A'], False, None),
85-
(['A', 'D'], False, None),
86-
(['D'], False, r"\['D'\] not in index"), # not any values found
87-
(pd.IndexSlice[:, ['foo']], True, None),
88-
(pd.IndexSlice[:, ['foo', 'bah']], True, None)
89-
])
90-
def test_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1,
91-
expected_error):
92-
# GH 7866
93-
# multi-index slicing with missing indexers
94-
idx = MultiIndex.from_product([['A', 'B', 'C'],
95-
['foo', 'bar', 'baz']],
96-
names=['one', 'two'])
97-
s = Series(np.arange(9, dtype='int64'), index=idx).sort_index()
98-
99-
if indexer == []:
100-
expected = s.iloc[[]]
101-
elif is_level1:
102-
expected = Series([0, 3, 6], index=MultiIndex.from_product(
103-
[['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index()
104-
else:
105-
exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']],
106-
names=['one', 'two'])
107-
expected = Series(np.arange(3, dtype='int64'),
108-
index=exp_idx).sort_index()
109-
110-
if expected_error is not None:
111-
with pytest.raises(KeyError, match=expected_error):
112-
s.loc[indexer]
113-
else:
114-
result = s.loc[indexer]
115-
tm.assert_series_equal(result, expected)
116-
117-
118-
@pytest.mark.parametrize('columns_indexer', [
119-
([], slice(None)),
120-
(['foo'], [])
121-
])
122-
def test_getitem_duplicates_multiindex_empty_indexer(columns_indexer):
123-
# GH 8737
124-
# empty indexer
125-
multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'],
126-
['alpha', 'beta']))
127-
df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index)
128-
df = df.sort_index(level=0, axis=1)
129-
130-
expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0])
131-
result = df.loc[:, columns_indexer]
132-
tm.assert_frame_equal(result, expected)
133-
134-
135-
def test_getitem_duplicates_multiindex_non_scalar_type_object():
136-
# regression from < 0.14.0
137-
# GH 7914
138-
df = DataFrame([[np.mean, np.median], ['mean', 'median']],
139-
columns=MultiIndex.from_tuples([('functs', 'mean'),
140-
('functs', 'median')]),
141-
index=['function', 'name'])
142-
result = df.loc['function', ('functs', 'mean')]
143-
expected = np.mean
144-
assert result == expected
145-
146-
147-
def test_getitem_simple(multiindex_dataframe_random_data):
148-
df = multiindex_dataframe_random_data.T
149-
expected = df.values[:, 0]
150-
result = df['foo', 'one'].values
151-
tm.assert_almost_equal(result, expected)
152-
153-
154-
@pytest.mark.parametrize('indexer,msg', [
155-
(lambda df: df[('foo', 'four')], r"\('foo', 'four'\)"),
156-
(lambda df: df['foobar'], "'foobar'")
157-
])
158-
def test_getitem_simple_key_error(
159-
multiindex_dataframe_random_data, indexer, msg):
160-
df = multiindex_dataframe_random_data.T
161-
with pytest.raises(KeyError, match=msg):
162-
indexer(df)
163-
164-
16560
@pytest.mark.parametrize('indexer', [
16661
lambda s: s[2000, 3],
16762
lambda s: s.loc[2000, 3]
@@ -189,33 +84,20 @@ def test_series_getitem_returns_scalar(
18984
assert result == expected
19085

19186

192-
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
193-
@pytest.mark.parametrize('indexer', [
194-
lambda s: s.loc[[(2000, 3, 10), (2000, 3, 13)]],
195-
lambda s: s.ix[[(2000, 3, 10), (2000, 3, 13)]]
196-
])
197-
def test_series_getitem_fancy(
198-
multiindex_year_month_day_dataframe_random_data, indexer):
199-
s = multiindex_year_month_day_dataframe_random_data['A']
200-
expected = s.reindex(s.index[49:51])
201-
202-
result = indexer(s)
203-
tm.assert_series_equal(result, expected)
204-
205-
206-
@pytest.mark.parametrize('indexer,error,msg', [
207-
(lambda s: s.__getitem__((2000, 3, 4)), KeyError, '356'),
208-
(lambda s: s[(2000, 3, 4)], KeyError, '356'),
87+
@pytest.mark.parametrize('indexer,expected_error,expected_error_msg', [
88+
(lambda s: s.__getitem__((2000, 3, 4)), KeyError, r"^356L?$"),
89+
(lambda s: s[(2000, 3, 4)], KeyError, r"^356L?$"),
20990
(lambda s: s.loc[(2000, 3, 4)], IndexingError, 'Too many indexers'),
21091
(lambda s: s.__getitem__(len(s)), IndexError, 'index out of bounds'),
21192
(lambda s: s[len(s)], IndexError, 'index out of bounds'),
21293
(lambda s: s.iloc[len(s)], IndexError,
21394
'single positional indexer is out-of-bounds')
21495
])
21596
def test_series_getitem_indexing_errors(
216-
multiindex_year_month_day_dataframe_random_data, indexer, error, msg):
97+
multiindex_year_month_day_dataframe_random_data, indexer,
98+
expected_error, expected_error_msg):
21799
s = multiindex_year_month_day_dataframe_random_data['A']
218-
with pytest.raises(error, match=msg):
100+
with pytest.raises(expected_error, match=expected_error_msg):
219101
indexer(s)
220102

221103

@@ -227,6 +109,28 @@ def test_series_getitem_corner_generator(
227109
tm.assert_series_equal(result, expected)
228110

229111

112+
# ----------------------------------------------------------------------------
113+
# test indexing of DataFrame with multi-level Index
114+
# ----------------------------------------------------------------------------
115+
116+
def test_getitem_simple(multiindex_dataframe_random_data):
117+
df = multiindex_dataframe_random_data.T
118+
expected = df.values[:, 0]
119+
result = df['foo', 'one'].values
120+
tm.assert_almost_equal(result, expected)
121+
122+
123+
@pytest.mark.parametrize('indexer,expected_error_msg', [
124+
(lambda df: df[('foo', 'four')], r"^\('foo', 'four'\)$"),
125+
(lambda df: df['foobar'], r"^'foobar'$")
126+
])
127+
def test_frame_getitem_simple_key_error(
128+
multiindex_dataframe_random_data, indexer, expected_error_msg):
129+
df = multiindex_dataframe_random_data.T
130+
with pytest.raises(KeyError, match=expected_error_msg):
131+
indexer(df)
132+
133+
230134
def test_frame_getitem_multicolumn_empty_level():
231135
df = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']})
232136
df.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'],
@@ -238,24 +142,12 @@ def test_frame_getitem_multicolumn_empty_level():
238142
tm.assert_frame_equal(result, expected)
239143

240144

241-
def test_getitem_tuple_plus_slice():
242-
# GH 671
243-
df = DataFrame({'a': np.arange(10),
244-
'b': np.arange(10),
245-
'c': np.random.randn(10),
246-
'd': np.random.randn(10)}
247-
).set_index(['a', 'b'])
248-
expected = df.loc[0, 0]
249-
result = df.loc[(0, 0), :]
250-
tm.assert_series_equal(result, expected)
251-
252-
253145
@pytest.mark.parametrize('indexer,expected_slice', [
254146
(lambda df: df['foo'], slice(3)),
255147
(lambda df: df['bar'], slice(3, 5)),
256148
(lambda df: df.loc[:, 'bar'], slice(3, 5))
257149
])
258-
def test_getitem_toplevel(
150+
def test_frame_getitem_toplevel(
259151
multiindex_dataframe_random_data, indexer, expected_slice):
260152
df = multiindex_dataframe_random_data.T
261153
expected = df.reindex(columns=df.columns[expected_slice])
@@ -264,72 +156,8 @@ def test_getitem_toplevel(
264156
tm.assert_frame_equal(result, expected)
265157

266158

267-
def test_getitem_int(frame_random_data_integer_multi_index):
268-
df = frame_random_data_integer_multi_index
269-
result = df.loc[1]
270-
expected = df[-3:]
271-
expected.index = expected.index.droplevel(0)
272-
tm.assert_frame_equal(result, expected)
273-
274-
275-
def test_getitem_int_raises_exception(frame_random_data_integer_multi_index):
276-
df = frame_random_data_integer_multi_index
277-
msg = "3"
278-
with pytest.raises(KeyError, match=msg):
279-
df.loc.__getitem__(3)
280-
281-
282-
def test_getitem_iloc(multiindex_dataframe_random_data):
283-
df = multiindex_dataframe_random_data
284-
result = df.iloc[2]
285-
expected = df.xs(df.index[2])
286-
tm.assert_series_equal(result, expected)
287-
288-
289-
def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
290-
# this works because we are modifying the underlying array
291-
# really a no-no
292-
df = multiindex_dataframe_random_data.T
293-
df['foo'].values[:] = 0
294-
assert (df['foo'].values == 0).all()
295-
296-
297-
def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
298-
# will raise/warn as its chained assignment
299-
df = multiindex_dataframe_random_data.T
300-
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
301-
with pytest.raises(com.SettingWithCopyError, match=msg):
302-
df['foo']['one'] = 2
303-
304-
305-
def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
306-
frame = multiindex_dataframe_random_data.T
307-
expected = frame
308-
df = frame.copy()
309-
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
310-
with pytest.raises(com.SettingWithCopyError, match=msg):
311-
df['foo']['one'] = 2
312-
313-
result = df
314-
tm.assert_frame_equal(result, expected)
315-
316-
317-
def test_getitem_lowerdim_corner(multiindex_dataframe_random_data):
318-
df = multiindex_dataframe_random_data
319-
320-
# test setup - check key not in dataframe
321-
with pytest.raises(KeyError, match="11"):
322-
df.loc[('bar', 'three'), 'B']
323-
324-
# in theory should be inserting in a sorted space????
325-
df.loc[('bar', 'three'), 'B'] = 0
326-
expected = 0
327-
result = df.sort_index().loc[('bar', 'three'), 'B']
328-
assert result == expected
329-
330-
331159
@pytest.mark.parametrize('unicode_strings', [True, False])
332-
def test_mixed_depth_get(unicode_strings):
160+
def test_frame_mixed_depth_get(unicode_strings):
333161
# If unicode_strings is True, the column labels in dataframe
334162
# construction will use unicode strings in Python 2 (pull request
335163
# #17099).
@@ -355,11 +183,29 @@ def test_mixed_depth_get(unicode_strings):
355183
tm.assert_series_equal(result, expected)
356184

357185

186+
# ----------------------------------------------------------------------------
187+
# test indexing of DataFrame with multi-level Index with duplicates
188+
# ----------------------------------------------------------------------------
189+
190+
@pytest.fixture
191+
def dataframe_with_duplicate_index():
192+
"""Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
193+
data = [['a', 'd', 'e', 'c', 'f', 'b'],
194+
[1, 4, 5, 3, 6, 2],
195+
[1, 4, 5, 3, 6, 2]]
196+
index = ['h1', 'h3', 'h5']
197+
columns = MultiIndex(
198+
levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']],
199+
codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
200+
names=['main', 'sub'])
201+
return DataFrame(data, index=index, columns=columns)
202+
203+
358204
@pytest.mark.parametrize('indexer', [
359-
lambda df: df.loc[:, ('A', 'A1')],
360-
lambda df: df[('A', 'A1')]
205+
lambda df: df[('A', 'A1')],
206+
lambda df: df.loc[:, ('A', 'A1')]
361207
])
362-
def test_mi_access(dataframe_with_duplicate_index, indexer):
208+
def test_frame_mi_access(dataframe_with_duplicate_index, indexer):
363209
# GH 4145
364210
df = dataframe_with_duplicate_index
365211
index = Index(['h1', 'h3', 'h5'])
@@ -370,7 +216,7 @@ def test_mi_access(dataframe_with_duplicate_index, indexer):
370216
tm.assert_frame_equal(result, expected)
371217

372218

373-
def test_mi_access_returns_series(dataframe_with_duplicate_index):
219+
def test_frame_mi_access_returns_series(dataframe_with_duplicate_index):
374220
# GH 4146, not returning a block manager when selecting a unique index
375221
# from a duplicate index
376222
# as of 4879, this returns a Series (which is similar to what happens
@@ -381,7 +227,7 @@ def test_mi_access_returns_series(dataframe_with_duplicate_index):
381227
tm.assert_series_equal(result, expected)
382228

383229

384-
def test_mi_access_returns_frame(dataframe_with_duplicate_index):
230+
def test_frame_mi_access_returns_frame(dataframe_with_duplicate_index):
385231
# selecting a non_unique from the 2nd level
386232
df = dataframe_with_duplicate_index
387233
expected = DataFrame([['d', 4, 4], ['e', 5, 5]],

pandas/tests/indexing/multiindex/test_iloc.py

+7
Original file line number | Diff line number | Diff line change
@@ -142,3 +142,10 @@ def test_iloc_setitem_int_multiindex_series(data, indexes, values, expected_k):
142142
df['k'] = expected_k
143143
expected = df.k
144144
tm.assert_series_equal(series, expected)
145+
146+
147+
def test_getitem_iloc(multiindex_dataframe_random_data):
148+
df = multiindex_dataframe_random_data
149+
result = df.iloc[2]
150+
expected = df.xs(df.index[2])
151+
tm.assert_series_equal(result, expected)

0 commit comments

Comments
 (0)