Skip to content

Commit e830ed7

Browse files
simonjayhawkins authored and Pingviinituutti committed
REF/TST: Add more pytest idiom to indexing/multiindex/test_getitem.py (pandas-dev#24452)
1 parent 85bd799 commit e830ed7

File tree

1 file changed

+95
-89
lines changed

1 file changed

+95
-89
lines changed

pandas/tests/indexing/multiindex/test_getitem.py

+95 -89
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,7 @@
11
import numpy as np
22
import pytest
33

4-
from pandas.compat import StringIO, lrange, range, u, zip
4+
from pandas.compat import range, u, zip
55

66
import pandas as pd
77
from pandas import DataFrame, Index, MultiIndex, Series
@@ -10,6 +10,28 @@
1010
from pandas.util import testing as tm
1111

1212

13+
@pytest.fixture
14+
def frame_random_data_integer_multi_index():
15+
levels = [[0, 1], [0, 1, 2]]
16+
codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
17+
index = MultiIndex(levels=levels, codes=codes)
18+
return DataFrame(np.random.randn(6, 2), index=index)
19+
20+
21+
@pytest.fixture
22+
def dataframe_with_duplicate_index():
23+
"""Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
24+
data = [['a', 'd', 'e', 'c', 'f', 'b'],
25+
[1, 4, 5, 3, 6, 2],
26+
[1, 4, 5, 3, 6, 2]]
27+
index = ['h1', 'h3', 'h5']
28+
columns = MultiIndex(
29+
levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']],
30+
codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
31+
names=['main', 'sub'])
32+
return DataFrame(data, index=index, columns=columns)
33+
34+
1335
@pytest.mark.parametrize('access_method', [lambda s, x: s[:, x],
1436
lambda s, x: s.loc[:, x],
1537
lambda s, x: s.xs(x, level=1)])
@@ -206,116 +228,104 @@ def test_series_getitem_corner_generator(
206228

207229

208230
def test_frame_getitem_multicolumn_empty_level():
209-
f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']})
210-
f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'],
211-
['level3 item1', 'level3 item2']]
231+
df = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']})
232+
df.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'],
233+
['level3 item1', 'level3 item2']]
212234

213-
result = f['level1 item1']
214-
expected = DataFrame([['1'], ['2'], ['3']], index=f.index,
235+
result = df['level1 item1']
236+
expected = DataFrame([['1'], ['2'], ['3']], index=df.index,
215237
columns=['level3 item1'])
216238
tm.assert_frame_equal(result, expected)
217239

218240

219-
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
220241
def test_getitem_tuple_plus_slice():
221-
# GH #671
222-
df = DataFrame({'a': lrange(10),
223-
'b': lrange(10),
242+
# GH 671
243+
df = DataFrame({'a': np.arange(10),
244+
'b': np.arange(10),
224245
'c': np.random.randn(10),
225-
'd': np.random.randn(10)})
226-
227-
idf = df.set_index(['a', 'b'])
228-
229-
result = idf.loc[(0, 0), :]
230-
expected = idf.loc[0, 0]
231-
expected2 = idf.xs((0, 0))
232-
expected3 = idf.ix[0, 0]
233-
246+
'd': np.random.randn(10)}
247+
).set_index(['a', 'b'])
248+
expected = df.loc[0, 0]
249+
result = df.loc[(0, 0), :]
234250
tm.assert_series_equal(result, expected)
235-
tm.assert_series_equal(result, expected2)
236-
tm.assert_series_equal(result, expected3)
237251

238252

239-
def test_getitem_toplevel(multiindex_dataframe_random_data):
240-
frame = multiindex_dataframe_random_data
241-
df = frame.T
242-
243-
result = df['foo']
244-
expected = df.reindex(columns=df.columns[:3])
245-
expected.columns = expected.columns.droplevel(0)
246-
tm.assert_frame_equal(result, expected)
247-
248-
result = df['bar']
249-
result2 = df.loc[:, 'bar']
250-
251-
expected = df.reindex(columns=df.columns[3:5])
253+
@pytest.mark.parametrize('indexer,expected_slice', [
254+
(lambda df: df['foo'], slice(3)),
255+
(lambda df: df['bar'], slice(3, 5)),
256+
(lambda df: df.loc[:, 'bar'], slice(3, 5))
257+
])
258+
def test_getitem_toplevel(
259+
multiindex_dataframe_random_data, indexer, expected_slice):
260+
df = multiindex_dataframe_random_data.T
261+
expected = df.reindex(columns=df.columns[expected_slice])
252262
expected.columns = expected.columns.droplevel(0)
263+
result = indexer(df)
253264
tm.assert_frame_equal(result, expected)
254-
tm.assert_frame_equal(result, result2)
255265

256266

257-
def test_getitem_int(multiindex_dataframe_random_data):
258-
levels = [[0, 1], [0, 1, 2]]
259-
codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
260-
index = MultiIndex(levels=levels, codes=codes)
261-
262-
frame = DataFrame(np.random.randn(6, 2), index=index)
263-
264-
result = frame.loc[1]
265-
expected = frame[-3:]
267+
def test_getitem_int(frame_random_data_integer_multi_index):
268+
df = frame_random_data_integer_multi_index
269+
result = df.loc[1]
270+
expected = df[-3:]
266271
expected.index = expected.index.droplevel(0)
267272
tm.assert_frame_equal(result, expected)
268273

269-
# raises exception
274+
275+
def test_getitem_int_raises_exception(frame_random_data_integer_multi_index):
276+
df = frame_random_data_integer_multi_index
270277
msg = "3"
271278
with pytest.raises(KeyError, match=msg):
272-
frame.loc.__getitem__(3)
279+
df.loc.__getitem__(3)
273280

274-
# however this will work
275-
frame = multiindex_dataframe_random_data
276-
result = frame.iloc[2]
277-
expected = frame.xs(frame.index[2])
278-
tm.assert_series_equal(result, expected)
279281

282+
def test_getitem_iloc(multiindex_dataframe_random_data):
283+
df = multiindex_dataframe_random_data
284+
result = df.iloc[2]
285+
expected = df.xs(df.index[2])
286+
tm.assert_series_equal(result, expected)
280287

281-
def test_frame_getitem_view(multiindex_dataframe_random_data):
282-
frame = multiindex_dataframe_random_data
283-
df = frame.T.copy()
284288

289+
def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
285290
# this works because we are modifying the underlying array
286291
# really a no-no
292+
df = multiindex_dataframe_random_data.T
287293
df['foo'].values[:] = 0
288294
assert (df['foo'].values == 0).all()
289295

290-
# but not if it's mixed-type
291-
df['foo', 'four'] = 'foo'
292-
df = df.sort_index(level=0, axis=1)
293296

294-
# this will work, but will raise/warn as its chained assignment
295-
def f():
297+
def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
298+
# will raise/warn as its chained assignment
299+
df = multiindex_dataframe_random_data.T
300+
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
301+
with pytest.raises(com.SettingWithCopyError, match=msg):
296302
df['foo']['one'] = 2
297-
return df
298303

304+
305+
def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
306+
frame = multiindex_dataframe_random_data.T
307+
expected = frame
308+
df = frame.copy()
299309
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
300310
with pytest.raises(com.SettingWithCopyError, match=msg):
301311
df['foo']['one'] = 2
302312

303-
try:
304-
df = f()
305-
except ValueError:
306-
pass
307-
assert (df['foo', 'one'] == 0).all()
313+
result = df
314+
tm.assert_frame_equal(result, expected)
308315

309316

310317
def test_getitem_lowerdim_corner(multiindex_dataframe_random_data):
311-
frame = multiindex_dataframe_random_data
312-
msg = "11"
313-
with pytest.raises(KeyError, match=msg):
314-
frame.loc.__getitem__((('bar', 'three'), 'B'))
318+
df = multiindex_dataframe_random_data
319+
320+
# test setup - check key not in dataframe
321+
with pytest.raises(KeyError, match="11"):
322+
df.loc[('bar', 'three'), 'B']
315323

316324
# in theory should be inserting in a sorted space????
317-
frame.loc[('bar', 'three'), 'B'] = 0
318-
assert frame.sort_index().loc[('bar', 'three'), 'B'] == 0
325+
df.loc[('bar', 'three'), 'B'] = 0
326+
expected = 0
327+
result = df.sort_index().loc[('bar', 'three'), 'B']
328+
assert result == expected
319329

320330

321331
@pytest.mark.parametrize('unicode_strings', [True, False])
@@ -345,41 +355,37 @@ def test_mixed_depth_get(unicode_strings):
345355
tm.assert_series_equal(result, expected)
346356

347357

348-
def test_mi_access():
349-
358+
@pytest.mark.parametrize('indexer', [
359+
lambda df: df.loc[:, ('A', 'A1')],
360+
lambda df: df[('A', 'A1')]
361+
])
362+
def test_mi_access(dataframe_with_duplicate_index, indexer):
350363
# GH 4145
351-
data = """h1 main h3 sub h5
352-
0 a A 1 A1 1
353-
1 b B 2 B1 2
354-
2 c B 3 A1 3
355-
3 d A 4 B2 4
356-
4 e A 5 B2 5
357-
5 f B 6 A2 6
358-
"""
359-
360-
df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0)
361-
df2 = df.set_index(['main', 'sub']).T.sort_index(1)
364+
df = dataframe_with_duplicate_index
362365
index = Index(['h1', 'h3', 'h5'])
363366
columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub'])
364367
expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T
365368

366-
result = df2.loc[:, ('A', 'A1')]
369+
result = indexer(df)
367370
tm.assert_frame_equal(result, expected)
368371

369-
result = df2[('A', 'A1')]
370-
tm.assert_frame_equal(result, expected)
371372

373+
def test_mi_access_returns_series(dataframe_with_duplicate_index):
372374
# GH 4146, not returning a block manager when selecting a unique index
373375
# from a duplicate index
374376
# as of 4879, this returns a Series (which is similar to what happens
375377
# with a non-unique)
378+
df = dataframe_with_duplicate_index
376379
expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1')
377-
result = df2['A']['A1']
380+
result = df['A']['A1']
378381
tm.assert_series_equal(result, expected)
379382

383+
384+
def test_mi_access_returns_frame(dataframe_with_duplicate_index):
380385
# selecting a non_unique from the 2nd level
386+
df = dataframe_with_duplicate_index
381387
expected = DataFrame([['d', 4, 4], ['e', 5, 5]],
382388
index=Index(['B2', 'B2'], name='sub'),
383389
columns=['h1', 'h3', 'h5'], ).T
384-
result = df2['A']['B2']
390+
result = df['A']['B2']
385391
tm.assert_frame_equal(result, expected)

0 commit comments

Comments
 (0)