Skip to content

REF/TST: Add more pytest idiom to indexing/multiindex/test_getitem.py #24452

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 28, 2018
184 changes: 95 additions & 89 deletions pandas/tests/indexing/multiindex/test_getitem.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import numpy as np
import pytest

from pandas.compat import StringIO, lrange, range, u, zip
from pandas.compat import range, u, zip

import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series
Expand All @@ -10,6 +10,28 @@
from pandas.util import testing as tm


@pytest.fixture
def frame_random_data_integer_multi_index():
levels = [[0, 1], [0, 1, 2]]
codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
index = MultiIndex(levels=levels, codes=codes)
return DataFrame(np.random.randn(6, 2), index=index)


@pytest.fixture
def dataframe_with_duplicate_index():
"""Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
data = [['a', 'd', 'e', 'c', 'f', 'b'],
[1, 4, 5, 3, 6, 2],
[1, 4, 5, 3, 6, 2]]
index = ['h1', 'h3', 'h5']
columns = MultiIndex(
levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']],
codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
names=['main', 'sub'])
return DataFrame(data, index=index, columns=columns)


@pytest.mark.parametrize('access_method', [lambda s, x: s[:, x],
lambda s, x: s.loc[:, x],
lambda s, x: s.xs(x, level=1)])
Expand Down Expand Up @@ -206,116 +228,104 @@ def test_series_getitem_corner_generator(


def test_frame_getitem_multicolumn_empty_level():
f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']})
f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'],
['level3 item1', 'level3 item2']]
df = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']})
df.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'],
['level3 item1', 'level3 item2']]

result = f['level1 item1']
expected = DataFrame([['1'], ['2'], ['3']], index=f.index,
result = df['level1 item1']
expected = DataFrame([['1'], ['2'], ['3']], index=df.index,
columns=['level3 item1'])
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
def test_getitem_tuple_plus_slice():
# GH #671
df = DataFrame({'a': lrange(10),
'b': lrange(10),
# GH 671
df = DataFrame({'a': np.arange(10),
'b': np.arange(10),
'c': np.random.randn(10),
'd': np.random.randn(10)})

idf = df.set_index(['a', 'b'])

result = idf.loc[(0, 0), :]
expected = idf.loc[0, 0]
expected2 = idf.xs((0, 0))
expected3 = idf.ix[0, 0]

'd': np.random.randn(10)}
).set_index(['a', 'b'])
expected = df.loc[0, 0]
result = df.loc[(0, 0), :]
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected2)
tm.assert_series_equal(result, expected3)


def test_getitem_toplevel(multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T

result = df['foo']
expected = df.reindex(columns=df.columns[:3])
expected.columns = expected.columns.droplevel(0)
tm.assert_frame_equal(result, expected)

result = df['bar']
result2 = df.loc[:, 'bar']

expected = df.reindex(columns=df.columns[3:5])
@pytest.mark.parametrize('indexer,expected_slice', [
(lambda df: df['foo'], slice(3)),
(lambda df: df['bar'], slice(3, 5)),
(lambda df: df.loc[:, 'bar'], slice(3, 5))
])
def test_getitem_toplevel(
multiindex_dataframe_random_data, indexer, expected_slice):
df = multiindex_dataframe_random_data.T
expected = df.reindex(columns=df.columns[expected_slice])
expected.columns = expected.columns.droplevel(0)
result = indexer(df)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, result2)


def test_getitem_int(multiindex_dataframe_random_data):
levels = [[0, 1], [0, 1, 2]]
codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
index = MultiIndex(levels=levels, codes=codes)

frame = DataFrame(np.random.randn(6, 2), index=index)

result = frame.loc[1]
expected = frame[-3:]
def test_getitem_int(frame_random_data_integer_multi_index):
df = frame_random_data_integer_multi_index
result = df.loc[1]
expected = df[-3:]
expected.index = expected.index.droplevel(0)
tm.assert_frame_equal(result, expected)

# raises exception

def test_getitem_int_raises_exception(frame_random_data_integer_multi_index):
df = frame_random_data_integer_multi_index
msg = "3"
with pytest.raises(KeyError, match=msg):
frame.loc.__getitem__(3)
df.loc.__getitem__(3)

# however this will work
frame = multiindex_dataframe_random_data
result = frame.iloc[2]
expected = frame.xs(frame.index[2])
tm.assert_series_equal(result, expected)

def test_getitem_iloc(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.iloc[2]
expected = df.xs(df.index[2])
tm.assert_series_equal(result, expected)

def test_frame_getitem_view(multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T.copy()

def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
# this works because we are modifying the underlying array
# really a no-no
df = multiindex_dataframe_random_data.T
df['foo'].values[:] = 0
assert (df['foo'].values == 0).all()

# but not if it's mixed-type
df['foo', 'four'] = 'foo'
df = df.sort_index(level=0, axis=1)

# this will work, but will raise/warn as its chained assignment
def f():
def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
# will raise/warn as its chained assignment
df = multiindex_dataframe_random_data.T
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
df['foo']['one'] = 2
return df


def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data.T
expected = frame
df = frame.copy()
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
df['foo']['one'] = 2

try:
df = f()
except ValueError:
pass
assert (df['foo', 'one'] == 0).all()
result = df
tm.assert_frame_equal(result, expected)


def test_getitem_lowerdim_corner(multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
msg = "11"
with pytest.raises(KeyError, match=msg):
frame.loc.__getitem__((('bar', 'three'), 'B'))
df = multiindex_dataframe_random_data

# test setup - check key not in dataframe
with pytest.raises(KeyError, match="11"):
df.loc[('bar', 'three'), 'B']

# in theory should be inserting in a sorted space????
frame.loc[('bar', 'three'), 'B'] = 0
assert frame.sort_index().loc[('bar', 'three'), 'B'] == 0
df.loc[('bar', 'three'), 'B'] = 0
expected = 0
result = df.sort_index().loc[('bar', 'three'), 'B']
assert result == expected


@pytest.mark.parametrize('unicode_strings', [True, False])
Expand Down Expand Up @@ -345,41 +355,37 @@ def test_mixed_depth_get(unicode_strings):
tm.assert_series_equal(result, expected)


def test_mi_access():

@pytest.mark.parametrize('indexer', [
lambda df: df.loc[:, ('A', 'A1')],
lambda df: df[('A', 'A1')]
])
def test_mi_access(dataframe_with_duplicate_index, indexer):
# GH 4145
data = """h1 main h3 sub h5
0 a A 1 A1 1
1 b B 2 B1 2
2 c B 3 A1 3
3 d A 4 B2 4
4 e A 5 B2 5
5 f B 6 A2 6
"""

df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0)
df2 = df.set_index(['main', 'sub']).T.sort_index(1)
df = dataframe_with_duplicate_index
index = Index(['h1', 'h3', 'h5'])
columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub'])
expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T

result = df2.loc[:, ('A', 'A1')]
result = indexer(df)
tm.assert_frame_equal(result, expected)

result = df2[('A', 'A1')]
tm.assert_frame_equal(result, expected)

def test_mi_access_returns_series(dataframe_with_duplicate_index):
# GH 4146, not returning a block manager when selecting a unique index
# from a duplicate index
# as of 4879, this returns a Series (which is similar to what happens
# with a non-unique)
df = dataframe_with_duplicate_index
expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1')
result = df2['A']['A1']
result = df['A']['A1']
tm.assert_series_equal(result, expected)


def test_mi_access_returns_frame(dataframe_with_duplicate_index):
# selecting a non_unique from the 2nd level
df = dataframe_with_duplicate_index
expected = DataFrame([['d', 4, 4], ['e', 5, 5]],
index=Index(['B2', 'B2'], name='sub'),
columns=['h1', 'h3', 'h5'], ).T
result = df2['A']['B2']
result = df['A']['B2']
tm.assert_frame_equal(result, expected)