Skip to content

REF/TST: Add more pytest idiom to indexing/multiindex/test_getitem.py #24452

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 28, 2018
178 changes: 93 additions & 85 deletions pandas/tests/indexing/multiindex/test_getitem.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import numpy as np
import pytest

from pandas.compat import StringIO, lrange, range, u, zip
from pandas.compat import range, u, zip

import pandas as pd
from pandas import DataFrame, Index, MultiIndex, Series
import pandas.core.common as com
from pandas.core.common import SettingWithCopyError
from pandas.core.indexing import IndexingError
from pandas.util import testing as tm

Expand Down Expand Up @@ -206,116 +206,114 @@ def test_series_getitem_corner_generator(


def test_frame_getitem_multicolumn_empty_level():
f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']})
f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'],
['level3 item1', 'level3 item2']]
df = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']})
df.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'],
['level3 item1', 'level3 item2']]

result = f['level1 item1']
expected = DataFrame([['1'], ['2'], ['3']], index=f.index,
result = df['level1 item1']
expected = DataFrame([['1'], ['2'], ['3']], index=df.index,
columns=['level3 item1'])
tm.assert_frame_equal(result, expected)


@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning")
def test_getitem_tuple_plus_slice():
# GH #671
df = DataFrame({'a': lrange(10),
'b': lrange(10),
# GH 671
df = DataFrame({'a': np.arange(10),
'b': np.arange(10),
'c': np.random.randn(10),
'd': np.random.randn(10)})

idf = df.set_index(['a', 'b'])

result = idf.loc[(0, 0), :]
expected = idf.loc[0, 0]
expected2 = idf.xs((0, 0))
expected3 = idf.ix[0, 0]

'd': np.random.randn(10)}
).set_index(['a', 'b'])
expected = df.loc[0, 0]
result = df.loc[(0, 0), :]
tm.assert_series_equal(result, expected)
tm.assert_series_equal(result, expected2)
tm.assert_series_equal(result, expected3)


def test_getitem_toplevel(multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T

result = df['foo']
expected = df.reindex(columns=df.columns[:3])
expected.columns = expected.columns.droplevel(0)
tm.assert_frame_equal(result, expected)

result = df['bar']
result2 = df.loc[:, 'bar']

expected = df.reindex(columns=df.columns[3:5])
@pytest.mark.parametrize('indexer,expected_slice', [
(lambda df: df['foo'], slice(3)),
(lambda df: df['bar'], slice(3, 5)),
(lambda df: df.loc[:, 'bar'], slice(3, 5))
])
def test_getitem_toplevel(
multiindex_dataframe_random_data, indexer, expected_slice):
df = multiindex_dataframe_random_data.T
expected = df.reindex(columns=df.columns[expected_slice])
expected.columns = expected.columns.droplevel(0)
result = indexer(df)
tm.assert_frame_equal(result, expected)
tm.assert_frame_equal(result, result2)


def test_getitem_int(multiindex_dataframe_random_data):
@pytest.fixture
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ideally put the fixtures at the top

def frame_random_data_integer_multi_index():
levels = [[0, 1], [0, 1, 2]]
codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]]
index = MultiIndex(levels=levels, codes=codes)
return DataFrame(np.random.randn(6, 2), index=index)

frame = DataFrame(np.random.randn(6, 2), index=index)

result = frame.loc[1]
expected = frame[-3:]
def test_getitem_int(frame_random_data_integer_multi_index):
df = frame_random_data_integer_multi_index
result = df.loc[1]
expected = df[-3:]
expected.index = expected.index.droplevel(0)
tm.assert_frame_equal(result, expected)

# raises exception

def test_getitem_int_raises_exception(frame_random_data_integer_multi_index):
df = frame_random_data_integer_multi_index
msg = "3"
with pytest.raises(KeyError, match=msg):
frame.loc.__getitem__(3)
df.loc.__getitem__(3)

# however this will work
frame = multiindex_dataframe_random_data
result = frame.iloc[2]
expected = frame.xs(frame.index[2])
tm.assert_series_equal(result, expected)

def test_getitem_iloc(multiindex_dataframe_random_data):
df = multiindex_dataframe_random_data
result = df.iloc[2]
expected = df.xs(df.index[2])
tm.assert_series_equal(result, expected)

def test_frame_getitem_view(multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
df = frame.T.copy()

def test_frame_setitem_view_direct(multiindex_dataframe_random_data):
# this works because we are modifying the underlying array
# really a no-no
df = multiindex_dataframe_random_data.T
df['foo'].values[:] = 0
assert (df['foo'].values == 0).all()

# but not if it's mixed-type
df['foo', 'four'] = 'foo'
df = df.sort_index(level=0, axis=1)

# this will work, but will raise/warn as its chained assignment
def f():
df['foo']['one'] = 2
return df

def test_frame_setitem_copy_raises(multiindex_dataframe_random_data):
# will raise/warn as its chained assignment
df = multiindex_dataframe_random_data.T
msg = "A value is trying to be set on a copy of a slice from a DataFrame"
with pytest.raises(com.SettingWithCopyError, match=msg):
with pytest.raises(SettingWithCopyError, match=msg):
df['foo']['one'] = 2


def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data.T
expected = frame
df = frame.copy()

try:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

don't do things like this; use the context manager like above

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i've changed as requested. though not convinced this is the right thing to do in situations like this. the previous test tested that the chained assignment raised the correct exception and this test is to check that the dataframe values are unchanged. having a context manager in the test adds unnecessary/unseen setup and teardown code that could potentially interfere with the tested functionality.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah this must have been changed a while back, the context manager is the correct idiom

df = f()
except ValueError:
df['foo']['one'] = 2
except SettingWithCopyError:
pass
assert (df['foo', 'one'] == 0).all()

result = df
tm.assert_frame_equal(result, expected)


def test_getitem_lowerdim_corner(multiindex_dataframe_random_data):
frame = multiindex_dataframe_random_data
msg = "11"
with pytest.raises(KeyError, match=msg):
frame.loc.__getitem__((('bar', 'three'), 'B'))
df = multiindex_dataframe_random_data

# test setup - check key not in dataframe
with pytest.raises(KeyError, match="11"):
df.loc[('bar', 'three'), 'B']

# in theory should be inserting in a sorted space????
frame.loc[('bar', 'three'), 'B'] = 0
assert frame.sort_index().loc[('bar', 'three'), 'B'] == 0
df.loc[('bar', 'three'), 'B'] = 0
expected = 0
result = df.sort_index().loc[('bar', 'three'), 'B']
assert result == expected


@pytest.mark.parametrize('unicode_strings', [True, False])
Expand Down Expand Up @@ -345,41 +343,51 @@ def test_mixed_depth_get(unicode_strings):
tm.assert_series_equal(result, expected)


def test_mi_access():
@pytest.fixture
def dataframe_with_duplicate_index():
"""Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
data = [['a', 'd', 'e', 'c', 'f', 'b'],
[1, 4, 5, 3, 6, 2],
[1, 4, 5, 3, 6, 2]]
index = ['h1', 'h3', 'h5']
columns = MultiIndex(
levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']],
codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]],
names=['main', 'sub'])
return DataFrame(data, index=index, columns=columns)


@pytest.mark.parametrize('indexer', [
lambda df: df.loc[:, ('A', 'A1')],
lambda df: df[('A', 'A1')]
])
def test_mi_access(dataframe_with_duplicate_index, indexer):
# GH 4145
data = """h1 main h3 sub h5
0 a A 1 A1 1
1 b B 2 B1 2
2 c B 3 A1 3
3 d A 4 B2 4
4 e A 5 B2 5
5 f B 6 A2 6
"""

df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0)
df2 = df.set_index(['main', 'sub']).T.sort_index(1)
df = dataframe_with_duplicate_index
index = Index(['h1', 'h3', 'h5'])
columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub'])
expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T

result = df2.loc[:, ('A', 'A1')]
result = indexer(df)
tm.assert_frame_equal(result, expected)

result = df2[('A', 'A1')]
tm.assert_frame_equal(result, expected)

def test_mi_access_returns_series(dataframe_with_duplicate_index):
# GH 4146, not returning a block manager when selecting a unique index
# from a duplicate index
# as of 4879, this returns a Series (which is similar to what happens
# with a non-unique)
df = dataframe_with_duplicate_index
expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1')
result = df2['A']['A1']
result = df['A']['A1']
tm.assert_series_equal(result, expected)


def test_mi_access_returns_frame(dataframe_with_duplicate_index):
# selecting a non_unique from the 2nd level
df = dataframe_with_duplicate_index
expected = DataFrame([['d', 4, 4], ['e', 5, 5]],
index=Index(['B2', 'B2'], name='sub'),
columns=['h1', 'h3', 'h5'], ).T
result = df2['A']['B2']
result = df['A']['B2']
tm.assert_frame_equal(result, expected)