-
-
Notifications
You must be signed in to change notification settings - Fork 18.4k
REF/TST: Add more pytest idiom to indexing/multiindex/test_getitem.py #24452
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 7 commits
65567bf
d83285d
6124297
2585b5e
1779835
062e48f
f8cd89b
eda86fe
4e9a70f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
import numpy as np | ||
import pytest | ||
|
||
from pandas.compat import StringIO, lrange, range, u, zip | ||
from pandas.compat import range, u, zip | ||
|
||
import pandas as pd | ||
from pandas import DataFrame, Index, MultiIndex, Series | ||
|
@@ -206,116 +206,114 @@ def test_series_getitem_corner_generator( | |
|
||
|
||
def test_frame_getitem_multicolumn_empty_level(): | ||
f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) | ||
f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], | ||
['level3 item1', 'level3 item2']] | ||
df = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) | ||
df.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], | ||
['level3 item1', 'level3 item2']] | ||
|
||
result = f['level1 item1'] | ||
expected = DataFrame([['1'], ['2'], ['3']], index=f.index, | ||
result = df['level1 item1'] | ||
expected = DataFrame([['1'], ['2'], ['3']], index=df.index, | ||
columns=['level3 item1']) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") | ||
def test_getitem_tuple_plus_slice(): | ||
# GH #671 | ||
df = DataFrame({'a': lrange(10), | ||
'b': lrange(10), | ||
# GH 671 | ||
df = DataFrame({'a': np.arange(10), | ||
'b': np.arange(10), | ||
'c': np.random.randn(10), | ||
'd': np.random.randn(10)}) | ||
|
||
idf = df.set_index(['a', 'b']) | ||
|
||
result = idf.loc[(0, 0), :] | ||
expected = idf.loc[0, 0] | ||
expected2 = idf.xs((0, 0)) | ||
expected3 = idf.ix[0, 0] | ||
|
||
'd': np.random.randn(10)} | ||
).set_index(['a', 'b']) | ||
expected = df.loc[0, 0] | ||
result = df.loc[(0, 0), :] | ||
tm.assert_series_equal(result, expected) | ||
tm.assert_series_equal(result, expected2) | ||
tm.assert_series_equal(result, expected3) | ||
|
||
|
||
def test_getitem_toplevel(multiindex_dataframe_random_data): | ||
frame = multiindex_dataframe_random_data | ||
df = frame.T | ||
|
||
result = df['foo'] | ||
expected = df.reindex(columns=df.columns[:3]) | ||
expected.columns = expected.columns.droplevel(0) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
result = df['bar'] | ||
result2 = df.loc[:, 'bar'] | ||
|
||
expected = df.reindex(columns=df.columns[3:5]) | ||
@pytest.mark.parametrize('indexer,expected_slice', [ | ||
(lambda df: df['foo'], slice(3)), | ||
(lambda df: df['bar'], slice(3, 5)), | ||
(lambda df: df.loc[:, 'bar'], slice(3, 5)) | ||
]) | ||
def test_getitem_toplevel( | ||
multiindex_dataframe_random_data, indexer, expected_slice): | ||
df = multiindex_dataframe_random_data.T | ||
expected = df.reindex(columns=df.columns[expected_slice]) | ||
expected.columns = expected.columns.droplevel(0) | ||
result = indexer(df) | ||
tm.assert_frame_equal(result, expected) | ||
tm.assert_frame_equal(result, result2) | ||
|
||
|
||
def test_getitem_int(multiindex_dataframe_random_data): | ||
@pytest.fixture | ||
def frame_random_data_integer_multi_index(): | ||
levels = [[0, 1], [0, 1, 2]] | ||
codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] | ||
index = MultiIndex(levels=levels, codes=codes) | ||
return DataFrame(np.random.randn(6, 2), index=index) | ||
|
||
frame = DataFrame(np.random.randn(6, 2), index=index) | ||
|
||
result = frame.loc[1] | ||
expected = frame[-3:] | ||
def test_getitem_int(frame_random_data_integer_multi_index): | ||
df = frame_random_data_integer_multi_index | ||
result = df.loc[1] | ||
expected = df[-3:] | ||
expected.index = expected.index.droplevel(0) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
# raises exception | ||
|
||
def test_getitem_int_raises_exception(frame_random_data_integer_multi_index): | ||
df = frame_random_data_integer_multi_index | ||
msg = "3" | ||
with pytest.raises(KeyError, match=msg): | ||
frame.loc.__getitem__(3) | ||
df.loc.__getitem__(3) | ||
|
||
# however this will work | ||
frame = multiindex_dataframe_random_data | ||
result = frame.iloc[2] | ||
expected = frame.xs(frame.index[2]) | ||
tm.assert_series_equal(result, expected) | ||
|
||
def test_getitem_iloc(multiindex_dataframe_random_data): | ||
df = multiindex_dataframe_random_data | ||
result = df.iloc[2] | ||
expected = df.xs(df.index[2]) | ||
tm.assert_series_equal(result, expected) | ||
|
||
def test_frame_getitem_view(multiindex_dataframe_random_data): | ||
frame = multiindex_dataframe_random_data | ||
df = frame.T.copy() | ||
|
||
def test_frame_setitem_view_direct(multiindex_dataframe_random_data): | ||
# this works because we are modifying the underlying array | ||
# really a no-no | ||
df = multiindex_dataframe_random_data.T | ||
df['foo'].values[:] = 0 | ||
assert (df['foo'].values == 0).all() | ||
|
||
# but not if it's mixed-type | ||
df['foo', 'four'] = 'foo' | ||
df = df.sort_index(level=0, axis=1) | ||
|
||
# this will work, but will raise/warn as its chained assignment | ||
def f(): | ||
df['foo']['one'] = 2 | ||
return df | ||
|
||
def test_frame_setitem_copy_raises(multiindex_dataframe_random_data): | ||
# will raise/warn as its chained assignment | ||
df = multiindex_dataframe_random_data.T | ||
msg = "A value is trying to be set on a copy of a slice from a DataFrame" | ||
with pytest.raises(com.SettingWithCopyError, match=msg): | ||
df['foo']['one'] = 2 | ||
|
||
|
||
def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): | ||
frame = multiindex_dataframe_random_data.T | ||
expected = frame | ||
df = frame.copy() | ||
|
||
try: | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Don't do things like this; use the context manager as above. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I've changed it as requested, though I'm not convinced this is the right thing to do in situations like this. The previous test checked that the chained assignment raised the correct exception, and this test checks that the DataFrame values are unchanged. Having a context manager in the test adds unnecessary/unseen setup and teardown code that could potentially interfere with the tested functionality. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yeah, this must have been changed a while back; the context manager is the correct idiom. |
||
df = f() | ||
except ValueError: | ||
df['foo']['one'] = 2 | ||
except com.SettingWithCopyError: | ||
pass | ||
assert (df['foo', 'one'] == 0).all() | ||
|
||
result = df | ||
tm.assert_frame_equal(result, expected) | ||
|
||
|
||
def test_getitem_lowerdim_corner(multiindex_dataframe_random_data): | ||
frame = multiindex_dataframe_random_data | ||
msg = "11" | ||
with pytest.raises(KeyError, match=msg): | ||
frame.loc.__getitem__((('bar', 'three'), 'B')) | ||
df = multiindex_dataframe_random_data | ||
|
||
# test setup - check key not in dataframe | ||
with pytest.raises(KeyError, match="11"): | ||
df.loc[('bar', 'three'), 'B'] | ||
|
||
# in theory should be inserting in a sorted space???? | ||
frame.loc[('bar', 'three'), 'B'] = 0 | ||
assert frame.sort_index().loc[('bar', 'three'), 'B'] == 0 | ||
df.loc[('bar', 'three'), 'B'] = 0 | ||
expected = 0 | ||
result = df.sort_index().loc[('bar', 'three'), 'B'] | ||
assert result == expected | ||
|
||
|
||
@pytest.mark.parametrize('unicode_strings', [True, False]) | ||
|
@@ -345,41 +343,51 @@ def test_mixed_depth_get(unicode_strings): | |
tm.assert_series_equal(result, expected) | ||
|
||
|
||
def test_mi_access(): | ||
@pytest.fixture | ||
def dataframe_with_duplicate_index(): | ||
"""Fixture for DataFrame used in tests for gh-4145 and gh-4146""" | ||
data = [['a', 'd', 'e', 'c', 'f', 'b'], | ||
[1, 4, 5, 3, 6, 2], | ||
[1, 4, 5, 3, 6, 2]] | ||
index = ['h1', 'h3', 'h5'] | ||
columns = MultiIndex( | ||
levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']], | ||
codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]], | ||
names=['main', 'sub']) | ||
return DataFrame(data, index=index, columns=columns) | ||
|
||
|
||
@pytest.mark.parametrize('indexer', [ | ||
lambda df: df.loc[:, ('A', 'A1')], | ||
lambda df: df[('A', 'A1')] | ||
]) | ||
def test_mi_access(dataframe_with_duplicate_index, indexer): | ||
# GH 4145 | ||
data = """h1 main h3 sub h5 | ||
0 a A 1 A1 1 | ||
1 b B 2 B1 2 | ||
2 c B 3 A1 3 | ||
3 d A 4 B2 4 | ||
4 e A 5 B2 5 | ||
5 f B 6 A2 6 | ||
""" | ||
|
||
df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0) | ||
df2 = df.set_index(['main', 'sub']).T.sort_index(1) | ||
df = dataframe_with_duplicate_index | ||
index = Index(['h1', 'h3', 'h5']) | ||
columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub']) | ||
expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T | ||
|
||
result = df2.loc[:, ('A', 'A1')] | ||
result = indexer(df) | ||
tm.assert_frame_equal(result, expected) | ||
|
||
result = df2[('A', 'A1')] | ||
tm.assert_frame_equal(result, expected) | ||
|
||
def test_mi_access_returns_series(dataframe_with_duplicate_index): | ||
# GH 4146, not returning a block manager when selecting a unique index | ||
# from a duplicate index | ||
# as of 4879, this returns a Series (which is similar to what happens | ||
# with a non-unique) | ||
df = dataframe_with_duplicate_index | ||
expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1') | ||
result = df2['A']['A1'] | ||
result = df['A']['A1'] | ||
tm.assert_series_equal(result, expected) | ||
|
||
|
||
def test_mi_access_returns_frame(dataframe_with_duplicate_index): | ||
# selecting a non_unique from the 2nd level | ||
df = dataframe_with_duplicate_index | ||
expected = DataFrame([['d', 4, 4], ['e', 5, 5]], | ||
index=Index(['B2', 'B2'], name='sub'), | ||
columns=['h1', 'h3', 'h5'], ).T | ||
result = df2['A']['B2'] | ||
result = df['A']['B2'] | ||
tm.assert_frame_equal(result, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ideally, put the fixtures at the top.