diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 8d3997c878b83..88e96329105dd 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas.compat import StringIO, lrange, range, u, zip +from pandas.compat import range, u, zip import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series @@ -10,6 +10,28 @@ from pandas.util import testing as tm +@pytest.fixture +def frame_random_data_integer_multi_index(): + levels = [[0, 1], [0, 1, 2]] + codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + index = MultiIndex(levels=levels, codes=codes) + return DataFrame(np.random.randn(6, 2), index=index) + + +@pytest.fixture +def dataframe_with_duplicate_index(): + """Fixture for DataFrame used in tests for gh-4145 and gh-4146""" + data = [['a', 'd', 'e', 'c', 'f', 'b'], + [1, 4, 5, 3, 6, 2], + [1, 4, 5, 3, 6, 2]] + index = ['h1', 'h3', 'h5'] + columns = MultiIndex( + levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']], + codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]], + names=['main', 'sub']) + return DataFrame(data, index=index, columns=columns) + + @pytest.mark.parametrize('access_method', [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)]) @@ -206,116 +228,104 @@ def test_series_getitem_corner_generator( def test_frame_getitem_multicolumn_empty_level(): - f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) - f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], - ['level3 item1', 'level3 item2']] + df = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) + df.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], + ['level3 item1', 'level3 item2']] - result = f['level1 item1'] - expected = DataFrame([['1'], ['2'], ['3']], index=f.index, + result = df['level1 item1'] + expected = DataFrame([['1'], ['2'], ['3']], index=df.index, columns=['level3 item1']) tm.assert_frame_equal(result, expected) -@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") def test_getitem_tuple_plus_slice(): - # GH #671 - df = DataFrame({'a': lrange(10), - 'b': lrange(10), + # GH 671 + df = DataFrame({'a': np.arange(10), + 'b': np.arange(10), 'c': np.random.randn(10), - 'd': np.random.randn(10)}) - - idf = df.set_index(['a', 'b']) - - result = idf.loc[(0, 0), :] - expected = idf.loc[0, 0] - expected2 = idf.xs((0, 0)) - expected3 = idf.ix[0, 0] - + 'd': np.random.randn(10)} + ).set_index(['a', 'b']) + expected = df.loc[0, 0] + result = df.loc[(0, 0), :] tm.assert_series_equal(result, expected) - tm.assert_series_equal(result, expected2) - tm.assert_series_equal(result, expected3) -def test_getitem_toplevel(multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - df = frame.T - - result = df['foo'] - expected = df.reindex(columns=df.columns[:3]) - expected.columns = expected.columns.droplevel(0) - tm.assert_frame_equal(result, expected) - - result = df['bar'] - result2 = df.loc[:, 'bar'] - - expected = df.reindex(columns=df.columns[3:5]) +@pytest.mark.parametrize('indexer,expected_slice', [ + (lambda df: df['foo'], slice(3)), + (lambda df: df['bar'], slice(3, 5)), + (lambda df: df.loc[:, 'bar'], slice(3, 5)) +]) +def test_getitem_toplevel( + multiindex_dataframe_random_data, indexer, expected_slice): + df = multiindex_dataframe_random_data.T + expected = df.reindex(columns=df.columns[expected_slice]) expected.columns = expected.columns.droplevel(0) + result = indexer(df) tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result, result2) -def test_getitem_int(multiindex_dataframe_random_data): - levels = [[0, 1], [0, 1, 2]] - codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] - index = MultiIndex(levels=levels, codes=codes) - - frame = DataFrame(np.random.randn(6, 2), index=index) - - result = frame.loc[1] - expected = frame[-3:] +def test_getitem_int(frame_random_data_integer_multi_index): + df = frame_random_data_integer_multi_index + result = df.loc[1] + expected = df[-3:] expected.index = expected.index.droplevel(0) tm.assert_frame_equal(result, expected) - # raises exception + +def test_getitem_int_raises_exception(frame_random_data_integer_multi_index): + df = frame_random_data_integer_multi_index msg = "3" with pytest.raises(KeyError, match=msg): - frame.loc.__getitem__(3) + df.loc.__getitem__(3) - # however this will work - frame = multiindex_dataframe_random_data - result = frame.iloc[2] - expected = frame.xs(frame.index[2]) - tm.assert_series_equal(result, expected) +def test_getitem_iloc(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.iloc[2] + expected = df.xs(df.index[2]) + tm.assert_series_equal(result, expected) -def test_frame_getitem_view(multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - df = frame.T.copy() +def test_frame_setitem_view_direct(multiindex_dataframe_random_data): # this works because we are modifying the underlying array # really a no-no + df = multiindex_dataframe_random_data.T df['foo'].values[:] = 0 assert (df['foo'].values == 0).all() - # but not if it's mixed-type - df['foo', 'four'] = 'foo' - df = df.sort_index(level=0, axis=1) - # this will work, but will raise/warn as its chained assignment - def f(): +def test_frame_setitem_copy_raises(multiindex_dataframe_random_data): + # will raise/warn as its chained assignment + df = multiindex_dataframe_random_data.T + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): df['foo']['one'] = 2 - return df + +def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data.T + expected = frame + df = frame.copy() msg = "A value is trying to be set on a copy of a slice from a DataFrame" with pytest.raises(com.SettingWithCopyError, match=msg): df['foo']['one'] = 2 - try: - df = f() - except ValueError: - pass - assert (df['foo', 'one'] == 0).all() + result = df + tm.assert_frame_equal(result, expected) def test_getitem_lowerdim_corner(multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - msg = "11" - with pytest.raises(KeyError, match=msg): - frame.loc.__getitem__((('bar', 'three'), 'B')) + df = multiindex_dataframe_random_data + + # test setup - check key not in dataframe + with pytest.raises(KeyError, match="11"): + df.loc[('bar', 'three'), 'B'] # in theory should be inserting in a sorted space???? - frame.loc[('bar', 'three'), 'B'] = 0 - assert frame.sort_index().loc[('bar', 'three'), 'B'] == 0 + df.loc[('bar', 'three'), 'B'] = 0 + expected = 0 + result = df.sort_index().loc[('bar', 'three'), 'B'] + assert result == expected @pytest.mark.parametrize('unicode_strings', [True, False]) @@ -345,41 +355,37 @@ def test_mixed_depth_get(unicode_strings): tm.assert_series_equal(result, expected) -def test_mi_access(): - +@pytest.mark.parametrize('indexer', [ + lambda df: df.loc[:, ('A', 'A1')], + lambda df: df[('A', 'A1')] +]) +def test_mi_access(dataframe_with_duplicate_index, indexer): # GH 4145 - data = """h1 main h3 sub h5 -0 a A 1 A1 1 -1 b B 2 B1 2 -2 c B 3 A1 3 -3 d A 4 B2 4 -4 e A 5 B2 5 -5 f B 6 A2 6 -""" - - df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0) - df2 = df.set_index(['main', 'sub']).T.sort_index(1) + df = dataframe_with_duplicate_index index = Index(['h1', 'h3', 'h5']) columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub']) expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T - result = df2.loc[:, ('A', 'A1')] + result = indexer(df) tm.assert_frame_equal(result, expected) - result = df2[('A', 'A1')] - tm.assert_frame_equal(result, expected) +def test_mi_access_returns_series(dataframe_with_duplicate_index): # GH 4146, not returning a block manager when selecting a unique index # from a duplicate index # as of 4879, this returns a Series (which is similar to what happens # with a non-unique) + df = dataframe_with_duplicate_index expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1') - result = df2['A']['A1'] + result = df['A']['A1'] tm.assert_series_equal(result, expected) + +def test_mi_access_returns_frame(dataframe_with_duplicate_index): # selecting a non_unique from the 2nd level + df = dataframe_with_duplicate_index expected = DataFrame([['d', 4, 4], ['e', 5, 5]], index=Index(['B2', 'B2'], name='sub'), columns=['h1', 'h3', 'h5'], ).T - result = df2['A']['B2'] + result = df['A']['B2'] tm.assert_frame_equal(result, expected)