diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 00b30bab37441..1013bb3e90149 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -1,5 +1,3 @@ -from warnings import catch_warnings, simplefilter - import numpy as np import pytest @@ -11,335 +9,339 @@ from pandas.util import testing as tm -@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") -class TestMultiIndexGetItem(object): - - def test_series_getitem_multiindex(self): - - # GH 6018 - # series regression getitem with a multi-index - - s = Series([1, 2, 3]) - s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)]) - - result = s[:, 0] - expected = Series([1], index=[0]) - tm.assert_series_equal(result, expected) - - result = s.loc[:, 1] - expected = Series([2, 3], index=[1, 2]) - tm.assert_series_equal(result, expected) - - # xs - result = s.xs(0, level=0) - expected = Series([1], index=[0]) - tm.assert_series_equal(result, expected) - - result = s.xs(1, level=1) - expected = Series([2, 3], index=[1, 2]) - tm.assert_series_equal(result, expected) - - # GH6258 - dt = list(date_range('20130903', periods=3)) - idx = MultiIndex.from_product([list('AB'), dt]) - s = Series([1, 3, 4, 1, 3, 4], index=idx) +@pytest.mark.parametrize('access_method', [lambda s, x: s[:, x], + lambda s, x: s.loc[:, x], + lambda s, x: s.xs(x, level=1)]) +@pytest.mark.parametrize('level1_value, expected', [ + (0, Series([1], index=[0])), + (1, Series([2, 3], index=[1, 2])) +]) +def test_series_getitem_multiindex(access_method, level1_value, expected): + + # GH 6018 + # series regression getitem with a multi-index + + s = Series([1, 2, 3]) + s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)]) + result = access_method(s, level1_value) + tm.assert_series_equal(result, expected) + + +def test_series_getitem_multiindex_xs(): + # GH6258 + dt = list(date_range('20130903', periods=3)) + idx = MultiIndex.from_product([list('AB'), dt]) + s = Series([1, 3, 4, 1, 3, 4], index=idx) + + result = s.xs('20130903', level=1) + expected = Series([1, 1], index=list('AB')) + tm.assert_series_equal(result, expected) + + +def test_series_getitem_multiindex_xs_by_label(): + # GH5684 + idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'), + ('b', 'two')]) + s = Series([1, 2, 3, 4], index=idx) + s.index.set_names(['L1', 'L2'], inplace=True) + result = s.xs('one', level='L2') + expected = Series([1, 3], index=['a', 'b']) + expected.index.set_names(['L1'], inplace=True) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize('level0_value', ['D', 'A']) +def test_getitem_duplicates_multiindex(level0_value): + # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise + # the appropriate error, only in PY3 of course! + + index = MultiIndex(levels=[[level0_value, 'B', 'C'], + [0, 26, 27, 37, 57, 67, 75, 82]], + codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], + names=['tag', 'day']) + arr = np.random.randn(len(index), 1) + df = DataFrame(arr, index=index, columns=['val']) + + # confirm indexing on missing value raises KeyError + if level0_value != 'A': + msg = "'A'" + with pytest.raises(KeyError, match=msg): + df.val['A'] - result = s.xs('20130903', level=1) - expected = Series([1, 1], index=list('AB')) - tm.assert_series_equal(result, expected) + msg = "'X'" + with pytest.raises(KeyError, match=msg): + df.val['X'] + + result = df.val[level0_value] + expected = Series(arr.ravel()[0:3], name='val', index=Index( + [26, 37, 57], name='day')) + tm.assert_series_equal(result, expected) + + +@pytest.mark.parametrize('indexer, is_level1, expected_error', [ + ([], False, None), # empty ok + (['A'], False, None), + (['A', 'D'], False, None), + (['D'], False, r"\['D'\] not in index"), # not any values found + (pd.IndexSlice[:, ['foo']], True, None), + (pd.IndexSlice[:, ['foo', 'bah']], True, None) +]) +def test_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1, + expected_error): + # GH 7866 + # multi-index slicing with missing indexers + idx = MultiIndex.from_product([['A', 'B', 'C'], + ['foo', 'bar', 'baz']], + names=['one', 'two']) + s = Series(np.arange(9, dtype='int64'), index=idx).sort_index() + + if indexer == []: + expected = s.iloc[[]] + elif is_level1: + expected = Series([0, 3, 6], index=MultiIndex.from_product( + [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index() + else: + exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], + names=['one', 'two']) + expected = Series(np.arange(3, dtype='int64'), + index=exp_idx).sort_index() - # GH5684 - idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'), - ('b', 'two')]) - s = Series([1, 2, 3, 4], index=idx) - s.index.set_names(['L1', 'L2'], inplace=True) - result = s.xs('one', level='L2') - expected = Series([1, 3], index=['a', 'b']) - expected.index.set_names(['L1'], inplace=True) + if expected_error is not None: + with pytest.raises(KeyError, match=expected_error): + s.loc[indexer] + else: + result = s.loc[indexer] tm.assert_series_equal(result, expected) - def test_getitem_duplicates_multiindex(self): - # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise - # the appropriate error, only in PY3 of course! - - index = MultiIndex(levels=[['D', 'B', 'C'], - [0, 26, 27, 37, 57, 67, 75, 82]], - codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], - names=['tag', 'day']) - arr = np.random.randn(len(index), 1) - df = DataFrame(arr, index=index, columns=['val']) - result = df.val['D'] - expected = Series(arr.ravel()[0:3], name='val', index=Index( - [26, 37, 57], name='day')) - tm.assert_series_equal(result, expected) - def f(): - df.val['A'] +@pytest.mark.parametrize('columns_indexer', [ + ([], slice(None)), + (['foo'], []) +]) +def test_getitem_duplicates_multiindex_empty_indexer(columns_indexer): + # GH 8737 + # empty indexer + multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'], + ['alpha', 'beta'])) + df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index) + df = df.sort_index(level=0, axis=1) + + expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0]) + result = df.loc[:, columns_indexer] + tm.assert_frame_equal(result, expected) + + +def test_getitem_duplicates_multiindex_non_scalar_type_object(): + # regression from < 0.14.0 + # GH 7914 + df = DataFrame([[np.mean, np.median], ['mean', 'median']], + columns=MultiIndex.from_tuples([('functs', 'mean'), + ('functs', 'median')]), + index=['function', 'name']) + result = df.loc['function', ('functs', 'mean')] + expected = np.mean + assert result == expected + + +def test_getitem_simple(multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + df = frame.T + + col = df['foo', 'one'] + tm.assert_almost_equal(col.values, df.values[:, 0]) + msg = r"\('foo', 'four'\)" + with pytest.raises(KeyError, match=msg): + df[('foo', 'four')] + msg = "'foobar'" + with pytest.raises(KeyError, match=msg): + df['foobar'] - pytest.raises(KeyError, f) - def f(): - df.val['X'] - - pytest.raises(KeyError, f) +@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") +def test_series_getitem(multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + s = ymd['A'] - # A is treated as a special Timestamp - index = MultiIndex(levels=[['A', 'B', 'C'], - [0, 26, 27, 37, 57, 67, 75, 82]], - codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], - names=['tag', 'day']) - df = DataFrame(arr, index=index, columns=['val']) - result = df.val['A'] - expected = Series(arr.ravel()[0:3], name='val', index=Index( - [26, 37, 57], name='day')) - tm.assert_series_equal(result, expected) + result = s[2000, 3] - def f(): - df.val['X'] + # TODO(wesm): unused? + # result2 = s.loc[2000, 3] - pytest.raises(KeyError, f) + expected = s.reindex(s.index[42:65]) + expected.index = expected.index.droplevel(0).droplevel(0) + tm.assert_series_equal(result, expected) - # GH 7866 - # multi-index slicing with missing indexers - idx = MultiIndex.from_product([['A', 'B', 'C'], - ['foo', 'bar', 'baz']], - names=['one', 'two']) - s = Series(np.arange(9, dtype='int64'), index=idx).sort_index() + result = s[2000, 3, 10] + expected = s[49] + assert result == expected - exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], - names=['one', 'two']) - expected = Series(np.arange(3, dtype='int64'), - index=exp_idx).sort_index() + # fancy + expected = s.reindex(s.index[49:51]) + result = s.loc[[(2000, 3, 10), (2000, 3, 13)]] + tm.assert_series_equal(result, expected) - result = s.loc[['A']] - tm.assert_series_equal(result, expected) - result = s.loc[['A', 'D']] - tm.assert_series_equal(result, expected) + result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] + tm.assert_series_equal(result, expected) - # not any values found - pytest.raises(KeyError, lambda: s.loc[['D']]) + # key error + msg = "356" + with pytest.raises(KeyError, match=msg): + s.__getitem__((2000, 3, 4)) - # empty ok - result = s.loc[[]] - expected = s.iloc[[]] - tm.assert_series_equal(result, expected) - idx = pd.IndexSlice - expected = Series([0, 3, 6], index=MultiIndex.from_product( - [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index() +def test_series_getitem_corner( + multiindex_year_month_day_dataframe_random_data): + ymd = multiindex_year_month_day_dataframe_random_data + s = ymd['A'] - result = s.loc[idx[:, ['foo']]] - tm.assert_series_equal(result, expected) - result = s.loc[idx[:, ['foo', 'bah']]] - tm.assert_series_equal(result, expected) + # don't segfault, GH #495 + # out of bounds access + msg = "index out of bounds" + with pytest.raises(IndexError, match=msg): + s.__getitem__(len(ymd)) - # GH 8737 - # empty indexer - multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'], - ['alpha', 'beta'])) - df = DataFrame( - np.random.randn(5, 6), index=range(5), columns=multi_index) - df = df.sort_index(level=0, axis=1) - - expected = DataFrame(index=range(5), - columns=multi_index.reindex([])[0]) - result1 = df.loc[:, ([], slice(None))] - result2 = df.loc[:, (['foo'], [])] - tm.assert_frame_equal(result1, expected) - tm.assert_frame_equal(result2, expected) - - # regression from < 0.14.0 - # GH 7914 - df = DataFrame([[np.mean, np.median], ['mean', 'median']], - columns=MultiIndex.from_tuples([('functs', 'mean'), - ('functs', 'median')]), - index=['function', 'name']) - result = df.loc['function', ('functs', 'mean')] - assert result == np.mean - - def test_getitem_simple(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - df = frame.T - - col = df['foo', 'one'] - tm.assert_almost_equal(col.values, df.values[:, 0]) - with pytest.raises(KeyError): - df[('foo', 'four')] - with pytest.raises(KeyError): - df['foobar'] - - def test_series_getitem( - self, multiindex_year_month_day_dataframe_random_data): - ymd = multiindex_year_month_day_dataframe_random_data - s = ymd['A'] - - result = s[2000, 3] - - # TODO(wesm): unused? - # result2 = s.loc[2000, 3] - - expected = s.reindex(s.index[42:65]) - expected.index = expected.index.droplevel(0).droplevel(0) - tm.assert_series_equal(result, expected) + # generator + result = s[(x > 0 for x in s)] + expected = s[s > 0] + tm.assert_series_equal(result, expected) - result = s[2000, 3, 10] - expected = s[49] - assert result == expected - # fancy - expected = s.reindex(s.index[49:51]) - result = s.loc[[(2000, 3, 10), (2000, 3, 13)]] - tm.assert_series_equal(result, expected) +def test_frame_getitem_multicolumn_empty_level(): + f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) + f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], + ['level3 item1', 'level3 item2']] - with catch_warnings(record=True): - simplefilter("ignore", DeprecationWarning) - result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] - tm.assert_series_equal(result, expected) + result = f['level1 item1'] + expected = DataFrame([['1'], ['2'], ['3']], index=f.index, + columns=['level3 item1']) + tm.assert_frame_equal(result, expected) - # key error - pytest.raises(KeyError, s.__getitem__, (2000, 3, 4)) - def test_series_getitem_corner( - self, multiindex_year_month_day_dataframe_random_data): - ymd = multiindex_year_month_day_dataframe_random_data - s = ymd['A'] +@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") +def test_getitem_tuple_plus_slice(): + # GH #671 + df = DataFrame({'a': lrange(10), + 'b': lrange(10), + 'c': np.random.randn(10), + 'd': np.random.randn(10)}) - # don't segfault, GH #495 - # out of bounds access - pytest.raises(IndexError, s.__getitem__, len(ymd)) + idf = df.set_index(['a', 'b']) - # generator - result = s[(x > 0 for x in s)] - expected = s[s > 0] - tm.assert_series_equal(result, expected) + result = idf.loc[(0, 0), :] + expected = idf.loc[0, 0] + expected2 = idf.xs((0, 0)) + expected3 = idf.ix[0, 0] - def test_frame_getitem_multicolumn_empty_level(self): - f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) - f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], - ['level3 item1', 'level3 item2']] + tm.assert_series_equal(result, expected) + tm.assert_series_equal(result, expected2) + tm.assert_series_equal(result, expected3) - result = f['level1 item1'] - expected = DataFrame([['1'], ['2'], ['3']], index=f.index, - columns=['level3 item1']) - tm.assert_frame_equal(result, expected) - def test_getitem_tuple_plus_slice(self): - # GH #671 - df = DataFrame({'a': lrange(10), - 'b': lrange(10), - 'c': np.random.randn(10), - 'd': np.random.randn(10)}) +def test_getitem_toplevel(multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + df = frame.T - idf = df.set_index(['a', 'b']) + result = df['foo'] + expected = df.reindex(columns=df.columns[:3]) + expected.columns = expected.columns.droplevel(0) + tm.assert_frame_equal(result, expected) - result = idf.loc[(0, 0), :] - expected = idf.loc[0, 0] - expected2 = idf.xs((0, 0)) - with catch_warnings(record=True): - simplefilter("ignore", DeprecationWarning) - expected3 = idf.ix[0, 0] + result = df['bar'] + result2 = df.loc[:, 'bar'] - tm.assert_series_equal(result, expected) - tm.assert_series_equal(result, expected2) - tm.assert_series_equal(result, expected3) + expected = df.reindex(columns=df.columns[3:5]) + expected.columns = expected.columns.droplevel(0) + tm.assert_frame_equal(result, expected) + tm.assert_frame_equal(result, result2) - def test_getitem_toplevel(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - df = frame.T - result = df['foo'] - expected = df.reindex(columns=df.columns[:3]) - expected.columns = expected.columns.droplevel(0) - tm.assert_frame_equal(result, expected) +def test_getitem_int(multiindex_dataframe_random_data): + levels = [[0, 1], [0, 1, 2]] + codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + index = MultiIndex(levels=levels, codes=codes) - result = df['bar'] - result2 = df.loc[:, 'bar'] + frame = DataFrame(np.random.randn(6, 2), index=index) - expected = df.reindex(columns=df.columns[3:5]) - expected.columns = expected.columns.droplevel(0) - tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result, result2) + result = frame.loc[1] + expected = frame[-3:] + expected.index = expected.index.droplevel(0) + tm.assert_frame_equal(result, expected) - def test_getitem_int(self, multiindex_dataframe_random_data): - levels = [[0, 1], [0, 1, 2]] - codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] - index = MultiIndex(levels=levels, codes=codes) + # raises exception + msg = "3" + with pytest.raises(KeyError, match=msg): + frame.loc.__getitem__(3) - frame = DataFrame(np.random.randn(6, 2), index=index) + # however this will work + frame = multiindex_dataframe_random_data + result = frame.iloc[2] + expected = frame.xs(frame.index[2]) + tm.assert_series_equal(result, expected) - result = frame.loc[1] - expected = frame[-3:] - expected.index = expected.index.droplevel(0) - tm.assert_frame_equal(result, expected) - # raises exception - pytest.raises(KeyError, frame.loc.__getitem__, 3) +def test_frame_getitem_view(multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + df = frame.T.copy() - # however this will work - frame = multiindex_dataframe_random_data - result = frame.iloc[2] - expected = frame.xs(frame.index[2]) - tm.assert_series_equal(result, expected) + # this works because we are modifying the underlying array + # really a no-no + df['foo'].values[:] = 0 + assert (df['foo'].values == 0).all() - def test_frame_getitem_view(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - df = frame.T.copy() + # but not if it's mixed-type + df['foo', 'four'] = 'foo' + df = df.sort_index(level=0, axis=1) - # this works because we are modifying the underlying array - # really a no-no - df['foo'].values[:] = 0 - assert (df['foo'].values == 0).all() + # this will work, but will raise/warn as its chained assignment + def f(): + df['foo']['one'] = 2 + return df - # but not if it's mixed-type - df['foo', 'four'] = 'foo' - df = df.sort_index(level=0, axis=1) + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): + df['foo']['one'] = 2 - # this will work, but will raise/warn as its chained assignment - def f(): - df['foo']['one'] = 2 - return df + try: + df = f() + except ValueError: + pass + assert (df['foo', 'one'] == 0).all() - pytest.raises(com.SettingWithCopyError, f) - try: - df = f() - except ValueError: - pass - assert (df['foo', 'one'] == 0).all() +def test_getitem_lowerdim_corner(multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data + msg = "11" + with pytest.raises(KeyError, match=msg): + frame.loc.__getitem__((('bar', 'three'), 'B')) - def test_getitem_lowerdim_corner(self, multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - pytest.raises(KeyError, frame.loc.__getitem__, - (('bar', 'three'), 'B')) + # in theory should be inserting in a sorted space???? + frame.loc[('bar', 'three'), 'B'] = 0 + assert frame.sort_index().loc[('bar', 'three'), 'B'] == 0 - # in theory should be inserting in a sorted space???? - frame.loc[('bar', 'three'), 'B'] = 0 - assert frame.sort_index().loc[('bar', 'three'), 'B'] == 0 - @pytest.mark.parametrize('unicode_strings', [True, False]) - def test_mixed_depth_get(self, unicode_strings): - # If unicode_strings is True, the column labels in dataframe - # construction will use unicode strings in Python 2 (pull request - # #17099). +@pytest.mark.parametrize('unicode_strings', [True, False]) +def test_mixed_depth_get(unicode_strings): + # If unicode_strings is True, the column labels in dataframe + # construction will use unicode strings in Python 2 (pull request + # #17099). - arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], - ['', 'OD', 'OD', 'result1', 'result2', 'result1'], - ['', 'wx', 'wy', '', '', '']] + arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], + ['', 'OD', 'OD', 'result1', 'result2', 'result1'], + ['', 'wx', 'wy', '', '', '']] - if unicode_strings: - arrays = [[u(s) for s in arr] for arr in arrays] + if unicode_strings: + arrays = [[u(s) for s in arr] for arr in arrays] - tuples = sorted(zip(*arrays)) - index = MultiIndex.from_tuples(tuples) - df = DataFrame(np.random.randn(4, 6), columns=index) + tuples = sorted(zip(*arrays)) + index = MultiIndex.from_tuples(tuples) + df = DataFrame(np.random.randn(4, 6), columns=index) - result = df['a'] - expected = df['a', '', ''].rename('a') - tm.assert_series_equal(result, expected) + result = df['a'] + expected = df['a', '', ''].rename('a') + tm.assert_series_equal(result, expected) - result = df['routine1', 'result1'] - expected = df['routine1', 'result1', ''] - expected = expected.rename(('routine1', 'result1')) - tm.assert_series_equal(result, expected) + result = df['routine1', 'result1'] + expected = df['routine1', 'result1', ''] + expected = expected.rename(('routine1', 'result1')) + tm.assert_series_equal(result, expected)