From 65567bf00eab50382adf87e5c0104c42a641c68e Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 26 Dec 2018 21:46:36 +0000 Subject: [PATCH 1/9] refactor test_getitem_tuple_plus_slice --- .../tests/indexing/multiindex/test_getitem.py | 34 +++++++------------ 1 file changed, 13 insertions(+), 21 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 8d3997c878b83..bb971c82caa46 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas.compat import StringIO, lrange, range, u, zip +from pandas.compat import StringIO, range, u, zip import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series @@ -206,34 +206,26 @@ def test_series_getitem_corner_generator( def test_frame_getitem_multicolumn_empty_level(): - f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) - f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], - ['level3 item1', 'level3 item2']] + df = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) + df.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], + ['level3 item1', 'level3 item2']] - result = f['level1 item1'] - expected = DataFrame([['1'], ['2'], ['3']], index=f.index, + result = df['level1 item1'] + expected = DataFrame([['1'], ['2'], ['3']], index=df.index, columns=['level3 item1']) tm.assert_frame_equal(result, expected) -@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") def test_getitem_tuple_plus_slice(): - # GH #671 - df = DataFrame({'a': lrange(10), - 'b': lrange(10), + # GH 671 + df = DataFrame({'a': np.arange(10), + 'b': np.arange(10), 'c': np.random.randn(10), - 'd': np.random.randn(10)}) - - idf = df.set_index(['a', 'b']) - - result = idf.loc[(0, 0), :] - expected = idf.loc[0, 0] - expected2 = idf.xs((0, 0)) - expected3 = idf.ix[0, 0] - + 'd': np.random.randn(10)} + ).set_index(['a', 'b']) + expected = df.loc[0, 0] + result = df.loc[(0, 0), :] tm.assert_series_equal(result, expected) - tm.assert_series_equal(result, expected2) - tm.assert_series_equal(result, expected3) def test_getitem_toplevel(multiindex_dataframe_random_data): From d83285d2464855ada8909e75200c8b9c85fbf7e2 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 26 Dec 2018 22:22:18 +0000 Subject: [PATCH 2/9] parametrize test_getitem_toplevel --- .../tests/indexing/multiindex/test_getitem.py | 24 ++++++++----------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index bb971c82caa46..62203213cbd4c 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -228,22 +228,18 @@ def test_getitem_tuple_plus_slice(): tm.assert_series_equal(result, expected) -def test_getitem_toplevel(multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - df = frame.T - - result = df['foo'] - expected = df.reindex(columns=df.columns[:3]) - expected.columns = expected.columns.droplevel(0) - tm.assert_frame_equal(result, expected) - - result = df['bar'] - result2 = df.loc[:, 'bar'] - - expected = df.reindex(columns=df.columns[3:5]) +@pytest.mark.parametrize('indexer,expected_slice', [ + (lambda df: df['foo'], slice(3)), + (lambda df: df['bar'], slice(3, 5)), + (lambda df: df.loc[:, 'bar'], slice(3, 5)) +]) +def test_getitem_toplevel( + multiindex_dataframe_random_data, indexer, expected_slice): + df = multiindex_dataframe_random_data.T + expected = df.reindex(columns=df.columns[expected_slice]) expected.columns = expected.columns.droplevel(0) + result = indexer(df) tm.assert_frame_equal(result, expected) - tm.assert_frame_equal(result, result2) def test_getitem_int(multiindex_dataframe_random_data): From 6124297ce1a204324c5a66201e596646462840c9 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Wed, 26 Dec 2018 22:52:15 +0000 Subject: [PATCH 3/9] split test_getitem_int --- .../tests/indexing/multiindex/test_getitem.py | 26 ++++++++++++------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 62203213cbd4c..154ee0ca9f22e 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -242,27 +242,33 @@ def test_getitem_toplevel( tm.assert_frame_equal(result, expected) -def test_getitem_int(multiindex_dataframe_random_data): +@pytest.fixture +def frame_random_data_integer_multi_index(): levels = [[0, 1], [0, 1, 2]] codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] index = MultiIndex(levels=levels, codes=codes) + return DataFrame(np.random.randn(6, 2), index=index) - frame = DataFrame(np.random.randn(6, 2), index=index) - result = frame.loc[1] - expected = frame[-3:] +def test_getitem_int(frame_random_data_integer_multi_index): + df = frame_random_data_integer_multi_index + result = df.loc[1] + expected = df[-3:] expected.index = expected.index.droplevel(0) tm.assert_frame_equal(result, expected) - # raises exception + +def test_getitem_int_raises_exception(frame_random_data_integer_multi_index): + df = frame_random_data_integer_multi_index msg = "3" with pytest.raises(KeyError, match=msg): - frame.loc.__getitem__(3) + df.loc.__getitem__(3) - # however this will work - frame = multiindex_dataframe_random_data - result = frame.iloc[2] - expected = frame.xs(frame.index[2]) + +def test_getitem_iloc(multiindex_dataframe_random_data): + df = multiindex_dataframe_random_data + result = df.iloc[2] + expected = df.xs(df.index[2]) tm.assert_series_equal(result, expected) From 2585b5e4076c53a5c5e3b53cb0bd23581bc7b00a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 27 Dec 2018 11:33:43 +0000 Subject: [PATCH 4/9] split test_frame_getitem_view --- .../tests/indexing/multiindex/test_getitem.py | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 154ee0ca9f22e..6d64657792b7d 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -5,7 +5,7 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series -import pandas.core.common as com +from pandas.core.common import SettingWithCopyError from pandas.core.indexing import IndexingError from pandas.util import testing as tm @@ -272,33 +272,34 @@ def test_getitem_iloc(multiindex_dataframe_random_data): tm.assert_series_equal(result, expected) -def test_frame_getitem_view(multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - df = frame.T.copy() - +def test_frame_setitem_view_direct(multiindex_dataframe_random_data): # this works because we are modifying the underlying array # really a no-no + df = multiindex_dataframe_random_data.T df['foo'].values[:] = 0 assert (df['foo'].values == 0).all() - # but not if it's mixed-type - df['foo', 'four'] = 'foo' - df = df.sort_index(level=0, axis=1) - - # this will work, but will raise/warn as its chained assignment - def f(): - df['foo']['one'] = 2 - return df +def test_frame_setitem_copy_raises(multiindex_dataframe_random_data): + # will raise/warn as its chained assignment + df = multiindex_dataframe_random_data.T msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(com.SettingWithCopyError, match=msg): + with pytest.raises(SettingWithCopyError, match=msg): df['foo']['one'] = 2 + +def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): + frame = multiindex_dataframe_random_data.T + expected = frame + df = frame.copy() + try: - df = f() - except ValueError: + df['foo']['one'] = 2 + except SettingWithCopyError: pass - assert (df['foo', 'one'] == 0).all() + + result = df + tm.assert_frame_equal(result, expected) def test_getitem_lowerdim_corner(multiindex_dataframe_random_data): From 1779835eed97a95f9e992f3b52ba616ca53c48e0 Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 27 Dec 2018 12:17:23 +0000 Subject: [PATCH 5/9] refactor test_getitem_lowerdim_corner --- pandas/tests/indexing/multiindex/test_getitem.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 6d64657792b7d..8aff6e2319459 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -303,14 +303,17 @@ def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): def test_getitem_lowerdim_corner(multiindex_dataframe_random_data): - frame = multiindex_dataframe_random_data - msg = "11" - with pytest.raises(KeyError, match=msg): - frame.loc.__getitem__((('bar', 'three'), 'B')) + df = multiindex_dataframe_random_data + + # test setup - check key not in dataframe + with pytest.raises(KeyError, match="11"): + df.loc[('bar', 'three'), 'B'] # in theory should be inserting in a sorted space???? - frame.loc[('bar', 'three'), 'B'] = 0 - assert frame.sort_index().loc[('bar', 'three'), 'B'] == 0 + df.loc[('bar', 'three'), 'B'] = 0 + expected = 0 + result = df.sort_index().loc[('bar', 'three'), 'B'] + assert result == expected @pytest.mark.parametrize('unicode_strings', [True, False]) From 062e48f6d021b9f3bf7feadda43aa7dc909384ee Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 27 Dec 2018 14:06:27 +0000 Subject: [PATCH 6/9] split and parametrize test_mi_access --- .../tests/indexing/multiindex/test_getitem.py | 46 +++++++++++-------- 1 file changed, 28 insertions(+), 18 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 8aff6e2319459..4182b08a997ce 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -1,7 +1,7 @@ import numpy as np import pytest -from pandas.compat import StringIO, range, u, zip +from pandas.compat import range, u, zip import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series @@ -343,41 +343,51 @@ def test_mixed_depth_get(unicode_strings): tm.assert_series_equal(result, expected) -def test_mi_access(): +@pytest.fixture +def dataframe_with_duplicate_index(): + """Fixture for DataFrame used in tests for gh-4145 and gh-4146""" + data = [['a', 'd', 'e', 'c', 'f', 'b'], + [1, 4, 5, 3, 6, 2], + [1, 4, 5, 3, 6, 2]] + index = ['h1', 'h3', 'h5'] + columns = MultiIndex( + levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']], + codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]], + names=['main', 'sub']) + return DataFrame(data, index=index, columns=columns) + +@pytest.mark.parametrize('indexer', [ + lambda df: df.loc[:, ('A', 'A1')], + lambda df: df[('A', 'A1')] +]) +def test_mi_access(dataframe_with_duplicate_index, indexer): # GH 4145 - data = """h1 main h3 sub h5 -0 a A 1 A1 1 -1 b B 2 B1 2 -2 c B 3 A1 3 -3 d A 4 B2 4 -4 e A 5 B2 5 -5 f B 6 A2 6 -""" - - df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0) - df2 = df.set_index(['main', 'sub']).T.sort_index(1) + df = dataframe_with_duplicate_index index = Index(['h1', 'h3', 'h5']) columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub']) expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T - result = df2.loc[:, ('A', 'A1')] + result = indexer(df) tm.assert_frame_equal(result, expected) - result = df2[('A', 'A1')] - tm.assert_frame_equal(result, expected) +def test_mi_access_returns_series(dataframe_with_duplicate_index): # GH 4146, not returning a block manager when selecting a unique index # from a duplicate index # as of 4879, this returns a Series (which is similar to what happens # with a non-unique) + df = dataframe_with_duplicate_index expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1') - result = df2['A']['A1'] + result = df['A']['A1'] tm.assert_series_equal(result, expected) + +def test_mi_access_returns_frame(dataframe_with_duplicate_index): # selecting a non_unique from the 2nd level + df = dataframe_with_duplicate_index expected = DataFrame([['d', 4, 4], ['e', 5, 5]], index=Index(['B2', 'B2'], name='sub'), columns=['h1', 'h3', 'h5'], ).T - result = df2['A']['B2'] + result = df['A']['B2'] tm.assert_frame_equal(result, expected) From f8cd89b4601fb94328ddd6764f8a44345a3a87ee Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Thu, 27 Dec 2018 20:17:40 +0000 Subject: [PATCH 7/9] revert change to import --- pandas/tests/indexing/multiindex/test_getitem.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 4182b08a997ce..71102902cbe11 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -5,7 +5,7 @@ import pandas as pd from pandas import DataFrame, Index, MultiIndex, Series -from pandas.core.common import SettingWithCopyError +import pandas.core.common as com from pandas.core.indexing import IndexingError from pandas.util import testing as tm @@ -284,7 +284,7 @@ def test_frame_setitem_copy_raises(multiindex_dataframe_random_data): # will raise/warn as its chained assignment df = multiindex_dataframe_random_data.T msg = "A value is trying to be set on a copy of a slice from a DataFrame" - with pytest.raises(SettingWithCopyError, match=msg): + with pytest.raises(com.SettingWithCopyError, match=msg): df['foo']['one'] = 2 @@ -295,7 +295,7 @@ def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): try: df['foo']['one'] = 2 - except SettingWithCopyError: + except com.SettingWithCopyError: pass result = df From eda86fe45f49464d259dfd773c93070e33041b5a Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 28 Dec 2018 08:57:48 +0000 Subject: [PATCH 8/9] move fixtures to top of module --- .../tests/indexing/multiindex/test_getitem.py | 44 +++++++++---------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 71102902cbe11..638af6c579a42 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -10,6 +10,28 @@ from pandas.util import testing as tm +@pytest.fixture +def frame_random_data_integer_multi_index(): + levels = [[0, 1], [0, 1, 2]] + codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] + index = MultiIndex(levels=levels, codes=codes) + return DataFrame(np.random.randn(6, 2), index=index) + + +@pytest.fixture +def dataframe_with_duplicate_index(): + """Fixture for DataFrame used in tests for gh-4145 and gh-4146""" + data = [['a', 'd', 'e', 'c', 'f', 'b'], + [1, 4, 5, 3, 6, 2], + [1, 4, 5, 3, 6, 2]] + index = ['h1', 'h3', 'h5'] + columns = MultiIndex( + levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']], + codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]], + names=['main', 'sub']) + return DataFrame(data, index=index, columns=columns) + + @pytest.mark.parametrize('access_method', [lambda s, x: s[:, x], lambda s, x: s.loc[:, x], lambda s, x: s.xs(x, level=1)]) @@ -242,14 +264,6 @@ def test_getitem_toplevel( tm.assert_frame_equal(result, expected) -@pytest.fixture -def frame_random_data_integer_multi_index(): - levels = [[0, 1], [0, 1, 2]] - codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] - index = MultiIndex(levels=levels, codes=codes) - return DataFrame(np.random.randn(6, 2), index=index) - - def test_getitem_int(frame_random_data_integer_multi_index): df = frame_random_data_integer_multi_index result = df.loc[1] @@ -343,20 +357,6 @@ def test_mixed_depth_get(unicode_strings): tm.assert_series_equal(result, expected) -@pytest.fixture -def dataframe_with_duplicate_index(): - """Fixture for DataFrame used in tests for gh-4145 and gh-4146""" - data = [['a', 'd', 'e', 'c', 'f', 'b'], - [1, 4, 5, 3, 6, 2], - [1, 4, 5, 3, 6, 2]] - index = ['h1', 'h3', 'h5'] - columns = MultiIndex( - levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']], - codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]], - names=['main', 'sub']) - return DataFrame(data, index=index, columns=columns) - - @pytest.mark.parametrize('indexer', [ lambda df: df.loc[:, ('A', 'A1')], lambda df: df[('A', 'A1')] From 4e9a70f17e10c4433e1a9f86fce5edf83c1299cc Mon Sep 17 00:00:00 2001 From: Simon Hawkins Date: Fri, 28 Dec 2018 10:33:19 +0000 Subject: [PATCH 9/9] use context manager --- pandas/tests/indexing/multiindex/test_getitem.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pandas/tests/indexing/multiindex/test_getitem.py b/pandas/tests/indexing/multiindex/test_getitem.py index 638af6c579a42..88e96329105dd 100644 --- a/pandas/tests/indexing/multiindex/test_getitem.py +++ b/pandas/tests/indexing/multiindex/test_getitem.py @@ -306,11 +306,9 @@ def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): frame = multiindex_dataframe_random_data.T expected = frame df = frame.copy() - - try: + msg = "A value is trying to be set on a copy of a slice from a DataFrame" + with pytest.raises(com.SettingWithCopyError, match=msg): df['foo']['one'] = 2 - except com.SettingWithCopyError: - pass result = df tm.assert_frame_equal(result, expected)