|
1 |
| -from warnings import catch_warnings, simplefilter |
2 |
| - |
3 | 1 | import numpy as np
|
4 | 2 | import pytest
|
5 | 3 |
|
|
11 | 9 | from pandas.util import testing as tm
|
12 | 10 |
|
13 | 11 |
|
14 |
| -@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") |
15 |
| -class TestMultiIndexGetItem(object): |
16 |
| - |
17 |
| - def test_series_getitem_multiindex(self): |
18 |
| - |
19 |
| - # GH 6018 |
20 |
| - # series regression getitem with a multi-index |
21 |
| - |
22 |
| - s = Series([1, 2, 3]) |
23 |
| - s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)]) |
24 |
| - |
25 |
| - result = s[:, 0] |
26 |
| - expected = Series([1], index=[0]) |
27 |
| - tm.assert_series_equal(result, expected) |
28 |
| - |
29 |
| - result = s.loc[:, 1] |
30 |
| - expected = Series([2, 3], index=[1, 2]) |
31 |
| - tm.assert_series_equal(result, expected) |
32 |
| - |
33 |
| - # xs |
34 |
| - result = s.xs(0, level=0) |
35 |
| - expected = Series([1], index=[0]) |
36 |
| - tm.assert_series_equal(result, expected) |
37 |
| - |
38 |
| - result = s.xs(1, level=1) |
39 |
| - expected = Series([2, 3], index=[1, 2]) |
40 |
| - tm.assert_series_equal(result, expected) |
41 |
| - |
42 |
| - # GH6258 |
43 |
| - dt = list(date_range('20130903', periods=3)) |
44 |
| - idx = MultiIndex.from_product([list('AB'), dt]) |
45 |
| - s = Series([1, 3, 4, 1, 3, 4], index=idx) |
| 12 | +@pytest.mark.parametrize('access_method', [lambda s, x: s[:, x], |
| 13 | + lambda s, x: s.loc[:, x], |
| 14 | + lambda s, x: s.xs(x, level=1)]) |
| 15 | +@pytest.mark.parametrize('level1_value, expected', [ |
| 16 | + (0, Series([1], index=[0])), |
| 17 | + (1, Series([2, 3], index=[1, 2])) |
| 18 | +]) |
| 19 | +def test_series_getitem_multiindex(access_method, level1_value, expected): |
| 20 | + |
| 21 | + # GH 6018 |
| 22 | + # series regression getitem with a multi-index |
| 23 | + |
| 24 | + s = Series([1, 2, 3]) |
| 25 | + s.index = MultiIndex.from_tuples([(0, 0), (1, 1), (2, 1)]) |
| 26 | + result = access_method(s, level1_value) |
| 27 | + tm.assert_series_equal(result, expected) |
| 28 | + |
| 29 | + |
| 30 | +def test_series_getitem_multiindex_xs(): |
| 31 | + # GH6258 |
| 32 | + dt = list(date_range('20130903', periods=3)) |
| 33 | + idx = MultiIndex.from_product([list('AB'), dt]) |
| 34 | + s = Series([1, 3, 4, 1, 3, 4], index=idx) |
| 35 | + |
| 36 | + result = s.xs('20130903', level=1) |
| 37 | + expected = Series([1, 1], index=list('AB')) |
| 38 | + tm.assert_series_equal(result, expected) |
| 39 | + |
| 40 | + |
| 41 | +def test_series_getitem_multiindex_xs_by_label(): |
| 42 | + # GH5684 |
| 43 | + idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'), |
| 44 | + ('b', 'two')]) |
| 45 | + s = Series([1, 2, 3, 4], index=idx) |
| 46 | + s.index.set_names(['L1', 'L2'], inplace=True) |
| 47 | + result = s.xs('one', level='L2') |
| 48 | + expected = Series([1, 3], index=['a', 'b']) |
| 49 | + expected.index.set_names(['L1'], inplace=True) |
| 50 | + tm.assert_series_equal(result, expected) |
| 51 | + |
| 52 | + |
| 53 | +@pytest.mark.parametrize('level0_value', ['D', 'A']) |
| 54 | +def test_getitem_duplicates_multiindex(level0_value): |
| 55 | + # GH 5725 the 'A' happens to be a valid Timestamp so it doesn't raise |
| 56 | + # the appropriate error, only in PY3 of course! |
| 57 | + |
| 58 | + index = MultiIndex(levels=[[level0_value, 'B', 'C'], |
| 59 | + [0, 26, 27, 37, 57, 67, 75, 82]], |
| 60 | + codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], |
| 61 | + [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], |
| 62 | + names=['tag', 'day']) |
| 63 | + arr = np.random.randn(len(index), 1) |
| 64 | + df = DataFrame(arr, index=index, columns=['val']) |
| 65 | + |
| 66 | + # confirm indexing on missing value raises KeyError |
| 67 | + if level0_value != 'A': |
| 68 | + msg = "'A'" |
| 69 | + with pytest.raises(KeyError, match=msg): |
| 70 | + df.val['A'] |
46 | 71 |
|
47 |
| - result = s.xs('20130903', level=1) |
48 |
| - expected = Series([1, 1], index=list('AB')) |
49 |
| - tm.assert_series_equal(result, expected) |
| 72 | + msg = "'X'" |
| 73 | + with pytest.raises(KeyError, match=msg): |
| 74 | + df.val['X'] |
| 75 | + |
| 76 | + result = df.val[level0_value] |
| 77 | + expected = Series(arr.ravel()[0:3], name='val', index=Index( |
| 78 | + [26, 37, 57], name='day')) |
| 79 | + tm.assert_series_equal(result, expected) |
| 80 | + |
| 81 | + |
| 82 | +@pytest.mark.parametrize('indexer, is_level1, expected_error', [ |
| 83 | + ([], False, None), # empty ok |
| 84 | + (['A'], False, None), |
| 85 | + (['A', 'D'], False, None), |
| 86 | + (['D'], False, r"\['D'\] not in index"), # not any values found |
| 87 | + (pd.IndexSlice[:, ['foo']], True, None), |
| 88 | + (pd.IndexSlice[:, ['foo', 'bah']], True, None) |
| 89 | +]) |
| 90 | +def test_getitem_duplicates_multiindex_missing_indexers(indexer, is_level1, |
| 91 | + expected_error): |
| 92 | + # GH 7866 |
| 93 | + # multi-index slicing with missing indexers |
| 94 | + idx = MultiIndex.from_product([['A', 'B', 'C'], |
| 95 | + ['foo', 'bar', 'baz']], |
| 96 | + names=['one', 'two']) |
| 97 | + s = Series(np.arange(9, dtype='int64'), index=idx).sort_index() |
| 98 | + |
| 99 | + if indexer == []: |
| 100 | + expected = s.iloc[[]] |
| 101 | + elif is_level1: |
| 102 | + expected = Series([0, 3, 6], index=MultiIndex.from_product( |
| 103 | + [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index() |
| 104 | + else: |
| 105 | + exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], |
| 106 | + names=['one', 'two']) |
| 107 | + expected = Series(np.arange(3, dtype='int64'), |
| 108 | + index=exp_idx).sort_index() |
50 | 109 |
|
51 |
| - # GH5684 |
52 |
| - idx = MultiIndex.from_tuples([('a', 'one'), ('a', 'two'), ('b', 'one'), |
53 |
| - ('b', 'two')]) |
54 |
| - s = Series([1, 2, 3, 4], index=idx) |
55 |
| - s.index.set_names(['L1', 'L2'], inplace=True) |
56 |
| - result = s.xs('one', level='L2') |
57 |
| - expected = Series([1, 3], index=['a', 'b']) |
58 |
| - expected.index.set_names(['L1'], inplace=True) |
| 110 | + if expected_error is not None: |
| 111 | + with pytest.raises(KeyError, match=expected_error): |
| 112 | + s.loc[indexer] |
| 113 | + else: |
| 114 | + result = s.loc[indexer] |
59 | 115 | tm.assert_series_equal(result, expected)
|
60 | 116 |
|
61 |
| - def test_getitem_duplicates_multiindex(self): |
62 |
| - # GH 5725 the 'A' happens to be a valid Timestamp so the doesn't raise |
63 |
| - # the appropriate error, only in PY3 of course! |
64 |
| - |
65 |
| - index = MultiIndex(levels=[['D', 'B', 'C'], |
66 |
| - [0, 26, 27, 37, 57, 67, 75, 82]], |
67 |
| - codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], |
68 |
| - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], |
69 |
| - names=['tag', 'day']) |
70 |
| - arr = np.random.randn(len(index), 1) |
71 |
| - df = DataFrame(arr, index=index, columns=['val']) |
72 |
| - result = df.val['D'] |
73 |
| - expected = Series(arr.ravel()[0:3], name='val', index=Index( |
74 |
| - [26, 37, 57], name='day')) |
75 |
| - tm.assert_series_equal(result, expected) |
76 | 117 |
|
77 |
| - def f(): |
78 |
| - df.val['A'] |
| 118 | +@pytest.mark.parametrize('columns_indexer', [ |
| 119 | + ([], slice(None)), |
| 120 | + (['foo'], []) |
| 121 | +]) |
| 122 | +def test_getitem_duplicates_multiindex_empty_indexer(columns_indexer): |
| 123 | + # GH 8737 |
| 124 | + # empty indexer |
| 125 | + multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'], |
| 126 | + ['alpha', 'beta'])) |
| 127 | + df = DataFrame(np.random.randn(5, 6), index=range(5), columns=multi_index) |
| 128 | + df = df.sort_index(level=0, axis=1) |
| 129 | + |
| 130 | + expected = DataFrame(index=range(5), columns=multi_index.reindex([])[0]) |
| 131 | + result = df.loc[:, columns_indexer] |
| 132 | + tm.assert_frame_equal(result, expected) |
| 133 | + |
| 134 | + |
| 135 | +def test_getitem_duplicates_multiindex_non_scalar_type_object(): |
| 136 | + # regression from < 0.14.0 |
| 137 | + # GH 7914 |
| 138 | + df = DataFrame([[np.mean, np.median], ['mean', 'median']], |
| 139 | + columns=MultiIndex.from_tuples([('functs', 'mean'), |
| 140 | + ('functs', 'median')]), |
| 141 | + index=['function', 'name']) |
| 142 | + result = df.loc['function', ('functs', 'mean')] |
| 143 | + expected = np.mean |
| 144 | + assert result == expected |
| 145 | + |
| 146 | + |
| 147 | +def test_getitem_simple(multiindex_dataframe_random_data): |
| 148 | + frame = multiindex_dataframe_random_data |
| 149 | + df = frame.T |
| 150 | + |
| 151 | + col = df['foo', 'one'] |
| 152 | + tm.assert_almost_equal(col.values, df.values[:, 0]) |
| 153 | + msg = r"\('foo', 'four'\)" |
| 154 | + with pytest.raises(KeyError, match=msg): |
| 155 | + df[('foo', 'four')] |
| 156 | + msg = "'foobar'" |
| 157 | + with pytest.raises(KeyError, match=msg): |
| 158 | + df['foobar'] |
79 | 159 |
|
80 |
| - pytest.raises(KeyError, f) |
81 | 160 |
|
82 |
| - def f(): |
83 |
| - df.val['X'] |
84 |
| - |
85 |
| - pytest.raises(KeyError, f) |
| 161 | +@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") |
| 162 | +def test_series_getitem(multiindex_year_month_day_dataframe_random_data): |
| 163 | + ymd = multiindex_year_month_day_dataframe_random_data |
| 164 | + s = ymd['A'] |
86 | 165 |
|
87 |
| - # A is treated as a special Timestamp |
88 |
| - index = MultiIndex(levels=[['A', 'B', 'C'], |
89 |
| - [0, 26, 27, 37, 57, 67, 75, 82]], |
90 |
| - codes=[[0, 0, 0, 1, 2, 2, 2, 2, 2, 2], |
91 |
| - [1, 3, 4, 6, 0, 2, 2, 3, 5, 7]], |
92 |
| - names=['tag', 'day']) |
93 |
| - df = DataFrame(arr, index=index, columns=['val']) |
94 |
| - result = df.val['A'] |
95 |
| - expected = Series(arr.ravel()[0:3], name='val', index=Index( |
96 |
| - [26, 37, 57], name='day')) |
97 |
| - tm.assert_series_equal(result, expected) |
| 166 | + result = s[2000, 3] |
98 | 167 |
|
99 |
| - def f(): |
100 |
| - df.val['X'] |
| 168 | + # TODO(wesm): unused? |
| 169 | + # result2 = s.loc[2000, 3] |
101 | 170 |
|
102 |
| - pytest.raises(KeyError, f) |
| 171 | + expected = s.reindex(s.index[42:65]) |
| 172 | + expected.index = expected.index.droplevel(0).droplevel(0) |
| 173 | + tm.assert_series_equal(result, expected) |
103 | 174 |
|
104 |
| - # GH 7866 |
105 |
| - # multi-index slicing with missing indexers |
106 |
| - idx = MultiIndex.from_product([['A', 'B', 'C'], |
107 |
| - ['foo', 'bar', 'baz']], |
108 |
| - names=['one', 'two']) |
109 |
| - s = Series(np.arange(9, dtype='int64'), index=idx).sort_index() |
| 175 | + result = s[2000, 3, 10] |
| 176 | + expected = s[49] |
| 177 | + assert result == expected |
110 | 178 |
|
111 |
| - exp_idx = MultiIndex.from_product([['A'], ['foo', 'bar', 'baz']], |
112 |
| - names=['one', 'two']) |
113 |
| - expected = Series(np.arange(3, dtype='int64'), |
114 |
| - index=exp_idx).sort_index() |
| 179 | + # fancy |
| 180 | + expected = s.reindex(s.index[49:51]) |
| 181 | + result = s.loc[[(2000, 3, 10), (2000, 3, 13)]] |
| 182 | + tm.assert_series_equal(result, expected) |
115 | 183 |
|
116 |
| - result = s.loc[['A']] |
117 |
| - tm.assert_series_equal(result, expected) |
118 |
| - result = s.loc[['A', 'D']] |
119 |
| - tm.assert_series_equal(result, expected) |
| 184 | + result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] |
| 185 | + tm.assert_series_equal(result, expected) |
120 | 186 |
|
121 |
| - # not any values found |
122 |
| - pytest.raises(KeyError, lambda: s.loc[['D']]) |
| 187 | + # key error |
| 188 | + msg = "356" |
| 189 | + with pytest.raises(KeyError, match=msg): |
| 190 | + s.__getitem__((2000, 3, 4)) |
123 | 191 |
|
124 |
| - # empty ok |
125 |
| - result = s.loc[[]] |
126 |
| - expected = s.iloc[[]] |
127 |
| - tm.assert_series_equal(result, expected) |
128 | 192 |
|
129 |
| - idx = pd.IndexSlice |
130 |
| - expected = Series([0, 3, 6], index=MultiIndex.from_product( |
131 |
| - [['A', 'B', 'C'], ['foo']], names=['one', 'two'])).sort_index() |
| 193 | +def test_series_getitem_corner( |
| 194 | + multiindex_year_month_day_dataframe_random_data): |
| 195 | + ymd = multiindex_year_month_day_dataframe_random_data |
| 196 | + s = ymd['A'] |
132 | 197 |
|
133 |
| - result = s.loc[idx[:, ['foo']]] |
134 |
| - tm.assert_series_equal(result, expected) |
135 |
| - result = s.loc[idx[:, ['foo', 'bah']]] |
136 |
| - tm.assert_series_equal(result, expected) |
| 198 | + # don't segfault, GH #495 |
| 199 | + # out of bounds access |
| 200 | + msg = "index out of bounds" |
| 201 | + with pytest.raises(IndexError, match=msg): |
| 202 | + s.__getitem__(len(ymd)) |
137 | 203 |
|
138 |
| - # GH 8737 |
139 |
| - # empty indexer |
140 |
| - multi_index = MultiIndex.from_product((['foo', 'bar', 'baz'], |
141 |
| - ['alpha', 'beta'])) |
142 |
| - df = DataFrame( |
143 |
| - np.random.randn(5, 6), index=range(5), columns=multi_index) |
144 |
| - df = df.sort_index(level=0, axis=1) |
145 |
| - |
146 |
| - expected = DataFrame(index=range(5), |
147 |
| - columns=multi_index.reindex([])[0]) |
148 |
| - result1 = df.loc[:, ([], slice(None))] |
149 |
| - result2 = df.loc[:, (['foo'], [])] |
150 |
| - tm.assert_frame_equal(result1, expected) |
151 |
| - tm.assert_frame_equal(result2, expected) |
152 |
| - |
153 |
| - # regression from < 0.14.0 |
154 |
| - # GH 7914 |
155 |
| - df = DataFrame([[np.mean, np.median], ['mean', 'median']], |
156 |
| - columns=MultiIndex.from_tuples([('functs', 'mean'), |
157 |
| - ('functs', 'median')]), |
158 |
| - index=['function', 'name']) |
159 |
| - result = df.loc['function', ('functs', 'mean')] |
160 |
| - assert result == np.mean |
161 |
| - |
162 |
| - def test_getitem_simple(self, multiindex_dataframe_random_data): |
163 |
| - frame = multiindex_dataframe_random_data |
164 |
| - df = frame.T |
165 |
| - |
166 |
| - col = df['foo', 'one'] |
167 |
| - tm.assert_almost_equal(col.values, df.values[:, 0]) |
168 |
| - with pytest.raises(KeyError): |
169 |
| - df[('foo', 'four')] |
170 |
| - with pytest.raises(KeyError): |
171 |
| - df['foobar'] |
172 |
| - |
173 |
| - def test_series_getitem( |
174 |
| - self, multiindex_year_month_day_dataframe_random_data): |
175 |
| - ymd = multiindex_year_month_day_dataframe_random_data |
176 |
| - s = ymd['A'] |
177 |
| - |
178 |
| - result = s[2000, 3] |
179 |
| - |
180 |
| - # TODO(wesm): unused? |
181 |
| - # result2 = s.loc[2000, 3] |
182 |
| - |
183 |
| - expected = s.reindex(s.index[42:65]) |
184 |
| - expected.index = expected.index.droplevel(0).droplevel(0) |
185 |
| - tm.assert_series_equal(result, expected) |
| 204 | + # generator |
| 205 | + result = s[(x > 0 for x in s)] |
| 206 | + expected = s[s > 0] |
| 207 | + tm.assert_series_equal(result, expected) |
186 | 208 |
|
187 |
| - result = s[2000, 3, 10] |
188 |
| - expected = s[49] |
189 |
| - assert result == expected |
190 | 209 |
|
191 |
| - # fancy |
192 |
| - expected = s.reindex(s.index[49:51]) |
193 |
| - result = s.loc[[(2000, 3, 10), (2000, 3, 13)]] |
194 |
| - tm.assert_series_equal(result, expected) |
| 210 | +def test_frame_getitem_multicolumn_empty_level(): |
| 211 | + f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) |
| 212 | + f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], |
| 213 | + ['level3 item1', 'level3 item2']] |
195 | 214 |
|
196 |
| - with catch_warnings(record=True): |
197 |
| - simplefilter("ignore", DeprecationWarning) |
198 |
| - result = s.ix[[(2000, 3, 10), (2000, 3, 13)]] |
199 |
| - tm.assert_series_equal(result, expected) |
| 215 | + result = f['level1 item1'] |
| 216 | + expected = DataFrame([['1'], ['2'], ['3']], index=f.index, |
| 217 | + columns=['level3 item1']) |
| 218 | + tm.assert_frame_equal(result, expected) |
200 | 219 |
|
201 |
| - # key error |
202 |
| - pytest.raises(KeyError, s.__getitem__, (2000, 3, 4)) |
203 | 220 |
|
204 |
| - def test_series_getitem_corner( |
205 |
| - self, multiindex_year_month_day_dataframe_random_data): |
206 |
| - ymd = multiindex_year_month_day_dataframe_random_data |
207 |
| - s = ymd['A'] |
| 221 | +@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") |
| 222 | +def test_getitem_tuple_plus_slice(): |
| 223 | + # GH #671 |
| 224 | + df = DataFrame({'a': lrange(10), |
| 225 | + 'b': lrange(10), |
| 226 | + 'c': np.random.randn(10), |
| 227 | + 'd': np.random.randn(10)}) |
208 | 228 |
|
209 |
| - # don't segfault, GH #495 |
210 |
| - # out of bounds access |
211 |
| - pytest.raises(IndexError, s.__getitem__, len(ymd)) |
| 229 | + idf = df.set_index(['a', 'b']) |
212 | 230 |
|
213 |
| - # generator |
214 |
| - result = s[(x > 0 for x in s)] |
215 |
| - expected = s[s > 0] |
216 |
| - tm.assert_series_equal(result, expected) |
| 231 | + result = idf.loc[(0, 0), :] |
| 232 | + expected = idf.loc[0, 0] |
| 233 | + expected2 = idf.xs((0, 0)) |
| 234 | + expected3 = idf.ix[0, 0] |
217 | 235 |
|
218 |
| - def test_frame_getitem_multicolumn_empty_level(self): |
219 |
| - f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) |
220 |
| - f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], |
221 |
| - ['level3 item1', 'level3 item2']] |
| 236 | + tm.assert_series_equal(result, expected) |
| 237 | + tm.assert_series_equal(result, expected2) |
| 238 | + tm.assert_series_equal(result, expected3) |
222 | 239 |
|
223 |
| - result = f['level1 item1'] |
224 |
| - expected = DataFrame([['1'], ['2'], ['3']], index=f.index, |
225 |
| - columns=['level3 item1']) |
226 |
| - tm.assert_frame_equal(result, expected) |
227 | 240 |
|
228 |
| - def test_getitem_tuple_plus_slice(self): |
229 |
| - # GH #671 |
230 |
| - df = DataFrame({'a': lrange(10), |
231 |
| - 'b': lrange(10), |
232 |
| - 'c': np.random.randn(10), |
233 |
| - 'd': np.random.randn(10)}) |
| 241 | +def test_getitem_toplevel(multiindex_dataframe_random_data): |
| 242 | + frame = multiindex_dataframe_random_data |
| 243 | + df = frame.T |
234 | 244 |
|
235 |
| - idf = df.set_index(['a', 'b']) |
| 245 | + result = df['foo'] |
| 246 | + expected = df.reindex(columns=df.columns[:3]) |
| 247 | + expected.columns = expected.columns.droplevel(0) |
| 248 | + tm.assert_frame_equal(result, expected) |
236 | 249 |
|
237 |
| - result = idf.loc[(0, 0), :] |
238 |
| - expected = idf.loc[0, 0] |
239 |
| - expected2 = idf.xs((0, 0)) |
240 |
| - with catch_warnings(record=True): |
241 |
| - simplefilter("ignore", DeprecationWarning) |
242 |
| - expected3 = idf.ix[0, 0] |
| 250 | + result = df['bar'] |
| 251 | + result2 = df.loc[:, 'bar'] |
243 | 252 |
|
244 |
| - tm.assert_series_equal(result, expected) |
245 |
| - tm.assert_series_equal(result, expected2) |
246 |
| - tm.assert_series_equal(result, expected3) |
| 253 | + expected = df.reindex(columns=df.columns[3:5]) |
| 254 | + expected.columns = expected.columns.droplevel(0) |
| 255 | + tm.assert_frame_equal(result, expected) |
| 256 | + tm.assert_frame_equal(result, result2) |
247 | 257 |
|
248 |
| - def test_getitem_toplevel(self, multiindex_dataframe_random_data): |
249 |
| - frame = multiindex_dataframe_random_data |
250 |
| - df = frame.T |
251 | 258 |
|
252 |
| - result = df['foo'] |
253 |
| - expected = df.reindex(columns=df.columns[:3]) |
254 |
| - expected.columns = expected.columns.droplevel(0) |
255 |
| - tm.assert_frame_equal(result, expected) |
| 259 | +def test_getitem_int(multiindex_dataframe_random_data): |
| 260 | + levels = [[0, 1], [0, 1, 2]] |
| 261 | + codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] |
| 262 | + index = MultiIndex(levels=levels, codes=codes) |
256 | 263 |
|
257 |
| - result = df['bar'] |
258 |
| - result2 = df.loc[:, 'bar'] |
| 264 | + frame = DataFrame(np.random.randn(6, 2), index=index) |
259 | 265 |
|
260 |
| - expected = df.reindex(columns=df.columns[3:5]) |
261 |
| - expected.columns = expected.columns.droplevel(0) |
262 |
| - tm.assert_frame_equal(result, expected) |
263 |
| - tm.assert_frame_equal(result, result2) |
| 266 | + result = frame.loc[1] |
| 267 | + expected = frame[-3:] |
| 268 | + expected.index = expected.index.droplevel(0) |
| 269 | + tm.assert_frame_equal(result, expected) |
264 | 270 |
|
265 |
| - def test_getitem_int(self, multiindex_dataframe_random_data): |
266 |
| - levels = [[0, 1], [0, 1, 2]] |
267 |
| - codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] |
268 |
| - index = MultiIndex(levels=levels, codes=codes) |
| 271 | + # raises exception |
| 272 | + msg = "3" |
| 273 | + with pytest.raises(KeyError, match=msg): |
| 274 | + frame.loc.__getitem__(3) |
269 | 275 |
|
270 |
| - frame = DataFrame(np.random.randn(6, 2), index=index) |
| 276 | + # however this will work |
| 277 | + frame = multiindex_dataframe_random_data |
| 278 | + result = frame.iloc[2] |
| 279 | + expected = frame.xs(frame.index[2]) |
| 280 | + tm.assert_series_equal(result, expected) |
271 | 281 |
|
272 |
| - result = frame.loc[1] |
273 |
| - expected = frame[-3:] |
274 |
| - expected.index = expected.index.droplevel(0) |
275 |
| - tm.assert_frame_equal(result, expected) |
276 | 282 |
|
277 |
| - # raises exception |
278 |
| - pytest.raises(KeyError, frame.loc.__getitem__, 3) |
| 283 | +def test_frame_getitem_view(multiindex_dataframe_random_data): |
| 284 | + frame = multiindex_dataframe_random_data |
| 285 | + df = frame.T.copy() |
279 | 286 |
|
280 |
| - # however this will work |
281 |
| - frame = multiindex_dataframe_random_data |
282 |
| - result = frame.iloc[2] |
283 |
| - expected = frame.xs(frame.index[2]) |
284 |
| - tm.assert_series_equal(result, expected) |
| 287 | + # this works because we are modifying the underlying array |
| 288 | + # really a no-no |
| 289 | + df['foo'].values[:] = 0 |
| 290 | + assert (df['foo'].values == 0).all() |
285 | 291 |
|
286 |
| - def test_frame_getitem_view(self, multiindex_dataframe_random_data): |
287 |
| - frame = multiindex_dataframe_random_data |
288 |
| - df = frame.T.copy() |
| 292 | + # but not if it's mixed-type |
| 293 | + df['foo', 'four'] = 'foo' |
| 294 | + df = df.sort_index(level=0, axis=1) |
289 | 295 |
|
290 |
| - # this works because we are modifying the underlying array |
291 |
| - # really a no-no |
292 |
| - df['foo'].values[:] = 0 |
293 |
| - assert (df['foo'].values == 0).all() |
| 296 | + # this will work, but will raise/warn as it is a chained assignment |
| 297 | + def f(): |
| 298 | + df['foo']['one'] = 2 |
| 299 | + return df |
294 | 300 |
|
295 |
| - # but not if it's mixed-type |
296 |
| - df['foo', 'four'] = 'foo' |
297 |
| - df = df.sort_index(level=0, axis=1) |
| 301 | + msg = "A value is trying to be set on a copy of a slice from a DataFrame" |
| 302 | + with pytest.raises(com.SettingWithCopyError, match=msg): |
| 303 | + df['foo']['one'] = 2 |
298 | 304 |
|
299 |
| - # this will work, but will raise/warn as its chained assignment |
300 |
| - def f(): |
301 |
| - df['foo']['one'] = 2 |
302 |
| - return df |
| 305 | + try: |
| 306 | + df = f() |
| 307 | + except ValueError: |
| 308 | + pass |
| 309 | + assert (df['foo', 'one'] == 0).all() |
303 | 310 |
|
304 |
| - pytest.raises(com.SettingWithCopyError, f) |
305 | 311 |
|
306 |
| - try: |
307 |
| - df = f() |
308 |
| - except ValueError: |
309 |
| - pass |
310 |
| - assert (df['foo', 'one'] == 0).all() |
| 312 | +def test_getitem_lowerdim_corner(multiindex_dataframe_random_data): |
| 313 | + frame = multiindex_dataframe_random_data |
| 314 | + msg = "11" |
| 315 | + with pytest.raises(KeyError, match=msg): |
| 316 | + frame.loc.__getitem__((('bar', 'three'), 'B')) |
311 | 317 |
|
312 |
| - def test_getitem_lowerdim_corner(self, multiindex_dataframe_random_data): |
313 |
| - frame = multiindex_dataframe_random_data |
314 |
| - pytest.raises(KeyError, frame.loc.__getitem__, |
315 |
| - (('bar', 'three'), 'B')) |
| 318 | + # in theory should be inserting in a sorted space???? |
| 319 | + frame.loc[('bar', 'three'), 'B'] = 0 |
| 320 | + assert frame.sort_index().loc[('bar', 'three'), 'B'] == 0 |
316 | 321 |
|
317 |
| - # in theory should be inserting in a sorted space???? |
318 |
| - frame.loc[('bar', 'three'), 'B'] = 0 |
319 |
| - assert frame.sort_index().loc[('bar', 'three'), 'B'] == 0 |
320 | 322 |
|
321 |
| - @pytest.mark.parametrize('unicode_strings', [True, False]) |
322 |
| - def test_mixed_depth_get(self, unicode_strings): |
323 |
| - # If unicode_strings is True, the column labels in dataframe |
324 |
| - # construction will use unicode strings in Python 2 (pull request |
325 |
| - # #17099). |
| 323 | +@pytest.mark.parametrize('unicode_strings', [True, False]) |
| 324 | +def test_mixed_depth_get(unicode_strings): |
| 325 | + # If unicode_strings is True, the column labels in dataframe |
| 326 | + # construction will use unicode strings in Python 2 (pull request |
| 327 | + # #17099). |
326 | 328 |
|
327 |
| - arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], |
328 |
| - ['', 'OD', 'OD', 'result1', 'result2', 'result1'], |
329 |
| - ['', 'wx', 'wy', '', '', '']] |
| 329 | + arrays = [['a', 'top', 'top', 'routine1', 'routine1', 'routine2'], |
| 330 | + ['', 'OD', 'OD', 'result1', 'result2', 'result1'], |
| 331 | + ['', 'wx', 'wy', '', '', '']] |
330 | 332 |
|
331 |
| - if unicode_strings: |
332 |
| - arrays = [[u(s) for s in arr] for arr in arrays] |
| 333 | + if unicode_strings: |
| 334 | + arrays = [[u(s) for s in arr] for arr in arrays] |
333 | 335 |
|
334 |
| - tuples = sorted(zip(*arrays)) |
335 |
| - index = MultiIndex.from_tuples(tuples) |
336 |
| - df = DataFrame(np.random.randn(4, 6), columns=index) |
| 336 | + tuples = sorted(zip(*arrays)) |
| 337 | + index = MultiIndex.from_tuples(tuples) |
| 338 | + df = DataFrame(np.random.randn(4, 6), columns=index) |
337 | 339 |
|
338 |
| - result = df['a'] |
339 |
| - expected = df['a', '', ''].rename('a') |
340 |
| - tm.assert_series_equal(result, expected) |
| 340 | + result = df['a'] |
| 341 | + expected = df['a', '', ''].rename('a') |
| 342 | + tm.assert_series_equal(result, expected) |
341 | 343 |
|
342 |
| - result = df['routine1', 'result1'] |
343 |
| - expected = df['routine1', 'result1', ''] |
344 |
| - expected = expected.rename(('routine1', 'result1')) |
345 |
| - tm.assert_series_equal(result, expected) |
| 344 | + result = df['routine1', 'result1'] |
| 345 | + expected = df['routine1', 'result1', ''] |
| 346 | + expected = expected.rename(('routine1', 'result1')) |
| 347 | + tm.assert_series_equal(result, expected) |
0 commit comments