|
1 | 1 | import numpy as np
|
2 | 2 | import pytest
|
3 | 3 |
|
4 |
| -from pandas.compat import StringIO, lrange, range, u, zip |
| 4 | +from pandas.compat import range, u, zip |
5 | 5 |
|
6 | 6 | import pandas as pd
|
7 | 7 | from pandas import DataFrame, Index, MultiIndex, Series
|
|
10 | 10 | from pandas.util import testing as tm
|
11 | 11 |
|
12 | 12 |
|
| 13 | +@pytest.fixture |
| 14 | +def frame_random_data_integer_multi_index(): |
| 15 | + levels = [[0, 1], [0, 1, 2]] |
| 16 | + codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] |
| 17 | + index = MultiIndex(levels=levels, codes=codes) |
| 18 | + return DataFrame(np.random.randn(6, 2), index=index) |
| 19 | + |
| 20 | + |
| 21 | +@pytest.fixture |
| 22 | +def dataframe_with_duplicate_index(): |
| 23 | + """Fixture for DataFrame used in tests for gh-4145 and gh-4146""" |
| 24 | + data = [['a', 'd', 'e', 'c', 'f', 'b'], |
| 25 | + [1, 4, 5, 3, 6, 2], |
| 26 | + [1, 4, 5, 3, 6, 2]] |
| 27 | + index = ['h1', 'h3', 'h5'] |
| 28 | + columns = MultiIndex( |
| 29 | + levels=[['A', 'B'], ['A1', 'A2', 'B1', 'B2']], |
| 30 | + codes=[[0, 0, 0, 1, 1, 1], [0, 3, 3, 0, 1, 2]], |
| 31 | + names=['main', 'sub']) |
| 32 | + return DataFrame(data, index=index, columns=columns) |
| 33 | + |
| 34 | + |
13 | 35 | @pytest.mark.parametrize('access_method', [lambda s, x: s[:, x],
|
14 | 36 | lambda s, x: s.loc[:, x],
|
15 | 37 | lambda s, x: s.xs(x, level=1)])
|
@@ -206,116 +228,104 @@ def test_series_getitem_corner_generator(
|
206 | 228 |
|
207 | 229 |
|
208 | 230 | def test_frame_getitem_multicolumn_empty_level():
|
209 |
| - f = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) |
210 |
| - f.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], |
211 |
| - ['level3 item1', 'level3 item2']] |
| 231 | + df = DataFrame({'a': ['1', '2', '3'], 'b': ['2', '3', '4']}) |
| 232 | + df.columns = [['level1 item1', 'level1 item2'], ['', 'level2 item2'], |
| 233 | + ['level3 item1', 'level3 item2']] |
212 | 234 |
|
213 |
| - result = f['level1 item1'] |
214 |
| - expected = DataFrame([['1'], ['2'], ['3']], index=f.index, |
| 235 | + result = df['level1 item1'] |
| 236 | + expected = DataFrame([['1'], ['2'], ['3']], index=df.index, |
215 | 237 | columns=['level3 item1'])
|
216 | 238 | tm.assert_frame_equal(result, expected)
|
217 | 239 |
|
218 | 240 |
|
219 |
| -@pytest.mark.filterwarnings("ignore:\\n.ix:DeprecationWarning") |
220 | 241 | def test_getitem_tuple_plus_slice():
|
221 |
| - # GH #671 |
222 |
| - df = DataFrame({'a': lrange(10), |
223 |
| - 'b': lrange(10), |
| 242 | + # GH 671 |
| 243 | + df = DataFrame({'a': np.arange(10), |
| 244 | + 'b': np.arange(10), |
224 | 245 | 'c': np.random.randn(10),
|
225 |
| - 'd': np.random.randn(10)}) |
226 |
| - |
227 |
| - idf = df.set_index(['a', 'b']) |
228 |
| - |
229 |
| - result = idf.loc[(0, 0), :] |
230 |
| - expected = idf.loc[0, 0] |
231 |
| - expected2 = idf.xs((0, 0)) |
232 |
| - expected3 = idf.ix[0, 0] |
233 |
| - |
| 246 | + 'd': np.random.randn(10)} |
| 247 | + ).set_index(['a', 'b']) |
| 248 | + expected = df.loc[0, 0] |
| 249 | + result = df.loc[(0, 0), :] |
234 | 250 | tm.assert_series_equal(result, expected)
|
235 |
| - tm.assert_series_equal(result, expected2) |
236 |
| - tm.assert_series_equal(result, expected3) |
237 | 251 |
|
238 | 252 |
|
239 |
| -def test_getitem_toplevel(multiindex_dataframe_random_data): |
240 |
| - frame = multiindex_dataframe_random_data |
241 |
| - df = frame.T |
242 |
| - |
243 |
| - result = df['foo'] |
244 |
| - expected = df.reindex(columns=df.columns[:3]) |
245 |
| - expected.columns = expected.columns.droplevel(0) |
246 |
| - tm.assert_frame_equal(result, expected) |
247 |
| - |
248 |
| - result = df['bar'] |
249 |
| - result2 = df.loc[:, 'bar'] |
250 |
| - |
251 |
| - expected = df.reindex(columns=df.columns[3:5]) |
| 253 | +@pytest.mark.parametrize('indexer,expected_slice', [ |
| 254 | + (lambda df: df['foo'], slice(3)), |
| 255 | + (lambda df: df['bar'], slice(3, 5)), |
| 256 | + (lambda df: df.loc[:, 'bar'], slice(3, 5)) |
| 257 | +]) |
| 258 | +def test_getitem_toplevel( |
| 259 | + multiindex_dataframe_random_data, indexer, expected_slice): |
| 260 | + df = multiindex_dataframe_random_data.T |
| 261 | + expected = df.reindex(columns=df.columns[expected_slice]) |
252 | 262 | expected.columns = expected.columns.droplevel(0)
|
| 263 | + result = indexer(df) |
253 | 264 | tm.assert_frame_equal(result, expected)
|
254 |
| - tm.assert_frame_equal(result, result2) |
255 | 265 |
|
256 | 266 |
|
257 |
| -def test_getitem_int(multiindex_dataframe_random_data): |
258 |
| - levels = [[0, 1], [0, 1, 2]] |
259 |
| - codes = [[0, 0, 0, 1, 1, 1], [0, 1, 2, 0, 1, 2]] |
260 |
| - index = MultiIndex(levels=levels, codes=codes) |
261 |
| - |
262 |
| - frame = DataFrame(np.random.randn(6, 2), index=index) |
263 |
| - |
264 |
| - result = frame.loc[1] |
265 |
| - expected = frame[-3:] |
| 267 | +def test_getitem_int(frame_random_data_integer_multi_index): |
| 268 | + df = frame_random_data_integer_multi_index |
| 269 | + result = df.loc[1] |
| 270 | + expected = df[-3:] |
266 | 271 | expected.index = expected.index.droplevel(0)
|
267 | 272 | tm.assert_frame_equal(result, expected)
|
268 | 273 |
|
269 |
| - # raises exception |
| 274 | + |
| 275 | +def test_getitem_int_raises_exception(frame_random_data_integer_multi_index): |
| 276 | + df = frame_random_data_integer_multi_index |
270 | 277 | msg = "3"
|
271 | 278 | with pytest.raises(KeyError, match=msg):
|
272 |
| - frame.loc.__getitem__(3) |
| 279 | + df.loc.__getitem__(3) |
273 | 280 |
|
274 |
| - # however this will work |
275 |
| - frame = multiindex_dataframe_random_data |
276 |
| - result = frame.iloc[2] |
277 |
| - expected = frame.xs(frame.index[2]) |
278 |
| - tm.assert_series_equal(result, expected) |
279 | 281 |
|
| 282 | +def test_getitem_iloc(multiindex_dataframe_random_data): |
| 283 | + df = multiindex_dataframe_random_data |
| 284 | + result = df.iloc[2] |
| 285 | + expected = df.xs(df.index[2]) |
| 286 | + tm.assert_series_equal(result, expected) |
280 | 287 |
|
281 |
| -def test_frame_getitem_view(multiindex_dataframe_random_data): |
282 |
| - frame = multiindex_dataframe_random_data |
283 |
| - df = frame.T.copy() |
284 | 288 |
|
| 289 | +def test_frame_setitem_view_direct(multiindex_dataframe_random_data): |
285 | 290 | # this works because we are modifying the underlying array
|
286 | 291 | # really a no-no
|
| 292 | + df = multiindex_dataframe_random_data.T |
287 | 293 | df['foo'].values[:] = 0
|
288 | 294 | assert (df['foo'].values == 0).all()
|
289 | 295 |
|
290 |
| - # but not if it's mixed-type |
291 |
| - df['foo', 'four'] = 'foo' |
292 |
| - df = df.sort_index(level=0, axis=1) |
293 | 296 |
|
294 |
| - # this will work, but will raise/warn as its chained assignment |
295 |
| - def f(): |
| 297 | +def test_frame_setitem_copy_raises(multiindex_dataframe_random_data): |
| 298 | + # will raise/warn as its chained assignment |
| 299 | + df = multiindex_dataframe_random_data.T |
| 300 | + msg = "A value is trying to be set on a copy of a slice from a DataFrame" |
| 301 | + with pytest.raises(com.SettingWithCopyError, match=msg): |
296 | 302 | df['foo']['one'] = 2
|
297 |
| - return df |
298 | 303 |
|
| 304 | + |
| 305 | +def test_frame_setitem_copy_no_write(multiindex_dataframe_random_data): |
| 306 | + frame = multiindex_dataframe_random_data.T |
| 307 | + expected = frame |
| 308 | + df = frame.copy() |
299 | 309 | msg = "A value is trying to be set on a copy of a slice from a DataFrame"
|
300 | 310 | with pytest.raises(com.SettingWithCopyError, match=msg):
|
301 | 311 | df['foo']['one'] = 2
|
302 | 312 |
|
303 |
| - try: |
304 |
| - df = f() |
305 |
| - except ValueError: |
306 |
| - pass |
307 |
| - assert (df['foo', 'one'] == 0).all() |
| 313 | + result = df |
| 314 | + tm.assert_frame_equal(result, expected) |
308 | 315 |
|
309 | 316 |
|
310 | 317 | def test_getitem_lowerdim_corner(multiindex_dataframe_random_data):
|
311 |
| - frame = multiindex_dataframe_random_data |
312 |
| - msg = "11" |
313 |
| - with pytest.raises(KeyError, match=msg): |
314 |
| - frame.loc.__getitem__((('bar', 'three'), 'B')) |
| 318 | + df = multiindex_dataframe_random_data |
| 319 | + |
| 320 | + # test setup - check key not in dataframe |
| 321 | + with pytest.raises(KeyError, match="11"): |
| 322 | + df.loc[('bar', 'three'), 'B'] |
315 | 323 |
|
316 | 324 | # in theory should be inserting in a sorted space????
|
317 |
| - frame.loc[('bar', 'three'), 'B'] = 0 |
318 |
| - assert frame.sort_index().loc[('bar', 'three'), 'B'] == 0 |
| 325 | + df.loc[('bar', 'three'), 'B'] = 0 |
| 326 | + expected = 0 |
| 327 | + result = df.sort_index().loc[('bar', 'three'), 'B'] |
| 328 | + assert result == expected |
319 | 329 |
|
320 | 330 |
|
321 | 331 | @pytest.mark.parametrize('unicode_strings', [True, False])
|
@@ -345,41 +355,37 @@ def test_mixed_depth_get(unicode_strings):
|
345 | 355 | tm.assert_series_equal(result, expected)
|
346 | 356 |
|
347 | 357 |
|
348 |
| -def test_mi_access(): |
349 |
| - |
| 358 | +@pytest.mark.parametrize('indexer', [ |
| 359 | + lambda df: df.loc[:, ('A', 'A1')], |
| 360 | + lambda df: df[('A', 'A1')] |
| 361 | +]) |
| 362 | +def test_mi_access(dataframe_with_duplicate_index, indexer): |
350 | 363 | # GH 4145
|
351 |
| - data = """h1 main h3 sub h5 |
352 |
| -0 a A 1 A1 1 |
353 |
| -1 b B 2 B1 2 |
354 |
| -2 c B 3 A1 3 |
355 |
| -3 d A 4 B2 4 |
356 |
| -4 e A 5 B2 5 |
357 |
| -5 f B 6 A2 6 |
358 |
| -""" |
359 |
| - |
360 |
| - df = pd.read_csv(StringIO(data), sep=r'\s+', index_col=0) |
361 |
| - df2 = df.set_index(['main', 'sub']).T.sort_index(1) |
| 364 | + df = dataframe_with_duplicate_index |
362 | 365 | index = Index(['h1', 'h3', 'h5'])
|
363 | 366 | columns = MultiIndex.from_tuples([('A', 'A1')], names=['main', 'sub'])
|
364 | 367 | expected = DataFrame([['a', 1, 1]], index=columns, columns=index).T
|
365 | 368 |
|
366 |
| - result = df2.loc[:, ('A', 'A1')] |
| 369 | + result = indexer(df) |
367 | 370 | tm.assert_frame_equal(result, expected)
|
368 | 371 |
|
369 |
| - result = df2[('A', 'A1')] |
370 |
| - tm.assert_frame_equal(result, expected) |
371 | 372 |
|
| 373 | +def test_mi_access_returns_series(dataframe_with_duplicate_index): |
372 | 374 | # GH 4146, not returning a block manager when selecting a unique index
|
373 | 375 | # from a duplicate index
|
374 | 376 | # as of 4879, this returns a Series (which is similar to what happens
|
375 | 377 | # with a non-unique)
|
| 378 | + df = dataframe_with_duplicate_index |
376 | 379 | expected = Series(['a', 1, 1], index=['h1', 'h3', 'h5'], name='A1')
|
377 |
| - result = df2['A']['A1'] |
| 380 | + result = df['A']['A1'] |
378 | 381 | tm.assert_series_equal(result, expected)
|
379 | 382 |
|
| 383 | + |
| 384 | +def test_mi_access_returns_frame(dataframe_with_duplicate_index): |
380 | 385 | # selecting a non_unique from the 2nd level
|
| 386 | + df = dataframe_with_duplicate_index |
381 | 387 | expected = DataFrame([['d', 4, 4], ['e', 5, 5]],
|
382 | 388 | index=Index(['B2', 'B2'], name='sub'),
|
383 | 389 | columns=['h1', 'h3', 'h5'], ).T
|
384 |
| - result = df2['A']['B2'] |
| 390 | + result = df['A']['B2'] |
385 | 391 | tm.assert_frame_equal(result, expected)
|
0 commit comments