1
1
import numpy as np
2
2
import pytest
3
3
4
- from pandas .compat import range , u , zip
4
+ from pandas .compat import u , zip
5
5
6
- import pandas as pd
7
6
from pandas import DataFrame , Index , MultiIndex , Series
8
- import pandas .core .common as com
9
7
from pandas .core .indexing import IndexingError
10
8
from pandas .util import testing as tm
11
9
12
-
13
- @pytest .fixture
14
- def frame_random_data_integer_multi_index ():
15
- levels = [[0 , 1 ], [0 , 1 , 2 ]]
16
- codes = [[0 , 0 , 0 , 1 , 1 , 1 ], [0 , 1 , 2 , 0 , 1 , 2 ]]
17
- index = MultiIndex (levels = levels , codes = codes )
18
- return DataFrame (np .random .randn (6 , 2 ), index = index )
19
-
20
-
21
- @pytest .fixture
22
- def dataframe_with_duplicate_index ():
23
- """Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
24
- data = [['a' , 'd' , 'e' , 'c' , 'f' , 'b' ],
25
- [1 , 4 , 5 , 3 , 6 , 2 ],
26
- [1 , 4 , 5 , 3 , 6 , 2 ]]
27
- index = ['h1' , 'h3' , 'h5' ]
28
- columns = MultiIndex (
29
- levels = [['A' , 'B' ], ['A1' , 'A2' , 'B1' , 'B2' ]],
30
- codes = [[0 , 0 , 0 , 1 , 1 , 1 ], [0 , 3 , 3 , 0 , 1 , 2 ]],
31
- names = ['main' , 'sub' ])
32
- return DataFrame (data , index = index , columns = columns )
10
+ # ----------------------------------------------------------------------------
11
+ # test indexing of Series with multi-level Index
12
+ # ----------------------------------------------------------------------------
33
13
34
14
35
15
@pytest .mark .parametrize ('access_method' , [lambda s , x : s [:, x ],
@@ -51,7 +31,7 @@ def test_series_getitem_multiindex(access_method, level1_value, expected):
51
31
52
32
53
33
@pytest .mark .parametrize ('level0_value' , ['D' , 'A' ])
54
- def test_getitem_duplicates_multiindex (level0_value ):
34
+ def test_series_getitem_duplicates_multiindex (level0_value ):
55
35
# GH 5725 the 'A' happens to be a valid Timestamp so it doesn't raise
56
36
# the appropriate error, only in PY3 of course!
57
37
@@ -65,12 +45,10 @@ def test_getitem_duplicates_multiindex(level0_value):
65
45
66
46
# confirm indexing on missing value raises KeyError
67
47
if level0_value != 'A' :
68
- msg = "'A'"
69
- with pytest .raises (KeyError , match = msg ):
48
+ with pytest .raises (KeyError , match = r"^'A'$" ):
70
49
df .val ['A' ]
71
50
72
- msg = "'X'"
73
- with pytest .raises (KeyError , match = msg ):
51
+ with pytest .raises (KeyError , match = r"^'X'$" ):
74
52
df .val ['X' ]
75
53
76
54
result = df .val [level0_value ]
@@ -79,89 +57,6 @@ def test_getitem_duplicates_multiindex(level0_value):
79
57
tm .assert_series_equal (result , expected )
80
58
81
59
82
- @pytest .mark .parametrize ('indexer, is_level1, expected_error' , [
83
- ([], False , None ), # empty ok
84
- (['A' ], False , None ),
85
- (['A' , 'D' ], False , None ),
86
- (['D' ], False , r"\['D'\] not in index" ), # not any values found
87
- (pd .IndexSlice [:, ['foo' ]], True , None ),
88
- (pd .IndexSlice [:, ['foo' , 'bah' ]], True , None )
89
- ])
90
- def test_getitem_duplicates_multiindex_missing_indexers (indexer , is_level1 ,
91
- expected_error ):
92
- # GH 7866
93
- # multi-index slicing with missing indexers
94
- idx = MultiIndex .from_product ([['A' , 'B' , 'C' ],
95
- ['foo' , 'bar' , 'baz' ]],
96
- names = ['one' , 'two' ])
97
- s = Series (np .arange (9 , dtype = 'int64' ), index = idx ).sort_index ()
98
-
99
- if indexer == []:
100
- expected = s .iloc [[]]
101
- elif is_level1 :
102
- expected = Series ([0 , 3 , 6 ], index = MultiIndex .from_product (
103
- [['A' , 'B' , 'C' ], ['foo' ]], names = ['one' , 'two' ])).sort_index ()
104
- else :
105
- exp_idx = MultiIndex .from_product ([['A' ], ['foo' , 'bar' , 'baz' ]],
106
- names = ['one' , 'two' ])
107
- expected = Series (np .arange (3 , dtype = 'int64' ),
108
- index = exp_idx ).sort_index ()
109
-
110
- if expected_error is not None :
111
- with pytest .raises (KeyError , match = expected_error ):
112
- s .loc [indexer ]
113
- else :
114
- result = s .loc [indexer ]
115
- tm .assert_series_equal (result , expected )
116
-
117
-
118
- @pytest .mark .parametrize ('columns_indexer' , [
119
- ([], slice (None )),
120
- (['foo' ], [])
121
- ])
122
- def test_getitem_duplicates_multiindex_empty_indexer (columns_indexer ):
123
- # GH 8737
124
- # empty indexer
125
- multi_index = MultiIndex .from_product ((['foo' , 'bar' , 'baz' ],
126
- ['alpha' , 'beta' ]))
127
- df = DataFrame (np .random .randn (5 , 6 ), index = range (5 ), columns = multi_index )
128
- df = df .sort_index (level = 0 , axis = 1 )
129
-
130
- expected = DataFrame (index = range (5 ), columns = multi_index .reindex ([])[0 ])
131
- result = df .loc [:, columns_indexer ]
132
- tm .assert_frame_equal (result , expected )
133
-
134
-
135
- def test_getitem_duplicates_multiindex_non_scalar_type_object ():
136
- # regression from < 0.14.0
137
- # GH 7914
138
- df = DataFrame ([[np .mean , np .median ], ['mean' , 'median' ]],
139
- columns = MultiIndex .from_tuples ([('functs' , 'mean' ),
140
- ('functs' , 'median' )]),
141
- index = ['function' , 'name' ])
142
- result = df .loc ['function' , ('functs' , 'mean' )]
143
- expected = np .mean
144
- assert result == expected
145
-
146
-
147
- def test_getitem_simple (multiindex_dataframe_random_data ):
148
- df = multiindex_dataframe_random_data .T
149
- expected = df .values [:, 0 ]
150
- result = df ['foo' , 'one' ].values
151
- tm .assert_almost_equal (result , expected )
152
-
153
-
154
- @pytest .mark .parametrize ('indexer,msg' , [
155
- (lambda df : df [('foo' , 'four' )], r"\('foo', 'four'\)" ),
156
- (lambda df : df ['foobar' ], "'foobar'" )
157
- ])
158
- def test_getitem_simple_key_error (
159
- multiindex_dataframe_random_data , indexer , msg ):
160
- df = multiindex_dataframe_random_data .T
161
- with pytest .raises (KeyError , match = msg ):
162
- indexer (df )
163
-
164
-
165
60
@pytest .mark .parametrize ('indexer' , [
166
61
lambda s : s [2000 , 3 ],
167
62
lambda s : s .loc [2000 , 3 ]
@@ -189,33 +84,20 @@ def test_series_getitem_returns_scalar(
189
84
assert result == expected
190
85
191
86
192
- @pytest .mark .filterwarnings ("ignore:\\ n.ix:DeprecationWarning" )
193
- @pytest .mark .parametrize ('indexer' , [
194
- lambda s : s .loc [[(2000 , 3 , 10 ), (2000 , 3 , 13 )]],
195
- lambda s : s .ix [[(2000 , 3 , 10 ), (2000 , 3 , 13 )]]
196
- ])
197
- def test_series_getitem_fancy (
198
- multiindex_year_month_day_dataframe_random_data , indexer ):
199
- s = multiindex_year_month_day_dataframe_random_data ['A' ]
200
- expected = s .reindex (s .index [49 :51 ])
201
-
202
- result = indexer (s )
203
- tm .assert_series_equal (result , expected )
204
-
205
-
206
- @pytest .mark .parametrize ('indexer,error,msg' , [
207
- (lambda s : s .__getitem__ ((2000 , 3 , 4 )), KeyError , '356' ),
208
- (lambda s : s [(2000 , 3 , 4 )], KeyError , '356' ),
87
+ @pytest .mark .parametrize ('indexer,expected_error,expected_error_msg' , [
88
+ (lambda s : s .__getitem__ ((2000 , 3 , 4 )), KeyError , r"^356L?$" ),
89
+ (lambda s : s [(2000 , 3 , 4 )], KeyError , r"^356L?$" ),
209
90
(lambda s : s .loc [(2000 , 3 , 4 )], IndexingError , 'Too many indexers' ),
210
91
(lambda s : s .__getitem__ (len (s )), IndexError , 'index out of bounds' ),
211
92
(lambda s : s [len (s )], IndexError , 'index out of bounds' ),
212
93
(lambda s : s .iloc [len (s )], IndexError ,
213
94
'single positional indexer is out-of-bounds' )
214
95
])
215
96
def test_series_getitem_indexing_errors (
216
- multiindex_year_month_day_dataframe_random_data , indexer , error , msg ):
97
+ multiindex_year_month_day_dataframe_random_data , indexer ,
98
+ expected_error , expected_error_msg ):
217
99
s = multiindex_year_month_day_dataframe_random_data ['A' ]
218
- with pytest .raises (error , match = msg ):
100
+ with pytest .raises (expected_error , match = expected_error_msg ):
219
101
indexer (s )
220
102
221
103
@@ -227,6 +109,28 @@ def test_series_getitem_corner_generator(
227
109
tm .assert_series_equal (result , expected )
228
110
229
111
112
+ # ----------------------------------------------------------------------------
113
+ # test indexing of DataFrame with multi-level Index
114
+ # ----------------------------------------------------------------------------
115
+
116
+ def test_getitem_simple (multiindex_dataframe_random_data ):
117
+ df = multiindex_dataframe_random_data .T
118
+ expected = df .values [:, 0 ]
119
+ result = df ['foo' , 'one' ].values
120
+ tm .assert_almost_equal (result , expected )
121
+
122
+
123
+ @pytest .mark .parametrize ('indexer,expected_error_msg' , [
124
+ (lambda df : df [('foo' , 'four' )], r"^\('foo', 'four'\)$" ),
125
+ (lambda df : df ['foobar' ], r"^'foobar'$" )
126
+ ])
127
+ def test_frame_getitem_simple_key_error (
128
+ multiindex_dataframe_random_data , indexer , expected_error_msg ):
129
+ df = multiindex_dataframe_random_data .T
130
+ with pytest .raises (KeyError , match = expected_error_msg ):
131
+ indexer (df )
132
+
133
+
230
134
def test_frame_getitem_multicolumn_empty_level ():
231
135
df = DataFrame ({'a' : ['1' , '2' , '3' ], 'b' : ['2' , '3' , '4' ]})
232
136
df .columns = [['level1 item1' , 'level1 item2' ], ['' , 'level2 item2' ],
@@ -238,24 +142,12 @@ def test_frame_getitem_multicolumn_empty_level():
238
142
tm .assert_frame_equal (result , expected )
239
143
240
144
241
- def test_getitem_tuple_plus_slice ():
242
- # GH 671
243
- df = DataFrame ({'a' : np .arange (10 ),
244
- 'b' : np .arange (10 ),
245
- 'c' : np .random .randn (10 ),
246
- 'd' : np .random .randn (10 )}
247
- ).set_index (['a' , 'b' ])
248
- expected = df .loc [0 , 0 ]
249
- result = df .loc [(0 , 0 ), :]
250
- tm .assert_series_equal (result , expected )
251
-
252
-
253
145
@pytest .mark .parametrize ('indexer,expected_slice' , [
254
146
(lambda df : df ['foo' ], slice (3 )),
255
147
(lambda df : df ['bar' ], slice (3 , 5 )),
256
148
(lambda df : df .loc [:, 'bar' ], slice (3 , 5 ))
257
149
])
258
- def test_getitem_toplevel (
150
+ def test_frame_getitem_toplevel (
259
151
multiindex_dataframe_random_data , indexer , expected_slice ):
260
152
df = multiindex_dataframe_random_data .T
261
153
expected = df .reindex (columns = df .columns [expected_slice ])
@@ -264,72 +156,8 @@ def test_getitem_toplevel(
264
156
tm .assert_frame_equal (result , expected )
265
157
266
158
267
- def test_getitem_int (frame_random_data_integer_multi_index ):
268
- df = frame_random_data_integer_multi_index
269
- result = df .loc [1 ]
270
- expected = df [- 3 :]
271
- expected .index = expected .index .droplevel (0 )
272
- tm .assert_frame_equal (result , expected )
273
-
274
-
275
- def test_getitem_int_raises_exception (frame_random_data_integer_multi_index ):
276
- df = frame_random_data_integer_multi_index
277
- msg = "3"
278
- with pytest .raises (KeyError , match = msg ):
279
- df .loc .__getitem__ (3 )
280
-
281
-
282
- def test_getitem_iloc (multiindex_dataframe_random_data ):
283
- df = multiindex_dataframe_random_data
284
- result = df .iloc [2 ]
285
- expected = df .xs (df .index [2 ])
286
- tm .assert_series_equal (result , expected )
287
-
288
-
289
- def test_frame_setitem_view_direct (multiindex_dataframe_random_data ):
290
- # this works because we are modifying the underlying array
291
- # really a no-no
292
- df = multiindex_dataframe_random_data .T
293
- df ['foo' ].values [:] = 0
294
- assert (df ['foo' ].values == 0 ).all ()
295
-
296
-
297
- def test_frame_setitem_copy_raises (multiindex_dataframe_random_data ):
298
- # will raise/warn as its chained assignment
299
- df = multiindex_dataframe_random_data .T
300
- msg = "A value is trying to be set on a copy of a slice from a DataFrame"
301
- with pytest .raises (com .SettingWithCopyError , match = msg ):
302
- df ['foo' ]['one' ] = 2
303
-
304
-
305
- def test_frame_setitem_copy_no_write (multiindex_dataframe_random_data ):
306
- frame = multiindex_dataframe_random_data .T
307
- expected = frame
308
- df = frame .copy ()
309
- msg = "A value is trying to be set on a copy of a slice from a DataFrame"
310
- with pytest .raises (com .SettingWithCopyError , match = msg ):
311
- df ['foo' ]['one' ] = 2
312
-
313
- result = df
314
- tm .assert_frame_equal (result , expected )
315
-
316
-
317
- def test_getitem_lowerdim_corner (multiindex_dataframe_random_data ):
318
- df = multiindex_dataframe_random_data
319
-
320
- # test setup - check key not in dataframe
321
- with pytest .raises (KeyError , match = "11" ):
322
- df .loc [('bar' , 'three' ), 'B' ]
323
-
324
- # in theory should be inserting in a sorted space????
325
- df .loc [('bar' , 'three' ), 'B' ] = 0
326
- expected = 0
327
- result = df .sort_index ().loc [('bar' , 'three' ), 'B' ]
328
- assert result == expected
329
-
330
-
331
159
@pytest .mark .parametrize ('unicode_strings' , [True , False ])
332
- def test_mixed_depth_get (unicode_strings ):
160
+ def test_frame_mixed_depth_get (unicode_strings ):
333
161
# If unicode_strings is True, the column labels in dataframe
334
162
# construction will use unicode strings in Python 2 (pull request
335
163
# #17099).
@@ -355,11 +183,29 @@ def test_mixed_depth_get(unicode_strings):
355
183
tm .assert_series_equal (result , expected )
356
184
357
185
186
+ # ----------------------------------------------------------------------------
187
+ # test indexing of DataFrame with multi-level Index with duplicates
188
+ # ----------------------------------------------------------------------------
189
+
190
+ @pytest .fixture
191
+ def dataframe_with_duplicate_index ():
192
+ """Fixture for DataFrame used in tests for gh-4145 and gh-4146"""
193
+ data = [['a' , 'd' , 'e' , 'c' , 'f' , 'b' ],
194
+ [1 , 4 , 5 , 3 , 6 , 2 ],
195
+ [1 , 4 , 5 , 3 , 6 , 2 ]]
196
+ index = ['h1' , 'h3' , 'h5' ]
197
+ columns = MultiIndex (
198
+ levels = [['A' , 'B' ], ['A1' , 'A2' , 'B1' , 'B2' ]],
199
+ codes = [[0 , 0 , 0 , 1 , 1 , 1 ], [0 , 3 , 3 , 0 , 1 , 2 ]],
200
+ names = ['main' , 'sub' ])
201
+ return DataFrame (data , index = index , columns = columns )
202
+
203
+
358
204
@pytest .mark .parametrize ('indexer' , [
359
- lambda df : df . loc [:, ('A' , 'A1' )],
360
- lambda df : df [ ('A' , 'A1' )]
205
+ lambda df : df [ ('A' , 'A1' )],
206
+ lambda df : df . loc [:, ('A' , 'A1' )]
361
207
])
362
- def test_mi_access (dataframe_with_duplicate_index , indexer ):
208
+ def test_frame_mi_access (dataframe_with_duplicate_index , indexer ):
363
209
# GH 4145
364
210
df = dataframe_with_duplicate_index
365
211
index = Index (['h1' , 'h3' , 'h5' ])
@@ -370,7 +216,7 @@ def test_mi_access(dataframe_with_duplicate_index, indexer):
370
216
tm .assert_frame_equal (result , expected )
371
217
372
218
373
- def test_mi_access_returns_series (dataframe_with_duplicate_index ):
219
+ def test_frame_mi_access_returns_series (dataframe_with_duplicate_index ):
374
220
# GH 4146, not returning a block manager when selecting a unique index
375
221
# from a duplicate index
376
222
# as of 4879, this returns a Series (which is similar to what happens
@@ -381,7 +227,7 @@ def test_mi_access_returns_series(dataframe_with_duplicate_index):
381
227
tm .assert_series_equal (result , expected )
382
228
383
229
384
- def test_mi_access_returns_frame (dataframe_with_duplicate_index ):
230
+ def test_frame_mi_access_returns_frame (dataframe_with_duplicate_index ):
385
231
# selecting a non_unique from the 2nd level
386
232
df = dataframe_with_duplicate_index
387
233
expected = DataFrame ([['d' , 4 , 4 ], ['e' , 5 , 5 ]],
0 commit comments