@@ -49,7 +49,8 @@ def test_set_index_cast(self):
49
49
tm .assert_frame_equal (df , df2 )
50
50
51
51
# A has duplicate values, C does not
52
- @pytest .mark .parametrize ('keys' , ['A' , 'C' , ['A' , 'B' ]])
52
+ @pytest .mark .parametrize ('keys' , ['A' , 'C' , ['A' , 'B' ],
53
+ ('tuple' , 'as' , 'label' )])
53
54
@pytest .mark .parametrize ('inplace' , [True , False ])
54
55
@pytest .mark .parametrize ('drop' , [True , False ])
55
56
def test_set_index_drop_inplace (self , frame_of_index_cols ,
@@ -72,7 +73,8 @@ def test_set_index_drop_inplace(self, frame_of_index_cols,
72
73
tm .assert_frame_equal (result , expected )
73
74
74
75
# A has duplicate values, C does not
75
- @pytest .mark .parametrize ('keys' , ['A' , 'C' , ['A' , 'B' ]])
76
+ @pytest .mark .parametrize ('keys' , ['A' , 'C' , ['A' , 'B' ],
77
+ ('tuple' , 'as' , 'label' )])
76
78
@pytest .mark .parametrize ('drop' , [True , False ])
77
79
def test_set_index_append (self , frame_of_index_cols , drop , keys ):
78
80
df = frame_of_index_cols
@@ -88,7 +90,8 @@ def test_set_index_append(self, frame_of_index_cols, drop, keys):
88
90
tm .assert_frame_equal (result , expected )
89
91
90
92
# A has duplicate values, C does not
91
- @pytest .mark .parametrize ('keys' , ['A' , 'C' , ['A' , 'B' ]])
93
+ @pytest .mark .parametrize ('keys' , ['A' , 'C' , ['A' , 'B' ],
94
+ ('tuple' , 'as' , 'label' )])
92
95
@pytest .mark .parametrize ('drop' , [True , False ])
93
96
def test_set_index_append_to_multiindex (self , frame_of_index_cols ,
94
97
drop , keys ):
@@ -114,8 +117,10 @@ def test_set_index_after_mutation(self):
114
117
tm .assert_frame_equal (result , expected )
115
118
116
119
# MultiIndex constructor does not work directly on Series -> lambda
120
+ # Also box as list-of-list, since a plain list is read as a list of keys -> lambda
117
121
# also test index name if append=True (name is duplicate here for B)
118
122
@pytest .mark .parametrize ('box' , [Series , Index , np .array ,
123
+ list , tuple , iter , lambda x : [list (x )],
119
124
lambda x : MultiIndex .from_arrays ([x ])])
120
125
@pytest .mark .parametrize ('append, index_name' , [(True , None ),
121
126
(True , 'B' ), (True , 'test' ), (False , None )])
@@ -126,21 +131,29 @@ def test_set_index_pass_single_array(self, frame_of_index_cols,
126
131
df .index .name = index_name
127
132
128
133
key = box (df ['B' ])
129
- # np.array and list "forget" the name of B
130
- name = [None if box in [np .array , list ] else 'B' ]
134
+ if box == list :
135
+ # list of strings gets interpreted as list of keys
136
+ msg = "['one', 'two', 'three', 'one', 'two']"
137
+ with tm .assert_raises_regex (KeyError , msg ):
138
+ df .set_index (key , drop = drop , append = append )
139
+ else :
140
+ # np.array/tuple/iter/list-of-list "forget" the name of B
141
+ name_mi = getattr (key , 'names' , None )
142
+ name = [getattr (key , 'name' , None )] if name_mi is None else name_mi
131
143
132
- result = df .set_index (key , drop = drop , append = append )
144
+ result = df .set_index (key , drop = drop , append = append )
133
145
134
- # only valid column keys are dropped
135
- # since B is always passed as array above, nothing is dropped
136
- expected = df .set_index (['B' ], drop = False , append = append )
137
- expected .index .names = [index_name ] + name if append else name
146
+ # only valid column keys are dropped
147
+ # since B is always passed as array above, nothing is dropped
148
+ expected = df .set_index (['B' ], drop = False , append = append )
149
+ expected .index .names = [index_name ] + name if append else name
138
150
139
- tm .assert_frame_equal (result , expected )
151
+ tm .assert_frame_equal (result , expected )
140
152
141
153
# MultiIndex constructor does not work directly on Series -> lambda
142
154
# also test index name if append=True (name is duplicate here for A & B)
143
- @pytest .mark .parametrize ('box' , [Series , Index , np .array , list ,
155
+ @pytest .mark .parametrize ('box' , [Series , Index , np .array ,
156
+ list , tuple , iter ,
144
157
lambda x : MultiIndex .from_arrays ([x ])])
145
158
@pytest .mark .parametrize ('append, index_name' ,
146
159
[(True , None ), (True , 'A' ), (True , 'B' ),
@@ -152,8 +165,8 @@ def test_set_index_pass_arrays(self, frame_of_index_cols,
152
165
df .index .name = index_name
153
166
154
167
keys = ['A' , box (df ['B' ])]
155
- # np.array and list "forget" the name of B
156
- names = ['A' , None if box in [np .array , list ] else 'B' ]
168
+ # np.array/list/tuple/iter "forget" the name of B
169
+ names = ['A' , None if box in [np .array , list , tuple , iter ] else 'B' ]
157
170
158
171
result = df .set_index (keys , drop = drop , append = append )
159
172
@@ -168,10 +181,12 @@ def test_set_index_pass_arrays(self, frame_of_index_cols,
168
181
# MultiIndex constructor does not work directly on Series -> lambda
169
182
# We also emulate a "constructor" for the label -> lambda
170
183
# also test index name if append=True (name is duplicate here for A)
171
- @pytest .mark .parametrize ('box2' , [Series , Index , np .array , list ,
184
+ @pytest .mark .parametrize ('box2' , [Series , Index , np .array ,
185
+ list , tuple , iter ,
172
186
lambda x : MultiIndex .from_arrays ([x ]),
173
187
lambda x : x .name ])
174
- @pytest .mark .parametrize ('box1' , [Series , Index , np .array , list ,
188
+ @pytest .mark .parametrize ('box1' , [Series , Index , np .array ,
189
+ list , tuple , iter ,
175
190
lambda x : MultiIndex .from_arrays ([x ]),
176
191
lambda x : x .name ])
177
192
@pytest .mark .parametrize ('append, index_name' , [(True , None ),
@@ -183,21 +198,22 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop,
183
198
df .index .name = index_name
184
199
185
200
keys = [box1 (df ['A' ]), box2 (df ['A' ])]
201
+ result = df .set_index (keys , drop = drop , append = append )
186
202
187
- # == gives ambiguous Boolean for Series
188
- if drop and keys [0 ] is 'A' and keys [1 ] is 'A' :
189
- with tm .assert_raises_regex (KeyError , '.*' ):
190
- df .set_index (keys , drop = drop , append = append )
191
- else :
192
- result = df .set_index (keys , drop = drop , append = append )
203
+ # if either box was iter, the content has been consumed; re-read it
204
+ keys = [box1 (df ['A' ]), box2 (df ['A' ])]
193
205
194
- # to test against already-tested behavior, we add sequentially,
195
- # hence second append always True; must wrap in list, otherwise
196
- # list-box will be illegal
197
- expected = df .set_index ([keys [0 ]], drop = drop , append = append )
198
- expected = expected .set_index ([keys [1 ]], drop = drop , append = True )
206
+ # need to adapt first drop for case that both keys are 'A' --
207
+ # cannot drop the same column twice;
208
+ # use "is" because == is elementwise on array-like keys (Series/Index/ndarray) and would raise an ambiguous truth-value error
209
+ first_drop = False if (keys [0 ] is 'A' and keys [1 ] is 'A' ) else drop
199
210
200
- tm .assert_frame_equal (result , expected )
211
+ # to test against already-tested behaviour, we add sequentially,
212
+ # hence second append always True; must wrap keys in list, otherwise
213
+ # box = list would be illegal
214
+ expected = df .set_index ([keys [0 ]], drop = first_drop , append = append )
215
+ expected = expected .set_index ([keys [1 ]], drop = drop , append = True )
216
+ tm .assert_frame_equal (result , expected )
201
217
202
218
@pytest .mark .parametrize ('append' , [True , False ])
203
219
@pytest .mark .parametrize ('drop' , [True , False ])
@@ -229,13 +245,24 @@ def test_set_index_verify_integrity(self, frame_of_index_cols):
229
245
def test_set_index_raise (self , frame_of_index_cols , drop , append ):
230
246
df = frame_of_index_cols
231
247
232
- with tm .assert_raises_regex (KeyError , '.*' ): # column names are A-E
248
+ with tm .assert_raises_regex (KeyError , "['foo', 'bar', 'baz']" ):
249
+ # column names are A-E, as well as one tuple
233
250
df .set_index (['foo' , 'bar' , 'baz' ], drop = drop , append = append )
234
251
235
252
# non-existent key in list with arrays
236
- with tm .assert_raises_regex (KeyError , '.* ' ):
253
+ with tm .assert_raises_regex (KeyError , 'X ' ):
237
254
df .set_index ([df ['A' ], df ['B' ], 'X' ], drop = drop , append = append )
238
255
256
+ msg = 'The parameter "keys" may only contain a combination of.*'
257
+ # forbidden type, e.g. set
258
+ with tm .assert_raises_regex (TypeError , msg ):
259
+ df .set_index (set (df ['A' ]), drop = drop , append = append )
260
+
261
+ # forbidden type in list, e.g. set
262
+ with tm .assert_raises_regex (TypeError , msg ):
263
+ df .set_index (['A' , df ['A' ], set (df ['A' ])],
264
+ drop = drop , append = append )
265
+
239
266
def test_construction_with_categorical_index (self ):
240
267
ci = tm .makeCategoricalIndex (10 )
241
268
ci .name = 'B'
0 commit comments