23
23
from pandas .tests .frame .common import TestData
24
24
25
25
26
@pytest.fixture
def frame_of_index_cols():
    """
    Five-row DataFrame whose columns can be used as index keys.

    Columns 'A', 'B' and 'C' hold string labels ('A' and 'B' contain
    repeated values, 'C' does not); 'D' and 'E' each hold five values
    drawn from ``np.random.randn``.
    """
    columns = {
        'A': ['foo', 'foo', 'foo', 'bar', 'bar'],
        'B': ['one', 'two', 'three', 'one', 'two'],
        'C': ['a', 'b', 'c', 'd', 'e'],
        # entries are evaluated top to bottom, so 'D' is drawn before 'E'
        'D': np.random.randn(5),
        'E': np.random.randn(5),
    }
    return DataFrame(columns)
34
+
35
+
26
36
class TestDataFrameAlterAxes (TestData ):
27
37
28
38
def test_set_index_directly (self ):
@@ -54,8 +64,9 @@ def test_set_index_cast(self):
54
64
@pytest .mark .parametrize ('keys' , ['A' , 'C' , ['A' , 'B' ]])
55
65
@pytest .mark .parametrize ('inplace' , [True , False ])
56
66
@pytest .mark .parametrize ('drop' , [True , False ])
57
- def test_set_index_drop_inplace (self , drop , inplace , keys ):
58
- df = self .dummy .copy ()
67
+ def test_set_index_drop_inplace (self , frame_of_index_cols ,
68
+ drop , inplace , keys ):
69
+ df = frame_of_index_cols
59
70
60
71
if isinstance (keys , list ):
61
72
idx = MultiIndex .from_arrays ([df [x ] for x in keys ], names = keys )
@@ -75,8 +86,8 @@ def test_set_index_drop_inplace(self, drop, inplace, keys):
75
86
# A has duplicate values, C does not
76
87
@pytest .mark .parametrize ('keys' , ['A' , 'C' , ['A' , 'B' ]])
77
88
@pytest .mark .parametrize ('drop' , [True , False ])
78
- def test_set_index_append (self , drop , keys ):
79
- df = self . dummy . copy ()
89
+ def test_set_index_append (self , frame_of_index_cols , drop , keys ):
90
+ df = frame_of_index_cols
80
91
81
92
keys = keys if isinstance (keys , list ) else [keys ]
82
93
idx = MultiIndex .from_arrays ([df .index ] + [df [x ] for x in keys ],
@@ -91,12 +102,14 @@ def test_set_index_append(self, drop, keys):
91
102
# A has duplicate values, C does not
92
103
@pytest .mark .parametrize ('keys' , ['A' , 'C' , ['A' , 'B' ]])
93
104
@pytest .mark .parametrize ('drop' , [True , False ])
94
- def test_set_index_append_to_multiindex (self , drop , keys ):
105
+ def test_set_index_append_to_multiindex (self , frame_of_index_cols ,
106
+ drop , keys ):
95
107
# append to existing multiindex
96
- df = self . dummy .set_index (['D' ], drop = drop , append = True )
108
+ df = frame_of_index_cols .set_index (['D' ], drop = drop , append = True )
97
109
98
110
keys = keys if isinstance (keys , list ) else [keys ]
99
- expected = self .dummy .set_index (['D' ] + keys , drop = drop , append = True )
111
+ expected = frame_of_index_cols .set_index (['D' ] + keys ,
112
+ drop = drop , append = True )
100
113
101
114
result = df .set_index (keys , drop = drop , append = True )
102
115
@@ -112,19 +125,18 @@ def test_set_index_after_mutation(self):
112
125
result = df2 .set_index ('key' )
113
126
tm .assert_frame_equal (result , expected )
114
127
128
+ # MultiIndex constructor does not work directly on Series -> lambda
115
129
# also test index name if append=True (name is duplicate here for B)
116
- @pytest .mark .parametrize ('box' , [Series , Index , np .array , 'MultiIndex' ])
130
+ @pytest .mark .parametrize ('box' , [Series , Index , np .array ,
131
+ lambda x : MultiIndex .from_arrays ([x ])])
117
132
@pytest .mark .parametrize ('append, index_name' , [(True , None ),
118
133
(True , 'B' ), (True , 'test' ), (False , None )])
119
134
@pytest .mark .parametrize ('drop' , [True , False ])
120
- def test_set_index_pass_single_array (self , drop , append , index_name , box ):
121
- df = self .dummy .copy ()
135
+ def test_set_index_pass_single_array (self , frame_of_index_cols ,
136
+ drop , append , index_name , box ):
137
+ df = frame_of_index_cols
122
138
df .index .name = index_name
123
139
124
- # update constructor in case of MultiIndex
125
- box = ((lambda x : MultiIndex .from_arrays ([x ]))
126
- if box == 'MultiIndex' else box )
127
-
128
140
key = box (df ['B' ])
129
141
# np.array and list "forget" the name of B
130
142
name = [None if box in [np .array , list ] else 'B' ]
@@ -138,21 +150,19 @@ def test_set_index_pass_single_array(self, drop, append, index_name, box):
138
150
139
151
tm .assert_frame_equal (result , expected )
140
152
153
+ # MultiIndex constructor does not work directly on Series -> lambda
141
154
# also test index name if append=True (name is duplicate here for A & B)
142
- @pytest .mark .parametrize ('box' , [Series , Index , np .array ,
143
- list , ' MultiIndex' ])
155
+ @pytest .mark .parametrize ('box' , [Series , Index , np .array , list ,
156
+ lambda x : MultiIndex . from_arrays ([ x ]) ])
144
157
@pytest .mark .parametrize ('append, index_name' ,
145
158
[(True , None ), (True , 'A' ), (True , 'B' ),
146
159
(True , 'test' ), (False , None )])
147
160
@pytest .mark .parametrize ('drop' , [True , False ])
148
- def test_set_index_pass_arrays (self , drop , append , index_name , box ):
149
- df = self .dummy .copy ()
161
+ def test_set_index_pass_arrays (self , frame_of_index_cols ,
162
+ drop , append , index_name , box ):
163
+ df = frame_of_index_cols
150
164
df .index .name = index_name
151
165
152
- # update constructor in case of MultiIndex
153
- box = ((lambda x : MultiIndex .from_arrays ([x ]))
154
- if box == 'MultiIndex' else box )
155
-
156
166
keys = ['A' , box (df ['B' ])]
157
167
# np.array and list "forget" the name of B
158
168
names = ['A' , None if box in [np .array , list ] else 'B' ]
@@ -167,28 +177,24 @@ def test_set_index_pass_arrays(self, drop, append, index_name, box):
167
177
168
178
tm .assert_frame_equal (result , expected )
169
179
180
+ # MultiIndex constructor does not work directly on Series -> lambda
181
+ # We also emulate a "constructor" for the label -> lambda
170
182
# also test index name if append=True (name is duplicate here for A)
171
- @pytest .mark .parametrize ('box1' , ['label' , Series , Index , np .array ,
172
- list , 'MultiIndex' ])
173
- @pytest .mark .parametrize ('box2' , ['label' , Series , Index , np .array ,
174
- list , 'MultiIndex' ])
183
+ @pytest .mark .parametrize ('box2' , [Series , Index , np .array , list ,
184
+ lambda x : MultiIndex .from_arrays ([x ]),
185
+ lambda x : x .name ])
186
+ @pytest .mark .parametrize ('box1' , [Series , Index , np .array , list ,
187
+ lambda x : MultiIndex .from_arrays ([x ]),
188
+ lambda x : x .name ])
175
189
@pytest .mark .parametrize ('append, index_name' , [(True , None ),
176
190
(True , 'A' ), (True , 'test' ), (False , None )])
177
191
@pytest .mark .parametrize ('drop' , [True , False ])
178
- def test_set_index_pass_arrays_duplicate (self , drop , append , index_name ,
179
- box1 , box2 ):
180
- df = self . dummy . copy ()
192
+ def test_set_index_pass_arrays_duplicate (self , frame_of_index_cols , drop ,
193
+ append , index_name , box1 , box2 ):
194
+ df = frame_of_index_cols
181
195
df .index .name = index_name
182
196
183
- # transform strings to correct box constructor
184
- def rebox (x ):
185
- if x == 'label' :
186
- return lambda x : x .name
187
- elif x == 'MultiIndex' :
188
- return lambda x : MultiIndex .from_arrays ([x ])
189
- return x
190
-
191
- keys = [rebox (box1 )(df ['A' ]), rebox (box2 )(df ['A' ])]
197
+ keys = [box1 (df ['A' ]), box2 (df ['A' ])]
192
198
193
199
# == gives ambiguous Boolean for Series
194
200
if keys [0 ] is 'A' and keys [1 ] is 'A' :
@@ -208,8 +214,9 @@ def rebox(x):
208
214
209
215
@pytest .mark .parametrize ('append' , [True , False ])
210
216
@pytest .mark .parametrize ('drop' , [True , False ])
211
- def test_set_index_pass_multiindex (self , drop , append ):
212
- df = self .dummy .copy ()
217
+ def test_set_index_pass_multiindex (self , frame_of_index_cols ,
218
+ drop , append ):
219
+ df = frame_of_index_cols
213
220
keys = MultiIndex .from_arrays ([df ['A' ], df ['B' ]], names = ['A' , 'B' ])
214
221
215
222
result = df .set_index (keys , drop = drop , append = append )
@@ -219,8 +226,8 @@ def test_set_index_pass_multiindex(self, drop, append):
219
226
220
227
tm .assert_frame_equal (result , expected )
221
228
222
- def test_set_index_verify_integrity (self ):
223
- df = self . dummy . copy ()
229
+ def test_set_index_verify_integrity (self , frame_of_index_cols ):
230
+ df = frame_of_index_cols
224
231
225
232
with tm .assert_raises_regex (ValueError ,
226
233
'Index has duplicate keys' ):
@@ -230,8 +237,8 @@ def test_set_index_verify_integrity(self):
230
237
'Index has duplicate keys' ):
231
238
df .set_index ([df ['A' ], df ['A' ]], verify_integrity = True )
232
239
233
- def test_set_index_raise (self ):
234
- df = self . dummy . copy ()
240
+ def test_set_index_raise (self , frame_of_index_cols ):
241
+ df = frame_of_index_cols
235
242
236
243
with tm .assert_raises_regex (KeyError , '.*' ): # column names are A-E
237
244
df .set_index (['foo' , 'bar' , 'baz' ], verify_integrity = True )
0 commit comments