@@ -150,6 +150,8 @@ def test_multiindex(self):
150
150
151
151
class TestGetDummies (tm .TestCase ):
152
152
153
+ sparse = False
154
+
153
155
def setUp (self ):
154
156
self .df = DataFrame ({'A' : ['a' , 'b' , 'a' ], 'B' : ['b' , 'b' , 'c' ],
155
157
'C' : [1 , 2 , 3 ]})
@@ -162,20 +164,20 @@ def test_basic(self):
162
164
expected = DataFrame ({'a' : {0 : 1.0 , 1 : 0.0 , 2 : 0.0 },
163
165
'b' : {0 : 0.0 , 1 : 1.0 , 2 : 0.0 },
164
166
'c' : {0 : 0.0 , 1 : 0.0 , 2 : 1.0 }})
165
- assert_frame_equal (get_dummies (s_list ), expected )
166
- assert_frame_equal (get_dummies (s_series ), expected )
167
+ assert_frame_equal (get_dummies (s_list , sparse = self . sparse ), expected )
168
+ assert_frame_equal (get_dummies (s_series , sparse = self . sparse ), expected )
167
169
168
170
expected .index = list ('ABC' )
169
- assert_frame_equal (get_dummies (s_series_index ), expected )
171
+ assert_frame_equal (get_dummies (s_series_index , sparse = self . sparse ), expected )
170
172
171
173
def test_just_na (self ):
172
174
just_na_list = [np .nan ]
173
175
just_na_series = Series (just_na_list )
174
176
just_na_series_index = Series (just_na_list , index = ['A' ])
175
177
176
- res_list = get_dummies (just_na_list )
177
- res_series = get_dummies (just_na_series )
178
- res_series_index = get_dummies (just_na_series_index )
178
+ res_list = get_dummies (just_na_list , sparse = self . sparse )
179
+ res_series = get_dummies (just_na_series , sparse = self . sparse )
180
+ res_series_index = get_dummies (just_na_series_index , sparse = self . sparse )
179
181
180
182
self .assertEqual (res_list .empty , True )
181
183
self .assertEqual (res_series .empty , True )
@@ -187,20 +189,21 @@ def test_just_na(self):
187
189
188
190
def test_include_na (self ):
189
191
s = ['a' , 'b' , np .nan ]
190
- res = get_dummies (s )
192
+ res = get_dummies (s , sparse = self . sparse )
191
193
exp = DataFrame ({'a' : {0 : 1.0 , 1 : 0.0 , 2 : 0.0 },
192
194
'b' : {0 : 0.0 , 1 : 1.0 , 2 : 0.0 }})
193
195
assert_frame_equal (res , exp )
194
196
195
- res_na = get_dummies (s , dummy_na = True )
197
+ # Sparse dataframes do not allow nan labelled columns, see #GH8822
198
+ res_na = get_dummies (s , dummy_na = True , sparse = self .sparse )
196
199
exp_na = DataFrame ({nan : {0 : 0.0 , 1 : 0.0 , 2 : 1.0 },
197
200
'a' : {0 : 1.0 , 1 : 0.0 , 2 : 0.0 },
198
201
'b' : {0 : 0.0 , 1 : 1.0 , 2 : 0.0 }}).reindex_axis (['a' , 'b' , nan ], 1 )
199
202
# hack (NaN handling in assert_index_equal)
200
203
exp_na .columns = res_na .columns
201
204
assert_frame_equal (res_na , exp_na )
202
205
203
- res_just_na = get_dummies ([nan ], dummy_na = True )
206
+ res_just_na = get_dummies ([nan ], dummy_na = True , sparse = self . sparse )
204
207
exp_just_na = DataFrame (Series (1.0 ,index = [0 ]),columns = [nan ])
205
208
assert_array_equal (res_just_na .values , exp_just_na .values )
206
209
@@ -209,21 +212,21 @@ def test_unicode(self): # See GH 6885 - get_dummies chokes on unicode values
209
212
e = 'e'
210
213
eacute = unicodedata .lookup ('LATIN SMALL LETTER E WITH ACUTE' )
211
214
s = [e , eacute , eacute ]
212
- res = get_dummies (s , prefix = 'letter' )
215
+ res = get_dummies (s , prefix = 'letter' , sparse = self . sparse )
213
216
exp = DataFrame ({'letter_e' : {0 : 1.0 , 1 : 0.0 , 2 : 0.0 },
214
217
u ('letter_%s' ) % eacute : {0 : 0.0 , 1 : 1.0 , 2 : 1.0 }})
215
218
assert_frame_equal (res , exp )
216
219
217
220
def test_dataframe_dummies_all_obj (self ):
218
221
df = self .df [['A' , 'B' ]]
219
- result = get_dummies (df )
222
+ result = get_dummies (df , sparse = self . sparse )
220
223
expected = DataFrame ({'A_a' : [1. , 0 , 1 ], 'A_b' : [0. , 1 , 0 ],
221
224
'B_b' : [1. , 1 , 0 ], 'B_c' : [0. , 0 , 1 ]})
222
225
assert_frame_equal (result , expected )
223
226
224
227
def test_dataframe_dummies_mix_default (self ):
225
228
df = self .df
226
- result = get_dummies (df )
229
+ result = get_dummies (df , sparse = self . sparse )
227
230
expected = DataFrame ({'C' : [1 , 2 , 3 ], 'A_a' : [1. , 0 , 1 ],
228
231
'A_b' : [0. , 1 , 0 ], 'B_b' : [1. , 1 , 0 ],
229
232
'B_c' : [0. , 0 , 1 ]})
@@ -234,18 +237,18 @@ def test_dataframe_dummies_prefix_list(self):
234
237
prefixes = ['from_A' , 'from_B' ]
235
238
df = DataFrame ({'A' : ['a' , 'b' , 'a' ], 'B' : ['b' , 'b' , 'c' ],
236
239
'C' : [1 , 2 , 3 ]})
237
- result = get_dummies (df , prefix = prefixes )
240
+ result = get_dummies (df , prefix = prefixes , sparse = self . sparse )
238
241
expected = DataFrame ({'C' : [1 , 2 , 3 ], 'from_A_a' : [1. , 0 , 1 ],
239
242
'from_A_b' : [0. , 1 , 0 ], 'from_B_b' : [1. , 1 , 0 ],
240
243
'from_B_c' : [0. , 0 , 1 ]})
241
244
expected = expected [['C' , 'from_A_a' , 'from_A_b' , 'from_B_b' ,
242
245
'from_B_c' ]]
243
246
assert_frame_equal (result , expected )
244
247
245
- def test_datafrmae_dummies_prefix_str (self ):
248
+ def test_dataframe_dummies_prefix_str (self ):
246
249
# not that you should do this...
247
250
df = self .df
248
- result = get_dummies (df , prefix = 'bad' )
251
+ result = get_dummies (df , prefix = 'bad' , sparse = self . sparse )
249
252
expected = DataFrame ([[1 , 1. , 0. , 1. , 0. ],
250
253
[2 , 0. , 1. , 1. , 0. ],
251
254
[3 , 1. , 0. , 0. , 1. ]],
@@ -255,40 +258,40 @@ def test_datafrmae_dummies_prefix_str(self):
255
258
def test_dataframe_dummies_subset (self ):
256
259
df = self .df
257
260
result = get_dummies (df , prefix = ['from_A' ],
258
- columns = ['A' ])
261
+ columns = ['A' ], sparse = self . sparse )
259
262
expected = DataFrame ({'from_A_a' : [1. , 0 , 1 ], 'from_A_b' : [0. , 1 , 0 ],
260
263
'B' : ['b' , 'b' , 'c' ], 'C' : [1 , 2 , 3 ]})
261
264
assert_frame_equal (result , expected )
262
265
263
266
def test_dataframe_dummies_prefix_sep (self ):
264
267
df = self .df
265
- result = get_dummies (df , prefix_sep = '..' )
268
+ result = get_dummies (df , prefix_sep = '..' , sparse = self . sparse )
266
269
expected = DataFrame ({'C' : [1 , 2 , 3 ], 'A..a' : [1. , 0 , 1 ],
267
270
'A..b' : [0. , 1 , 0 ], 'B..b' : [1. , 1 , 0 ],
268
271
'B..c' : [0. , 0 , 1 ]})
269
272
expected = expected [['C' , 'A..a' , 'A..b' , 'B..b' , 'B..c' ]]
270
273
assert_frame_equal (result , expected )
271
274
272
- result = get_dummies (df , prefix_sep = ['..' , '__' ])
275
+ result = get_dummies (df , prefix_sep = ['..' , '__' ], sparse = self . sparse )
273
276
expected = expected .rename (columns = {'B..b' : 'B__b' , 'B..c' : 'B__c' })
274
277
assert_frame_equal (result , expected )
275
278
276
- result = get_dummies (df , prefix_sep = {'A' : '..' , 'B' : '__' })
279
+ result = get_dummies (df , prefix_sep = {'A' : '..' , 'B' : '__' }, sparse = self . sparse )
277
280
assert_frame_equal (result , expected )
278
281
279
282
def test_dataframe_dummies_prefix_bad_length (self ):
280
283
with tm .assertRaises (ValueError ):
281
- get_dummies (self .df , prefix = ['too few' ])
284
+ get_dummies (self .df , prefix = ['too few' ], sparse = self . sparse )
282
285
283
286
def test_dataframe_dummies_prefix_sep_bad_length (self ):
284
287
with tm .assertRaises (ValueError ):
285
- get_dummies (self .df , prefix_sep = ['bad' ])
288
+ get_dummies (self .df , prefix_sep = ['bad' ], sparse = self . sparse )
286
289
287
290
def test_dataframe_dummies_prefix_dict (self ):
288
291
prefixes = {'A' : 'from_A' , 'B' : 'from_B' }
289
292
df = DataFrame ({'A' : ['a' , 'b' , 'a' ], 'B' : ['b' , 'b' , 'c' ],
290
293
'C' : [1 , 2 , 3 ]})
291
- result = get_dummies (df , prefix = prefixes )
294
+ result = get_dummies (df , prefix = prefixes , sparse = self . sparse )
292
295
expected = DataFrame ({'from_A_a' : [1. , 0 , 1 ], 'from_A_b' : [0. , 1 , 0 ],
293
296
'from_B_b' : [1. , 1 , 0 ], 'from_B_c' : [0. , 0 , 1 ],
294
297
'C' : [1 , 2 , 3 ]})
@@ -297,22 +300,22 @@ def test_dataframe_dummies_prefix_dict(self):
297
300
def test_dataframe_dummies_with_na (self ):
298
301
df = self .df
299
302
df .loc [3 , :] = [np .nan , np .nan , np .nan ]
300
- result = get_dummies (df , dummy_na = True )
303
+ result = get_dummies (df , dummy_na = True , sparse = self . sparse )
301
304
expected = DataFrame ({'C' : [1 , 2 , 3 , np .nan ], 'A_a' : [1. , 0 , 1 , 0 ],
302
305
'A_b' : [0. , 1 , 0 , 0 ], 'A_nan' : [0. , 0 , 0 , 1 ], 'B_b' : [1. , 1 , 0 , 0 ],
303
306
'B_c' : [0. , 0 , 1 , 0 ], 'B_nan' : [0. , 0 , 0 , 1 ]})
304
307
expected = expected [['C' , 'A_a' , 'A_b' , 'A_nan' , 'B_b' , 'B_c' ,
305
308
'B_nan' ]]
306
309
assert_frame_equal (result , expected )
307
310
308
- result = get_dummies (df , dummy_na = False )
311
+ result = get_dummies (df , dummy_na = False , sparse = self . sparse )
309
312
expected = expected [['C' , 'A_a' , 'A_b' , 'B_b' , 'B_c' ]]
310
313
assert_frame_equal (result , expected )
311
314
312
315
def test_dataframe_dummies_with_categorical (self ):
313
316
df = self .df
314
317
df ['cat' ] = pd .Categorical (['x' , 'y' , 'y' ])
315
- result = get_dummies (df )
318
+ result = get_dummies (df , sparse = self . sparse )
316
319
expected = DataFrame ({'C' : [1 , 2 , 3 ], 'A_a' : [1. , 0 , 1 ],
317
320
'A_b' : [0. , 1 , 0 ], 'B_b' : [1. , 1 , 0 ],
318
321
'B_c' : [0. , 0 , 1 ], 'cat_x' : [1. , 0 , 0 ],
@@ -321,6 +324,8 @@ def test_dataframe_dummies_with_categorical(self):
321
324
'cat_x' , 'cat_y' ]]
322
325
assert_frame_equal (result , expected )
323
326
327
+ class TestGetDummiesSparse (TestGetDummies ):
328
+ sparse = True
324
329
325
330
class TestConvertDummies (tm .TestCase ):
326
331
def test_convert_dummies (self ):
0 commit comments