@@ -151,6 +151,8 @@ def test_multiindex(self):
151
151
152
152
class TestGetDummies (tm .TestCase ):
153
153
154
+ sparse = False
155
+
154
156
def setUp (self ):
155
157
self .df = DataFrame ({'A' : ['a' , 'b' , 'a' ], 'B' : ['b' , 'b' , 'c' ],
156
158
'C' : [1 , 2 , 3 ]})
@@ -163,20 +165,20 @@ def test_basic(self):
163
165
expected = DataFrame ({'a' : {0 : 1.0 , 1 : 0.0 , 2 : 0.0 },
164
166
'b' : {0 : 0.0 , 1 : 1.0 , 2 : 0.0 },
165
167
'c' : {0 : 0.0 , 1 : 0.0 , 2 : 1.0 }})
166
- assert_frame_equal (get_dummies (s_list ), expected )
167
- assert_frame_equal (get_dummies (s_series ), expected )
168
+ assert_frame_equal (get_dummies (s_list , sparse = self . sparse ), expected )
169
+ assert_frame_equal (get_dummies (s_series , sparse = self . sparse ), expected )
168
170
169
171
expected .index = list ('ABC' )
170
- assert_frame_equal (get_dummies (s_series_index ), expected )
172
+ assert_frame_equal (get_dummies (s_series_index , sparse = self . sparse ), expected )
171
173
172
174
def test_just_na (self ):
173
175
just_na_list = [np .nan ]
174
176
just_na_series = Series (just_na_list )
175
177
just_na_series_index = Series (just_na_list , index = ['A' ])
176
178
177
- res_list = get_dummies (just_na_list )
178
- res_series = get_dummies (just_na_series )
179
- res_series_index = get_dummies (just_na_series_index )
179
+ res_list = get_dummies (just_na_list , sparse = self . sparse )
180
+ res_series = get_dummies (just_na_series , sparse = self . sparse )
181
+ res_series_index = get_dummies (just_na_series_index , sparse = self . sparse )
180
182
181
183
self .assertEqual (res_list .empty , True )
182
184
self .assertEqual (res_series .empty , True )
@@ -188,20 +190,21 @@ def test_just_na(self):
188
190
189
191
def test_include_na(self):
    values = ['a', 'b', np.nan]

    # Without dummy_na only the two string labels are expected as columns.
    exp = DataFrame({'a': {0: 1.0, 1: 0.0, 2: 0.0},
                     'b': {0: 0.0, 1: 1.0, 2: 0.0}})
    res = get_dummies(values, sparse=self.sparse)
    assert_frame_equal(res, exp)

    # Sparse dataframes do not allow nan labelled columns, see GH 8822
    res_na = get_dummies(values, dummy_na=True, sparse=self.sparse)
    exp_na = DataFrame({nan: {0: 0.0, 1: 0.0, 2: 1.0},
                        'a': {0: 1.0, 1: 0.0, 2: 0.0},
                        'b': {0: 0.0, 1: 1.0, 2: 0.0}})
    exp_na = exp_na.reindex_axis(['a', 'b', nan], 1)
    # hack (NaN handling in assert_index_equal): reuse the result's own
    # column index so the comparison is on the values only
    exp_na.columns = res_na.columns
    assert_frame_equal(res_na, exp_na)

    # A lone NaN with dummy_na=True: compare the raw values only.
    res_just_na = get_dummies([nan], dummy_na=True, sparse=self.sparse)
    exp_just_na = DataFrame(Series(1.0, index=[0]), columns=[nan])
    assert_array_equal(res_just_na.values, exp_just_na.values)
207
210
@@ -210,21 +213,21 @@ def test_unicode(self): # See GH 6885 - get_dummies chokes on unicode values
210
213
e = 'e'
211
214
eacute = unicodedata .lookup ('LATIN SMALL LETTER E WITH ACUTE' )
212
215
s = [e , eacute , eacute ]
213
- res = get_dummies (s , prefix = 'letter' )
216
+ res = get_dummies (s , prefix = 'letter' , sparse = self . sparse )
214
217
exp = DataFrame ({'letter_e' : {0 : 1.0 , 1 : 0.0 , 2 : 0.0 },
215
218
u ('letter_%s' ) % eacute : {0 : 0.0 , 1 : 1.0 , 2 : 1.0 }})
216
219
assert_frame_equal (res , exp )
217
220
218
221
def test_dataframe_dummies_all_obj(self):
    # Restrict the fixture to its two string columns; each is expected to
    # be replaced by '<column>_<value>' indicator columns.
    object_cols = self.df[['A', 'B']]
    expected = DataFrame({'A_a': [1., 0, 1], 'A_b': [0., 1, 0],
                          'B_b': [1., 1, 0], 'B_c': [0., 0, 1]})
    result = get_dummies(object_cols, sparse=self.sparse)
    assert_frame_equal(result, expected)
224
227
225
228
def test_dataframe_dummies_mix_default (self ):
226
229
df = self .df
227
- result = get_dummies (df )
230
+ result = get_dummies (df , sparse = self . sparse )
228
231
expected = DataFrame ({'C' : [1 , 2 , 3 ], 'A_a' : [1. , 0 , 1 ],
229
232
'A_b' : [0. , 1 , 0 ], 'B_b' : [1. , 1 , 0 ],
230
233
'B_c' : [0. , 0 , 1 ]})
@@ -235,18 +238,18 @@ def test_dataframe_dummies_prefix_list(self):
235
238
prefixes = ['from_A' , 'from_B' ]
236
239
df = DataFrame ({'A' : ['a' , 'b' , 'a' ], 'B' : ['b' , 'b' , 'c' ],
237
240
'C' : [1 , 2 , 3 ]})
238
- result = get_dummies (df , prefix = prefixes )
241
+ result = get_dummies (df , prefix = prefixes , sparse = self . sparse )
239
242
expected = DataFrame ({'C' : [1 , 2 , 3 ], 'from_A_a' : [1. , 0 , 1 ],
240
243
'from_A_b' : [0. , 1 , 0 ], 'from_B_b' : [1. , 1 , 0 ],
241
244
'from_B_c' : [0. , 0 , 1 ]})
242
245
expected = expected [['C' , 'from_A_a' , 'from_A_b' , 'from_B_b' ,
243
246
'from_B_c' ]]
244
247
assert_frame_equal (result , expected )
245
248
246
- def test_datafrmae_dummies_prefix_str (self ):
249
+ def test_dataframe_dummies_prefix_str (self ):
247
250
# not that you should do this...
248
251
df = self .df
249
- result = get_dummies (df , prefix = 'bad' )
252
+ result = get_dummies (df , prefix = 'bad' , sparse = self . sparse )
250
253
expected = DataFrame ([[1 , 1. , 0. , 1. , 0. ],
251
254
[2 , 0. , 1. , 1. , 0. ],
252
255
[3 , 1. , 0. , 0. , 1. ]],
@@ -256,40 +259,40 @@ def test_datafrmae_dummies_prefix_str(self):
256
259
def test_dataframe_dummies_subset(self):
    # Only 'A' is listed in ``columns``; 'B' and 'C' are expected to come
    # back untouched next to the 'from_A_*' indicator columns.
    frame = self.df
    expected = DataFrame({'from_A_a': [1., 0, 1],
                          'from_A_b': [0., 1, 0],
                          'B': ['b', 'b', 'c'],
                          'C': [1, 2, 3]})
    result = get_dummies(frame, prefix=['from_A'], columns=['A'],
                         sparse=self.sparse)
    assert_frame_equal(result, expected)
263
266
264
267
def test_dataframe_dummies_prefix_sep(self):
    frame = self.df

    # A single string separator is used for both encoded columns.
    column_order = ['C', 'A..a', 'A..b', 'B..b', 'B..c']
    expected = DataFrame({'C': [1, 2, 3], 'A..a': [1., 0, 1],
                          'A..b': [0., 1, 0], 'B..b': [1., 1, 0],
                          'B..c': [0., 0, 1]})
    expected = expected[column_order]
    result = get_dummies(frame, prefix_sep='..', sparse=self.sparse)
    assert_frame_equal(result, expected)

    # Per-column separators, given first as a list and then as a dict,
    # must both produce '..' for column A and '__' for column B.
    expected = expected.rename(columns={'B..b': 'B__b', 'B..c': 'B__c'})
    for separators in (['..', '__'], {'A': '..', 'B': '__'}):
        result = get_dummies(frame, prefix_sep=separators,
                             sparse=self.sparse)
        assert_frame_equal(result, expected)
279
282
280
283
def test_dataframe_dummies_prefix_bad_length(self):
    # A one-element prefix list for the fixture frame must be rejected
    # with ValueError.
    short_prefixes = ['too few']
    with tm.assertRaises(ValueError):
        get_dummies(self.df, prefix=short_prefixes, sparse=self.sparse)
283
286
284
287
def test_dataframe_dummies_prefix_sep_bad_length(self):
    # A one-element separator list for the fixture frame must be rejected
    # with ValueError.
    short_separators = ['bad']
    with tm.assertRaises(ValueError):
        get_dummies(self.df, prefix_sep=short_separators,
                    sparse=self.sparse)
287
290
288
291
def test_dataframe_dummies_prefix_dict (self ):
289
292
prefixes = {'A' : 'from_A' , 'B' : 'from_B' }
290
293
df = DataFrame ({'A' : ['a' , 'b' , 'a' ], 'B' : ['b' , 'b' , 'c' ],
291
294
'C' : [1 , 2 , 3 ]})
292
- result = get_dummies (df , prefix = prefixes )
295
+ result = get_dummies (df , prefix = prefixes , sparse = self . sparse )
293
296
expected = DataFrame ({'from_A_a' : [1. , 0 , 1 ], 'from_A_b' : [0. , 1 , 0 ],
294
297
'from_B_b' : [1. , 1 , 0 ], 'from_B_c' : [0. , 0 , 1 ],
295
298
'C' : [1 , 2 , 3 ]})
@@ -298,22 +301,22 @@ def test_dataframe_dummies_prefix_dict(self):
298
301
def test_dataframe_dummies_with_na(self):
    frame = self.df
    # Add a row labelled 3 whose three cells are all NaN.
    frame.loc[3, :] = [np.nan, np.nan, np.nan]

    # dummy_na=True: each encoded column gains a '<column>_nan' indicator.
    with_na_order = ['C', 'A_a', 'A_b', 'A_nan', 'B_b', 'B_c', 'B_nan']
    expected = DataFrame({'C': [1, 2, 3, np.nan],
                          'A_a': [1., 0, 1, 0],
                          'A_b': [0., 1, 0, 0],
                          'A_nan': [0., 0, 0, 1],
                          'B_b': [1., 1, 0, 0],
                          'B_c': [0., 0, 1, 0],
                          'B_nan': [0., 0, 0, 1]})
    expected = expected[with_na_order]
    result = get_dummies(frame, dummy_na=True, sparse=self.sparse)
    assert_frame_equal(result, expected)

    # dummy_na=False: same frame minus the two '*_nan' columns.
    expected = expected[['C', 'A_a', 'A_b', 'B_b', 'B_c']]
    result = get_dummies(frame, dummy_na=False, sparse=self.sparse)
    assert_frame_equal(result, expected)
312
315
313
316
def test_dataframe_dummies_with_categorical (self ):
314
317
df = self .df
315
318
df ['cat' ] = pd .Categorical (['x' , 'y' , 'y' ])
316
- result = get_dummies (df )
319
+ result = get_dummies (df , sparse = self . sparse )
317
320
expected = DataFrame ({'C' : [1 , 2 , 3 ], 'A_a' : [1. , 0 , 1 ],
318
321
'A_b' : [0. , 1 , 0 ], 'B_b' : [1. , 1 , 0 ],
319
322
'B_c' : [0. , 0 , 1 ], 'cat_x' : [1. , 0 , 0 ],
@@ -322,6 +325,11 @@ def test_dataframe_dummies_with_categorical(self):
322
325
'cat_x' , 'cat_y' ]]
323
326
assert_frame_equal (result , expected )
324
327
328
+
329
class TestGetDummiesSparse(TestGetDummies):
    # Inherits the TestGetDummies tests unchanged; those tests read
    # ``self.sparse`` and forward it to get_dummies, so overriding the
    # class attribute re-runs them with sparse=True.
    sparse = True
331
+
332
+
325
333
class TestLreshape (tm .TestCase ):
326
334
327
335
def test_pairs (self ):
0 commit comments