@@ -224,7 +224,7 @@ def df(self):
224
224
'B' : ['b' , 'b' , 'c' ],
225
225
'C' : [1 , 2 , 3 ]})
226
226
227
- @pytest .fixture (params = ['uint8' , 'int64 ' , np .float64 , bool , None ])
227
+ @pytest .fixture (params = ['uint8' , 'i8 ' , np .float64 , bool , None ])
228
228
def dtype (self , request ):
229
229
return np .dtype (request .param )
230
230
@@ -240,7 +240,7 @@ def effective_dtype(self, dtype):
240
240
return dtype
241
241
242
242
def test_throws_on_dtype_object (self , df ):
243
- with pytest .raises (TypeError ):
243
+ with pytest .raises (ValueError ):
244
244
get_dummies (df , dtype = 'object' )
245
245
246
246
def test_basic (self , sparse , dtype ):
@@ -347,26 +347,26 @@ def test_include_na(self, sparse, dtype):
347
347
dtype = self .effective_dtype (dtype ))
348
348
tm .assert_numpy_array_equal (res_just_na .values , exp_just_na .values )
349
349
350
- def test_unicode (self , sparse , dtype ):
350
+ def test_unicode (self , sparse ):
351
351
# See GH 6885 - get_dummies chokes on unicode values
352
352
import unicodedata
353
353
e = 'e'
354
354
eacute = unicodedata .lookup ('LATIN SMALL LETTER E WITH ACUTE' )
355
355
s = [e , eacute , eacute ]
356
- res = get_dummies (s , prefix = 'letter' , sparse = sparse , dtype = dtype )
356
+ res = get_dummies (s , prefix = 'letter' , sparse = sparse )
357
357
exp = DataFrame ({'letter_e' : [1 , 0 , 0 ],
358
358
u ('letter_%s' ) % eacute : [0 , 1 , 1 ]},
359
- dtype = self . effective_dtype ( dtype ) )
359
+ dtype = np . uint8 )
360
360
assert_frame_equal (res , exp )
361
361
362
- def test_dataframe_dummies_all_obj (self , df , sparse , dtype ):
362
+ def test_dataframe_dummies_all_obj (self , df , sparse ):
363
363
df = df [['A' , 'B' ]]
364
- result = get_dummies (df , sparse = sparse , dtype = dtype )
364
+ result = get_dummies (df , sparse = sparse )
365
365
expected = DataFrame ({'A_a' : [1 , 0 , 1 ],
366
366
'A_b' : [0 , 1 , 0 ],
367
367
'B_b' : [1 , 1 , 0 ],
368
368
'B_c' : [0 , 0 , 1 ]},
369
- dtype = self . effective_dtype ( dtype ) )
369
+ dtype = np . uint8 )
370
370
assert_frame_equal (result , expected )
371
371
372
372
def test_dataframe_dummies_mix_default (self , df , sparse , dtype ):
@@ -381,30 +381,30 @@ def test_dataframe_dummies_mix_default(self, df, sparse, dtype):
381
381
expected = expected [['C' , 'A_a' , 'A_b' , 'B_b' , 'B_c' ]]
382
382
assert_frame_equal (result , expected )
383
383
384
- def test_dataframe_dummies_prefix_list (self , df , sparse , dtype ):
384
+ def test_dataframe_dummies_prefix_list (self , df , sparse ):
385
385
prefixes = ['from_A' , 'from_B' ]
386
- result = get_dummies (df , prefix = prefixes , sparse = sparse , dtype = dtype )
386
+ result = get_dummies (df , prefix = prefixes , sparse = sparse )
387
387
expected = DataFrame ({'C' : [1 , 2 , 3 ],
388
388
'from_A_a' : [1 , 0 , 1 ],
389
389
'from_A_b' : [0 , 1 , 0 ],
390
390
'from_B_b' : [1 , 1 , 0 ],
391
- 'from_B_c' : [0 , 0 , 1 ]})
392
- cols = expected . columns [ 1 :]
393
- expected [cols ] = expected [ cols ]. astype ( dtype )
391
+ 'from_B_c' : [0 , 0 , 1 ]},
392
+ dtype = np . uint8 )
393
+ expected [[ 'C' ]] = df [[ 'C' ]]
394
394
expected = expected [['C' , 'from_A_a' , 'from_A_b' ,
395
395
'from_B_b' , 'from_B_c' ]]
396
396
assert_frame_equal (result , expected )
397
397
398
- def test_dataframe_dummies_prefix_str (self , df , sparse , dtype ):
398
+ def test_dataframe_dummies_prefix_str (self , df , sparse ):
399
399
# not that you should do this...
400
- result = get_dummies (df , prefix = 'bad' , sparse = sparse , dtype = dtype )
400
+ result = get_dummies (df , prefix = 'bad' , sparse = sparse )
401
401
bad_columns = ['bad_a' , 'bad_b' , 'bad_b' , 'bad_c' ]
402
402
expected = DataFrame ([[1 , 1 , 0 , 1 , 0 ],
403
403
[2 , 0 , 1 , 1 , 0 ],
404
404
[3 , 1 , 0 , 0 , 1 ]],
405
405
columns = ['C' ] + bad_columns ,
406
- dtype = self . effective_dtype ( dtype ) )
407
- expected ['C' ] = [ 1 , 2 , 3 ]
406
+ dtype = np . uint8 )
407
+ expected [[ 'C' ]] = df [[ 'C' ] ]
408
408
assert_frame_equal (result , expected )
409
409
410
410
def test_dataframe_dummies_subset (self , df , sparse , dtype ):
@@ -418,25 +418,24 @@ def test_dataframe_dummies_subset(self, df, sparse, dtype):
418
418
expected [columns ] = expected [columns ].astype (dtype )
419
419
assert_frame_equal (result , expected )
420
420
421
- def test_dataframe_dummies_prefix_sep (self , df , sparse , dtype ):
422
- result = get_dummies (df , prefix_sep = '..' , sparse = sparse , dtype = dtype )
421
+ def test_dataframe_dummies_prefix_sep (self , df , sparse ):
422
+ result = get_dummies (df , prefix_sep = '..' , sparse = sparse )
423
423
expected = DataFrame ({'C' : [1 , 2 , 3 ],
424
424
'A..a' : [1 , 0 , 1 ],
425
425
'A..b' : [0 , 1 , 0 ],
426
426
'B..b' : [1 , 1 , 0 ],
427
- 'B..c' : [0 , 0 , 1 ]})
427
+ 'B..c' : [0 , 0 , 1 ]},
428
+ dtype = np .uint8 )
429
+ expected [['C' ]] = df [['C' ]]
428
430
expected = expected [['C' , 'A..a' , 'A..b' , 'B..b' , 'B..c' ]]
429
- cols = expected .columns [1 :]
430
- expected [cols ] = expected [cols ].astype (self .effective_dtype (dtype ))
431
431
assert_frame_equal (result , expected )
432
432
433
- result = get_dummies (df , prefix_sep = ['..' , '__' ],
434
- sparse = sparse , dtype = dtype )
433
+ result = get_dummies (df , prefix_sep = ['..' , '__' ], sparse = sparse )
435
434
expected = expected .rename (columns = {'B..b' : 'B__b' , 'B..c' : 'B__c' })
436
435
assert_frame_equal (result , expected )
437
436
438
437
result = get_dummies (df , prefix_sep = {'A' : '..' , 'B' : '__' },
439
- sparse = sparse , dtype = dtype )
438
+ sparse = sparse )
440
439
assert_frame_equal (result , expected )
441
440
442
441
def test_dataframe_dummies_prefix_bad_length (self , df , sparse ):
@@ -447,12 +446,12 @@ def test_dataframe_dummies_prefix_sep_bad_length(self, df, sparse):
447
446
with pytest .raises (ValueError ):
448
447
get_dummies (df , prefix_sep = ['bad' ], sparse = sparse )
449
448
450
- def test_dataframe_dummies_prefix_dict (self , sparse , dtype ):
449
+ def test_dataframe_dummies_prefix_dict (self , sparse ):
451
450
prefixes = {'A' : 'from_A' , 'B' : 'from_B' }
452
451
df = DataFrame ({'A' : ['a' , 'b' , 'a' ],
453
452
'B' : ['b' , 'b' , 'c' ],
454
453
'C' : [1 , 2 , 3 ]})
455
- result = get_dummies (df , prefix = prefixes , sparse = sparse , dtype = dtype )
454
+ result = get_dummies (df , prefix = prefixes , sparse = sparse )
456
455
457
456
expected = DataFrame ({'from_A_a' : [1 , 0 , 1 ],
458
457
'from_A_b' : [0 , 1 , 0 ],
@@ -461,8 +460,7 @@ def test_dataframe_dummies_prefix_dict(self, sparse, dtype):
461
460
'C' : [1 , 2 , 3 ]})
462
461
463
462
columns = ['from_A_a' , 'from_A_b' , 'from_B_b' , 'from_B_c' ]
464
- effective_dtype = self .effective_dtype (dtype )
465
- expected [columns ] = expected [columns ].astype (effective_dtype )
463
+ expected [columns ] = expected [columns ].astype (np .uint8 )
466
464
assert_frame_equal (result , expected )
467
465
468
466
def test_dataframe_dummies_with_na (self , df , sparse , dtype ):
@@ -610,23 +608,23 @@ def test_dataframe_dummies_drop_first_with_na(self, df, sparse, dtype):
610
608
expected = expected [['C' , 'A_b' , 'B_c' ]]
611
609
assert_frame_equal (result , expected )
612
610
613
- def test_int_int (self , dtype ):
611
+ def test_int_int (self ):
614
612
data = Series ([1 , 2 , 1 ])
615
- result = pd .get_dummies (data , dtype = dtype )
613
+ result = pd .get_dummies (data )
616
614
expected = DataFrame ([[1 , 0 ],
617
615
[0 , 1 ],
618
616
[1 , 0 ]],
619
617
columns = [1 , 2 ],
620
- dtype = self . effective_dtype ( dtype ) )
618
+ dtype = np . uint8 )
621
619
tm .assert_frame_equal (result , expected )
622
620
623
621
data = Series (pd .Categorical (['a' , 'b' , 'a' ]))
624
- result = pd .get_dummies (data , dtype = dtype )
622
+ result = pd .get_dummies (data )
625
623
expected = DataFrame ([[1 , 0 ],
626
624
[0 , 1 ],
627
625
[1 , 0 ]],
628
626
columns = pd .Categorical (['a' , 'b' ]),
629
- dtype = self . effective_dtype ( dtype ) )
627
+ dtype = np . uint8 )
630
628
tm .assert_frame_equal (result , expected )
631
629
632
630
def test_int_df (self , dtype ):
0 commit comments