@@ -171,7 +171,7 @@ def test_get_dummies_unicode(self, sparse):
171
171
s = [e , eacute , eacute ]
172
172
res = get_dummies (s , prefix = "letter" , sparse = sparse )
173
173
exp = DataFrame (
174
- {"letter_e" : [1 , 0 , 0 ], f"letter_{ eacute } " : [0 , 1 , 1 ]}, dtype = np . uint8
174
+ {"letter_e" : [True , False , False ], f"letter_{ eacute } " : [False , True , True ]}
175
175
)
176
176
if sparse :
177
177
exp = exp .apply (SparseArray , fill_value = 0 )
@@ -182,15 +182,15 @@ def test_dataframe_dummies_all_obj(self, df, sparse):
182
182
result = get_dummies (df , sparse = sparse )
183
183
expected = DataFrame (
184
184
{"A_a" : [1 , 0 , 1 ], "A_b" : [0 , 1 , 0 ], "B_b" : [1 , 1 , 0 ], "B_c" : [0 , 0 , 1 ]},
185
- dtype = np . uint8 ,
185
+ dtype = bool ,
186
186
)
187
187
if sparse :
188
188
expected = DataFrame (
189
189
{
190
- "A_a" : SparseArray ([1 , 0 , 1 ], dtype = "uint8 " ),
191
- "A_b" : SparseArray ([0 , 1 , 0 ], dtype = "uint8 " ),
192
- "B_b" : SparseArray ([1 , 1 , 0 ], dtype = "uint8 " ),
193
- "B_c" : SparseArray ([0 , 0 , 1 ], dtype = "uint8 " ),
190
+ "A_a" : SparseArray ([1 , 0 , 1 ], dtype = "bool " ),
191
+ "A_b" : SparseArray ([0 , 1 , 0 ], dtype = "bool " ),
192
+ "B_b" : SparseArray ([1 , 1 , 0 ], dtype = "bool " ),
193
+ "B_c" : SparseArray ([0 , 0 , 1 ], dtype = "bool " ),
194
194
}
195
195
)
196
196
@@ -208,7 +208,7 @@ def test_dataframe_dummies_string_dtype(self, df):
208
208
"B_b" : [1 , 1 , 0 ],
209
209
"B_c" : [0 , 0 , 1 ],
210
210
},
211
- dtype = np . uint8 ,
211
+ dtype = bool ,
212
212
)
213
213
tm .assert_frame_equal (result , expected )
214
214
@@ -238,12 +238,11 @@ def test_dataframe_dummies_prefix_list(self, df, sparse):
238
238
expected = DataFrame (
239
239
{
240
240
"C" : [1 , 2 , 3 ],
241
- "from_A_a" : [1 , 0 , 1 ],
242
- "from_A_b" : [0 , 1 , 0 ],
243
- "from_B_b" : [1 , 1 , 0 ],
244
- "from_B_c" : [0 , 0 , 1 ],
241
+ "from_A_a" : [True , False , True ],
242
+ "from_A_b" : [False , True , False ],
243
+ "from_B_b" : [True , True , False ],
244
+ "from_B_c" : [False , False , True ],
245
245
},
246
- dtype = np .uint8 ,
247
246
)
248
247
expected [["C" ]] = df [["C" ]]
249
248
cols = ["from_A_a" , "from_A_b" , "from_B_b" , "from_B_c" ]
@@ -258,9 +257,12 @@ def test_dataframe_dummies_prefix_str(self, df, sparse):
258
257
result = get_dummies (df , prefix = "bad" , sparse = sparse )
259
258
bad_columns = ["bad_a" , "bad_b" , "bad_b" , "bad_c" ]
260
259
expected = DataFrame (
261
- [[1 , 1 , 0 , 1 , 0 ], [2 , 0 , 1 , 1 , 0 ], [3 , 1 , 0 , 0 , 1 ]],
260
+ [
261
+ [1 , True , False , True , False ],
262
+ [2 , False , True , True , False ],
263
+ [3 , True , False , False , True ],
264
+ ],
262
265
columns = ["C" ] + bad_columns ,
263
- dtype = np .uint8 ,
264
266
)
265
267
expected = expected .astype ({"C" : np .int64 })
266
268
if sparse :
@@ -269,10 +271,10 @@ def test_dataframe_dummies_prefix_str(self, df, sparse):
269
271
expected = pd .concat (
270
272
[
271
273
Series ([1 , 2 , 3 ], name = "C" ),
272
- Series ([1 , 0 , 1 ], name = "bad_a" , dtype = "Sparse[uint8 ]" ),
273
- Series ([0 , 1 , 0 ], name = "bad_b" , dtype = "Sparse[uint8 ]" ),
274
- Series ([1 , 1 , 0 ], name = "bad_b" , dtype = "Sparse[uint8 ]" ),
275
- Series ([0 , 0 , 1 ], name = "bad_c" , dtype = "Sparse[uint8 ]" ),
274
+ Series ([True , False , True ], name = "bad_a" , dtype = "Sparse[bool ]" ),
275
+ Series ([False , True , False ], name = "bad_b" , dtype = "Sparse[bool ]" ),
276
+ Series ([True , True , False ], name = "bad_b" , dtype = "Sparse[bool ]" ),
277
+ Series ([False , False , True ], name = "bad_c" , dtype = "Sparse[bool ]" ),
276
278
],
277
279
axis = 1 ,
278
280
)
@@ -290,30 +292,29 @@ def test_dataframe_dummies_subset(self, df, sparse):
290
292
},
291
293
)
292
294
cols = expected .columns
293
- expected [cols [1 :]] = expected [cols [1 :]].astype (np . uint8 )
295
+ expected [cols [1 :]] = expected [cols [1 :]].astype (bool )
294
296
expected [["C" ]] = df [["C" ]]
295
297
if sparse :
296
298
cols = ["from_A_a" , "from_A_b" ]
297
- expected [cols ] = expected [cols ].astype (SparseDtype ("uint8 " , 0 ))
299
+ expected [cols ] = expected [cols ].astype (SparseDtype ("bool " , 0 ))
298
300
tm .assert_frame_equal (result , expected )
299
301
300
302
def test_dataframe_dummies_prefix_sep (self , df , sparse ):
301
303
result = get_dummies (df , prefix_sep = ".." , sparse = sparse )
302
304
expected = DataFrame (
303
305
{
304
306
"C" : [1 , 2 , 3 ],
305
- "A..a" : [1 , 0 , 1 ],
306
- "A..b" : [0 , 1 , 0 ],
307
- "B..b" : [1 , 1 , 0 ],
308
- "B..c" : [0 , 0 , 1 ],
307
+ "A..a" : [True , False , True ],
308
+ "A..b" : [False , True , False ],
309
+ "B..b" : [True , True , False ],
310
+ "B..c" : [False , False , True ],
309
311
},
310
- dtype = np .uint8 ,
311
312
)
312
313
expected [["C" ]] = df [["C" ]]
313
314
expected = expected [["C" , "A..a" , "A..b" , "B..b" , "B..c" ]]
314
315
if sparse :
315
316
cols = ["A..a" , "A..b" , "B..b" , "B..c" ]
316
- expected [cols ] = expected [cols ].astype (SparseDtype ("uint8 " , 0 ))
317
+ expected [cols ] = expected [cols ].astype (SparseDtype ("bool " , 0 ))
317
318
318
319
tm .assert_frame_equal (result , expected )
319
320
@@ -356,9 +357,9 @@ def test_dataframe_dummies_prefix_dict(self, sparse):
356
357
)
357
358
358
359
columns = ["from_A_a" , "from_A_b" , "from_B_b" , "from_B_c" ]
359
- expected [columns ] = expected [columns ].astype (np . uint8 )
360
+ expected [columns ] = expected [columns ].astype (bool )
360
361
if sparse :
361
- expected [columns ] = expected [columns ].astype (SparseDtype ("uint8 " , 0 ))
362
+ expected [columns ] = expected [columns ].astype (SparseDtype ("bool " , 0 ))
362
363
363
364
tm .assert_frame_equal (result , expected )
364
365
@@ -422,19 +423,19 @@ def test_dataframe_dummies_with_categorical(self, df, sparse, dtype):
422
423
[
423
424
(
424
425
{"data" : DataFrame ({"ä" : ["a" ]})},
425
- DataFrame ({"ä_a" : [1 ]}, dtype = np . uint8 ),
426
+ DataFrame ({"ä_a" : [True ]} ),
426
427
),
427
428
(
428
429
{"data" : DataFrame ({"x" : ["ä" ]})},
429
- DataFrame ({"x_ä" : [1 ]}, dtype = np . uint8 ),
430
+ DataFrame ({"x_ä" : [True ]} ),
430
431
),
431
432
(
432
433
{"data" : DataFrame ({"x" : ["a" ]}), "prefix" : "ä" },
433
- DataFrame ({"ä_a" : [1 ]}, dtype = np . uint8 ),
434
+ DataFrame ({"ä_a" : [True ]} ),
434
435
),
435
436
(
436
437
{"data" : DataFrame ({"x" : ["a" ]}), "prefix_sep" : "ä" },
437
- DataFrame ({"xäa" : [1 ]}, dtype = np . uint8 ),
438
+ DataFrame ({"xäa" : [True ]} ),
438
439
),
439
440
],
440
441
)
@@ -451,7 +452,7 @@ def test_get_dummies_basic_drop_first(self, sparse):
451
452
s_series = Series (s_list )
452
453
s_series_index = Series (s_list , list ("ABC" ))
453
454
454
- expected = DataFrame ({"b" : [0 , 1 , 0 ], "c" : [0 , 0 , 1 ]}, dtype = np . uint8 )
455
+ expected = DataFrame ({"b" : [0 , 1 , 0 ], "c" : [0 , 0 , 1 ]}, dtype = bool )
455
456
456
457
result = get_dummies (s_list , drop_first = True , sparse = sparse )
457
458
if sparse :
@@ -487,14 +488,14 @@ def test_get_dummies_basic_drop_first_NA(self, sparse):
487
488
# Test NA handling together with drop_first
488
489
s_NA = ["a" , "b" , np .nan ]
489
490
res = get_dummies (s_NA , drop_first = True , sparse = sparse )
490
- exp = DataFrame ({"b" : [0 , 1 , 0 ]}, dtype = np . uint8 )
491
+ exp = DataFrame ({"b" : [0 , 1 , 0 ]}, dtype = bool )
491
492
if sparse :
492
493
exp = exp .apply (SparseArray , fill_value = 0 )
493
494
494
495
tm .assert_frame_equal (res , exp )
495
496
496
497
res_na = get_dummies (s_NA , dummy_na = True , drop_first = True , sparse = sparse )
497
- exp_na = DataFrame ({"b" : [0 , 1 , 0 ], np .nan : [0 , 0 , 1 ]}, dtype = np . uint8 ).reindex (
498
+ exp_na = DataFrame ({"b" : [0 , 1 , 0 ], np .nan : [0 , 0 , 1 ]}, dtype = bool ).reindex (
498
499
["b" , np .nan ], axis = 1
499
500
)
500
501
if sparse :
@@ -510,7 +511,7 @@ def test_get_dummies_basic_drop_first_NA(self, sparse):
510
511
def test_dataframe_dummies_drop_first (self , df , sparse ):
511
512
df = df [["A" , "B" ]]
512
513
result = get_dummies (df , drop_first = True , sparse = sparse )
513
- expected = DataFrame ({"A_b" : [0 , 1 , 0 ], "B_c" : [0 , 0 , 1 ]}, dtype = np . uint8 )
514
+ expected = DataFrame ({"A_b" : [0 , 1 , 0 ], "B_c" : [0 , 0 , 1 ]}, dtype = bool )
514
515
if sparse :
515
516
expected = expected .apply (SparseArray , fill_value = 0 )
516
517
tm .assert_frame_equal (result , expected )
@@ -522,7 +523,7 @@ def test_dataframe_dummies_drop_first_with_categorical(self, df, sparse, dtype):
522
523
{"C" : [1 , 2 , 3 ], "A_b" : [0 , 1 , 0 ], "B_c" : [0 , 0 , 1 ], "cat_y" : [0 , 1 , 1 ]}
523
524
)
524
525
cols = ["A_b" , "B_c" , "cat_y" ]
525
- expected [cols ] = expected [cols ].astype (np . uint8 )
526
+ expected [cols ] = expected [cols ].astype (bool )
526
527
expected = expected [["C" , "A_b" , "B_c" , "cat_y" ]]
527
528
if sparse :
528
529
for col in cols :
@@ -544,7 +545,7 @@ def test_dataframe_dummies_drop_first_with_na(self, df, sparse):
544
545
}
545
546
)
546
547
cols = ["A_b" , "A_nan" , "B_c" , "B_nan" ]
547
- expected [cols ] = expected [cols ].astype (np . uint8 )
548
+ expected [cols ] = expected [cols ].astype (bool )
548
549
expected = expected .sort_index (axis = 1 )
549
550
if sparse :
550
551
for col in cols :
@@ -559,13 +560,13 @@ def test_dataframe_dummies_drop_first_with_na(self, df, sparse):
559
560
def test_get_dummies_int_int (self ):
560
561
data = Series ([1 , 2 , 1 ])
561
562
result = get_dummies (data )
562
- expected = DataFrame ([[1 , 0 ], [0 , 1 ], [1 , 0 ]], columns = [1 , 2 ], dtype = np . uint8 )
563
+ expected = DataFrame ([[1 , 0 ], [0 , 1 ], [1 , 0 ]], columns = [1 , 2 ], dtype = bool )
563
564
tm .assert_frame_equal (result , expected )
564
565
565
566
data = Series (Categorical (["a" , "b" , "a" ]))
566
567
result = get_dummies (data )
567
568
expected = DataFrame (
568
- [[1 , 0 ], [0 , 1 ], [1 , 0 ]], columns = Categorical (["a" , "b" ]), dtype = np . uint8
569
+ [[1 , 0 ], [0 , 1 ], [1 , 0 ]], columns = Categorical (["a" , "b" ]), dtype = bool
569
570
)
570
571
tm .assert_frame_equal (result , expected )
571
572
@@ -616,9 +617,12 @@ def test_get_dummies_duplicate_columns(self, df):
616
617
result = get_dummies (df ).sort_index (axis = 1 )
617
618
618
619
expected = DataFrame (
619
- [[1 , 1 , 0 , 1 , 0 ], [2 , 0 , 1 , 1 , 0 ], [3 , 1 , 0 , 0 , 1 ]],
620
+ [
621
+ [1 , True , False , True , False ],
622
+ [2 , False , True , True , False ],
623
+ [3 , True , False , False , True ],
624
+ ],
620
625
columns = ["A" , "A_a" , "A_b" , "A_b" , "A_c" ],
621
- dtype = np .uint8 ,
622
626
).sort_index (axis = 1 )
623
627
624
628
expected = expected .astype ({"A" : np .int64 })
@@ -628,7 +632,7 @@ def test_get_dummies_duplicate_columns(self, df):
628
632
def test_get_dummies_all_sparse (self ):
629
633
df = DataFrame ({"A" : [1 , 2 ]})
630
634
result = get_dummies (df , columns = ["A" ], sparse = True )
631
- dtype = SparseDtype ("uint8 " , 0 )
635
+ dtype = SparseDtype ("bool " , 0 )
632
636
expected = DataFrame (
633
637
{
634
638
"A_1" : SparseArray ([1 , 0 ], dtype = dtype ),
0 commit comments