5
5
from collections import OrderedDict
6
6
7
7
from pandas import DataFrame , Series
8
+ from pandas .core .dtypes .common import is_integer_dtype
8
9
from pandas .core .sparse .api import SparseDtype , SparseArray
9
10
import pandas as pd
10
11
@@ -54,23 +55,16 @@ def test_basic(self, sparse, dtype):
54
55
'b' : [0 , 1 , 0 ],
55
56
'c' : [0 , 0 , 1 ]},
56
57
dtype = self .effective_dtype (dtype ))
57
- result = get_dummies (s_list , sparse = sparse , dtype = dtype )
58
58
if sparse :
59
- tm .assert_sp_frame_equal (result ,
60
- expected .to_sparse (kind = 'integer' ,
61
- fill_value = 0 ))
62
- else :
63
- assert_frame_equal (result , expected )
59
+ expected = expected .apply (pd .SparseArray , fill_value = 0.0 )
60
+ result = get_dummies (s_list , sparse = sparse , dtype = dtype )
61
+ assert_frame_equal (result , expected )
64
62
65
63
result = get_dummies (s_series , sparse = sparse , dtype = dtype )
66
- if sparse :
67
- expected = expected .to_sparse (kind = 'integer' , fill_value = 0 )
68
64
assert_frame_equal (result , expected )
69
65
70
66
expected .index = list ('ABC' )
71
67
result = get_dummies (s_series_index , sparse = sparse , dtype = dtype )
72
- if sparse :
73
- expected .to_sparse (kind = 'integer' , fill_value = 0 )
74
68
assert_frame_equal (result , expected )
75
69
76
70
def test_basic_types (self , sparse , dtype ):
@@ -86,23 +80,27 @@ def test_basic_types(self, sparse, dtype):
86
80
'c' : [0 , 0 , 1 ]},
87
81
dtype = self .effective_dtype (dtype ),
88
82
columns = list ('abc' ))
89
- if not sparse :
90
- compare = tm .assert_frame_equal
91
- else :
92
- expected = expected .to_sparse (fill_value = 0 , kind = 'integer' )
93
- compare = tm .assert_sp_frame_equal
94
-
83
+ if sparse :
84
+ if is_integer_dtype (dtype ):
85
+ fill_value = 0
86
+ elif dtype == bool :
87
+ fill_value = False
88
+ else :
89
+ fill_value = 0.0
90
+
91
+ expected = expected .apply (SparseArray , fill_value = fill_value )
95
92
result = get_dummies (s_list , sparse = sparse , dtype = dtype )
96
- compare (result , expected )
93
+ tm . assert_frame_equal (result , expected )
97
94
98
95
result = get_dummies (s_series , sparse = sparse , dtype = dtype )
99
- compare (result , expected )
96
+ tm . assert_frame_equal (result , expected )
100
97
101
98
result = get_dummies (s_df , columns = s_df .columns ,
102
99
sparse = sparse , dtype = dtype )
103
100
if sparse :
104
- dtype_name = 'Sparse[{}, 0]' .format (
105
- self .effective_dtype (dtype ).name
101
+ dtype_name = 'Sparse[{}, {}]' .format (
102
+ self .effective_dtype (dtype ).name ,
103
+ fill_value
106
104
)
107
105
else :
108
106
dtype_name = self .effective_dtype (dtype ).name
@@ -137,14 +135,13 @@ def test_just_na(self, sparse):
137
135
assert res_series_index .index .tolist () == ['A' ]
138
136
139
137
def test_include_na (self , sparse , dtype ):
140
- if sparse :
141
- pytest .xfail (reason = 'nan in index is problematic (GH 16894)' )
142
-
143
138
s = ['a' , 'b' , np .nan ]
144
139
res = get_dummies (s , sparse = sparse , dtype = dtype )
145
140
exp = DataFrame ({'a' : [1 , 0 , 0 ],
146
141
'b' : [0 , 1 , 0 ]},
147
142
dtype = self .effective_dtype (dtype ))
143
+ if sparse :
144
+ exp = exp .apply (pd .SparseArray , fill_value = 0.0 )
148
145
assert_frame_equal (res , exp )
149
146
150
147
# Sparse dataframes do not allow nan labelled columns, see #GH8822
@@ -156,6 +153,8 @@ def test_include_na(self, sparse, dtype):
156
153
exp_na = exp_na .reindex (['a' , 'b' , nan ], axis = 1 )
157
154
# hack (NaN handling in assert_index_equal)
158
155
exp_na .columns = res_na .columns
156
+ if sparse :
157
+ exp_na = exp_na .apply (pd .SparseArray , fill_value = 0.0 )
159
158
assert_frame_equal (res_na , exp_na )
160
159
161
160
res_just_na = get_dummies ([nan ], dummy_na = True ,
@@ -175,10 +174,8 @@ def test_unicode(self, sparse):
175
174
u ('letter_%s' ) % eacute : [0 , 1 , 1 ]},
176
175
dtype = np .uint8 )
177
176
if sparse :
178
- tm .assert_sp_frame_equal (res , exp .to_sparse (fill_value = 0 ,
179
- kind = 'integer' ))
180
- else :
181
- assert_frame_equal (res , exp )
177
+ exp = exp .apply (pd .SparseArray , fill_value = 0 )
178
+ assert_frame_equal (res , exp )
182
179
183
180
def test_dataframe_dummies_all_obj (self , df , sparse ):
184
181
df = df [['A' , 'B' ]]
@@ -189,16 +186,14 @@ def test_dataframe_dummies_all_obj(self, df, sparse):
189
186
'B_c' : [0 , 0 , 1 ]},
190
187
dtype = np .uint8 )
191
188
if sparse :
192
- expected = pd .SparseDataFrame ({
189
+ expected = pd .DataFrame ({
193
190
"A_a" : pd .SparseArray ([1 , 0 , 1 ], dtype = 'uint8' ),
194
191
"A_b" : pd .SparseArray ([0 , 1 , 0 ], dtype = 'uint8' ),
195
192
"B_b" : pd .SparseArray ([1 , 1 , 0 ], dtype = 'uint8' ),
196
193
"B_c" : pd .SparseArray ([0 , 0 , 1 ], dtype = 'uint8' ),
197
194
})
198
195
199
- tm .assert_sp_frame_equal (result , expected )
200
- else :
201
- assert_frame_equal (result , expected )
196
+ assert_frame_equal (result , expected )
202
197
203
198
def test_dataframe_dummies_mix_default (self , df , sparse , dtype ):
204
199
result = get_dummies (df , sparse = sparse , dtype = dtype )
@@ -402,7 +397,7 @@ def test_basic_drop_first(self, sparse):
402
397
403
398
result = get_dummies (s_list , drop_first = True , sparse = sparse )
404
399
if sparse :
405
- expected = expected .to_sparse ( fill_value = 0 , kind = 'integer' )
400
+ expected = expected .apply ( pd . SparseArray , fill_value = 0 )
406
401
assert_frame_equal (result , expected )
407
402
408
403
result = get_dummies (s_series , drop_first = True , sparse = sparse )
@@ -436,7 +431,7 @@ def test_basic_drop_first_NA(self, sparse):
436
431
res = get_dummies (s_NA , drop_first = True , sparse = sparse )
437
432
exp = DataFrame ({'b' : [0 , 1 , 0 ]}, dtype = np .uint8 )
438
433
if sparse :
439
- exp = exp .to_sparse ( fill_value = 0 , kind = 'integer' )
434
+ exp = exp .apply ( pd . SparseArray , fill_value = 0 )
440
435
441
436
assert_frame_equal (res , exp )
442
437
@@ -447,7 +442,7 @@ def test_basic_drop_first_NA(self, sparse):
447
442
nan : [0 , 0 , 1 ]},
448
443
dtype = np .uint8 ).reindex (['b' , nan ], axis = 1 )
449
444
if sparse :
450
- exp_na = exp_na .to_sparse ( fill_value = 0 , kind = 'integer' )
445
+ exp_na = exp_na .apply ( pd . SparseArray , fill_value = 0 )
451
446
assert_frame_equal (res_na , exp_na )
452
447
453
448
res_just_na = get_dummies ([nan ], dummy_na = True , drop_first = True ,
@@ -462,7 +457,7 @@ def test_dataframe_dummies_drop_first(self, df, sparse):
462
457
'B_c' : [0 , 0 , 1 ]},
463
458
dtype = np .uint8 )
464
459
if sparse :
465
- expected = expected .to_sparse ( fill_value = 0 , kind = 'integer' )
460
+ expected = expected .apply ( pd . SparseArray , fill_value = 0 )
466
461
assert_frame_equal (result , expected )
467
462
468
463
def test_dataframe_dummies_drop_first_with_categorical (
0 commit comments