@@ -821,12 +821,13 @@ def get_dummies(data, prefix=None, prefix_sep='_', dummy_na=False,
821
821
from pandas .core .reshape .concat import concat
822
822
from itertools import cycle
823
823
824
+ dtypes_to_encode = ['object' , 'category' ]
825
+
824
826
if isinstance (data , DataFrame ):
825
827
# determine columns being encoded
826
-
827
828
if columns is None :
828
829
data_to_encode = data .select_dtypes (
829
- include = [ 'object' , 'category' ] )
830
+ include = dtypes_to_encode )
830
831
else :
831
832
data_to_encode = data [columns ]
832
833
@@ -844,6 +845,7 @@ def check_len(item, name):
844
845
845
846
check_len (prefix , 'prefix' )
846
847
check_len (prefix_sep , 'prefix_sep' )
848
+
847
849
if isinstance (prefix , compat .string_types ):
848
850
prefix = cycle ([prefix ])
849
851
if isinstance (prefix , dict ):
@@ -859,15 +861,20 @@ def check_len(item, name):
859
861
prefix_sep = [prefix_sep [col ] for col in data_to_encode .columns ]
860
862
861
863
if data_to_encode .shape == data .shape :
864
+ # Encoding the entire df, do not prepend any dropped columns
862
865
with_dummies = []
863
866
elif columns is not None :
867
+ # Encoding only cols specified in columns. Get all cols not in
868
+ # columns to prepend to result.
864
869
with_dummies = [data .drop (columns , axis = 1 )]
865
870
else :
866
- with_dummies = [data .select_dtypes (exclude = ['object' , 'category' ])]
871
+ # Encoding only object and category dtype columns. Get remaining
872
+ # columns to prepend to result.
873
+ with_dummies = [data .select_dtypes (exclude = dtypes_to_encode )]
867
874
868
875
for (col , pre , sep ) in zip (data_to_encode .iteritems (), prefix ,
869
876
prefix_sep ):
870
-
877
+ # col is (column_name, column), use just column data here
871
878
dummy = _get_dummies_1d (col [1 ], prefix = pre , prefix_sep = sep ,
872
879
dummy_na = dummy_na , sparse = sparse ,
873
880
drop_first = drop_first , dtype = dtype )
0 commit comments