@@ -168,14 +168,19 @@ def transform(self, X, override_return_df=False):
168
168
if not self .cols :
169
169
return X
170
170
171
+ original_cols = set (X .columns )
171
172
X = self .ordinal_encoder .transform (X )
172
173
X = self .basen_encode (X , cols = self .cols )
173
174
174
175
if self .drop_invariant :
175
176
for col in self .drop_cols :
176
177
X .drop (col , 1 , inplace = True )
177
178
178
- X .fillna (0.0 , inplace = True )
179
+ # impute missing values only in the generated columns
180
+ current_cols = set (X .columns )
181
+ fillna_cols = list (current_cols - (original_cols - set (self .cols )))
182
+ X [fillna_cols ] = X [fillna_cols ].fillna (value = 0.0 )
183
+
179
184
if self .return_df or override_return_df :
180
185
return X
181
186
else :
@@ -299,13 +304,13 @@ def basen_to_interger(self, X, cols, base):
299
304
out_cols = X .columns .values
300
305
301
306
for col in cols :
302
- col_list = [col0 for col0 in out_cols if col0 .startswith (col )]
307
+ col_list = [col0 for col0 in out_cols if str ( col0 ) .startswith (col )]
303
308
for col0 in col_list :
304
309
if any (X [col0 ].isnull ()):
305
310
raise ValueError ("inverse_transform is not supported because transform impute"
306
311
"the unknown category -1 when encode %s" % (col ,))
307
312
if base == 1 :
308
- value_array = np .array ([int (col0 .split ('_' )[1 ]) for col0 in col_list ])
313
+ value_array = np .array ([int (col0 .split ('_' )[- 1 ]) for col0 in col_list ])
309
314
else :
310
315
len0 = len (col_list )
311
316
value_array = np .array ([base ** (len0 - 1 - i ) for i in range (len0 )])
0 commit comments