@@ -56,6 +56,15 @@ def predict(self, X):
56
56
return True
57
57
58
58
59
class DateEncoder:
    """
    Stateless transformer that splits date-like values into their
    year, month and day components.

    The input must expose the pandas ``.dt`` accessor, so this only
    works on pandas objects and not on plain numpy arrays.
    """

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the encoder itself."""
        return self

    def transform(self, X):
        """
        Return the year, month and day of X side by side (in that
        order), one column per component.
        """
        accessor = X.dt
        components = [accessor.year, accessor.month, accessor.day]
        return pd.concat(components, axis=1)
66
+
67
+
59
68
class ToSparseTransformer (BaseEstimator , TransformerMixin ):
60
69
"""
61
70
Transforms numpy matrix to sparse format.
@@ -225,6 +234,87 @@ def test_pca(complex_dataframe):
225
234
assert cols [1 ] == 'feat1_feat2_1'
226
235
227
236
237
def test_input_df_true_first_transformer(simple_dataframe, monkeypatch):
    """
    With input_df=True, the first transformer of a column receives
    a pd.Series rather than an np.array, both in fit and in transform.
    """
    # Replace fit/transform on the class so the calls can be inspected.
    monkeypatch.setattr(MockXTransformer, 'fit', Mock())
    monkeypatch.setattr(
        MockXTransformer, 'transform',
        Mock(return_value=np.array([1, 2, 3])))
    mapper = DataFrameMapper([('a', MockXTransformer())], input_df=True)
    out = mapper.fit_transform(simple_dataframe)

    # The mocks live on the class, so any instance exposes the
    # recorded calls.
    fit_args = MockXTransformer().fit.call_args[0]
    assert isinstance(fit_args[0], pd.Series)

    transform_args = MockXTransformer().transform.call_args[0]
    assert isinstance(transform_args[0], pd.Series)

    # The mocked transform output comes back as a single column.
    assert_array_equal(out, np.array([[1], [2], [3]]))
258
+
259
+
260
def test_input_df_true_next_transformers(simple_dataframe, monkeypatch):
    """
    With input_df=True, transformers after the first one are also handed
    pandas objects instead of numpy arrays (provided the transformers
    before them output pandas objects too).
    """
    # Replace fit/transform on the class so the calls can be inspected.
    monkeypatch.setattr(MockTClassifier, 'fit', Mock())
    monkeypatch.setattr(
        MockTClassifier, 'transform',
        Mock(return_value=pd.Series([1, 2, 3])))
    chain = [MockXTransformer(), MockTClassifier()]
    mapper = DataFrameMapper([('a', chain)], input_df=True)
    out = mapper.fit_transform(simple_dataframe)

    # The second transformer in the chain must have been fitted
    # on a pd.Series.
    fit_args = MockTClassifier().fit.call_args[0]
    assert isinstance(fit_args[0], pd.Series)

    # The mocked transform output comes back as a single column.
    assert_array_equal(out, np.array([[1], [2], [3]]))
279
+
280
+
281
def test_input_df_true_multiple_cols(complex_dataframe):
    """
    With input_df=True, mapping transformers over several columns
    yields one output column per mapped column, in mapping order.
    """
    columns = ['target', 'feat1']
    mapper = DataFrameMapper(
        [(name, MockXTransformer()) for name in columns],
        input_df=True)
    out = mapper.fit_transform(complex_dataframe)

    # Each output column must equal the values of the source column.
    for position, name in enumerate(columns):
        assert_array_equal(out[:, position], complex_dataframe[name].values)
296
+
297
+
298
def test_input_df_date_encoder():
    """
    With input_df=True, a transformer that only works on pandas
    objects, such as DateEncoder, can be applied through the mapper.
    """
    dates = pd.date_range('2015-10-30', '2015-11-02')
    df = pd.DataFrame({'dates': dates})
    mapper = DataFrameMapper([('dates', DateEncoder())], input_df=True)

    out = mapper.fit_transform(df)

    # One row per date: year, month, day.
    rows = [
        [2015, 10, 30],
        [2015, 10, 31],
        [2015, 11, 1],
        [2015, 11, 2],
    ]
    expected = np.array(rows)
    assert_array_equal(out, expected)
316
+
317
+
228
318
def test_nonexistent_columns_explicit_fail (simple_dataframe ):
229
319
"""
230
320
If a nonexistent column is selected, KeyError is raised.
0 commit comments