1
+ from collections import Counter
2
+
1
3
import pytest
4
+ import numpy as np
5
+ from pandas import DataFrame
6
+ from numpy .testing import assert_array_equal
2
7
8
+ from sklearn_pandas import DataFrameMapper
3
9
from sklearn_pandas .features_generator import gen_features
4
10
5
11
@@ -10,10 +16,33 @@ def __init__(self, value=1, name='class'):
10
16
self .name = name
11
17
12
18
19
class MockTransformer(object):
    """
    Stand-in transformer that remembers the most frequent value seen during
    ``fit`` and repeats it for every input row in ``transform``.
    """

    def __init__(self):
        # Populated by fit(); stays None until then.
        self.most_common_ = None

    def fit(self, X, y=None):
        """
        Stores the single most frequent element of X and returns self.
        """
        top_entries = Counter(X).most_common(1)
        # Exactly one (value, count) pair is expected; the unpacking fails
        # otherwise (e.g. when X is empty).
        (winner, _count), = top_entries
        self.most_common_ = winner
        return self

    def transform(self, X, y=None):
        """
        Returns an array holding len(X) copies of the value learned in fit.
        """
        n_rows = len(X)
        repeated = [self.most_common_ for _ in range(n_rows)]
        return np.asarray(repeated)
31
+
32
+
33
@pytest.fixture
def simple_dataset():
    """
    Provides a five-row DataFrame with three integer feature columns.
    """
    column_values = {
        'feat1': [1, 2, 1, 3, 1],
        'feat2': [1, 2, 2, 2, 3],
        'feat3': [1, 2, 3, 4, 5],
    }
    return DataFrame(column_values)
40
+
41
+
13
42
@pytest .mark .parametrize ('columns' , [['colA' , 'colB' , 'colC' ]])
14
43
def test_generate_features_with_default_parameters (columns ):
15
44
"""
16
- Tests generating features from classes with default init arguments
45
+ Tests generating features from classes with default init arguments.
17
46
"""
18
47
feature_defs = gen_features (columns = columns , classes = [MockClass ])
19
48
assert len (feature_defs ) == len (columns )
@@ -28,6 +57,9 @@ def test_generate_features_with_default_parameters(columns):
28
57
29
58
30
59
def test_generate_features_with_several_classes ():
60
+ """
61
+ Tests generating features pipeline with different transformers parameters.
62
+ """
31
63
feature_defs = gen_features (
32
64
columns = ['colA' , 'colB' , 'colC' ],
33
65
classes = [
@@ -44,6 +76,10 @@ def test_generate_features_with_several_classes():
44
76
45
77
46
78
def test_generate_features_with_none_transformers ():
79
+ """
80
+ Tests generating "dummy" feature definition which doesn't apply any
81
+ transformation.
82
+ """
47
83
feature_defs = gen_features (
48
84
columns = ['colA' , 'colB' , 'colC' ], classes = [None ])
49
85
@@ -54,6 +90,28 @@ def test_generate_features_with_none_transformers():
54
90
assert feature_defs == expected
55
91
56
92
93
def test_compatibility_with_data_frame_mapper(simple_dataset):
    """
    Checks that definitions produced by gen_features can be consumed by
    DataFrameMapper alongside a hand-written definition.
    """
    features_defs = gen_features(
        columns=['feat1', 'feat2'], classes=[MockTransformer])
    # Hand-written definition with no transformer; the expected output
    # below carries feat3's values unchanged in the last column.
    passthrough_def = ('feat3', None)
    features_defs.append(passthrough_def)

    transformed = DataFrameMapper(features_defs).fit_transform(simple_dataset)

    # feat1 and feat2 collapse to their most frequent values (1 and 2);
    # the third column holds feat3's original values 1..5.
    expected_rows = [[1, 2, feat3_value] for feat3_value in (1, 2, 3, 4, 5)]
    assert_array_equal(transformed, np.asarray(expected_rows))
113
+
114
+
57
115
def assert_attributes(obj, **attrs):
    """
    Asserts that every keyword argument names an attribute of ``obj`` whose
    value equals the given one.
    """
    for attr_name in attrs:
        expected_value = attrs[attr_name]
        assert getattr(obj, attr_name) == expected_value
0 commit comments