Skip to content

Commit 279545f

Browse files
committed
Testing compatibility of generated feature definition with mapper
1 parent e496027 commit 279545f

File tree

2 files changed

+60
-1
lines changed

2 files changed

+60
-1
lines changed

sklearn_pandas/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
from .dataframe_mapper import DataFrameMapper # NOQA
44
from .cross_validation import cross_val_score, GridSearchCV, RandomizedSearchCV # NOQA
55
from .categorical_imputer import CategoricalImputer # NOQA
6+
from .features_generator import gen_features # NOQA

tests/test_features_generator.py

Lines changed: 59 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
1+
from collections import Counter
2+
13
import pytest
4+
import numpy as np
5+
from pandas import DataFrame
6+
from numpy.testing import assert_array_equal
27

8+
from sklearn_pandas import DataFrameMapper
39
from sklearn_pandas.features_generator import gen_features
410

511

@@ -10,10 +16,33 @@ def __init__(self, value=1, name='class'):
1016
self.name = name
1117

1218

19+
class MockTransformer(object):
    """
    Minimal transformer stub exposing ``fit``/``transform`` for mapper tests.

    It learns the single most frequent value seen during ``fit`` and
    replaces every input element with that value in ``transform``.
    """

    def __init__(self):
        # Most frequent value learned by ``fit``; ``None`` until fitted.
        self.most_common_ = None

    def fit(self, X, y=None):
        """
        Remembers the most frequent value of ``X``.

        ``X`` must be an iterable of hashable values. ``y`` is accepted for
        signature compatibility and ignored. Returns ``self``.

        Raises ``ValueError`` when ``X`` is empty, because there is no most
        common element to unpack.
        """
        [(value, _)] = Counter(X).most_common(1)
        self.most_common_ = value
        return self

    def transform(self, X, y=None):
        """
        Returns an array of ``len(X)`` copies of the value learned by ``fit``.

        ``y`` is accepted for signature compatibility and ignored.
        """
        return np.asarray([self.most_common_] * len(X))
31+
32+
33+
@pytest.fixture
def simple_dataset():
    """
    Provides a five-row DataFrame with three integer feature columns.

    The most frequent value is 1 in ``feat1`` and 2 in ``feat2``; ``feat3``
    holds the distinct values 1 to 5.
    """
    return DataFrame({
        'feat1': [1, 2, 1, 3, 1],
        'feat2': [1, 2, 2, 2, 3],
        'feat3': [1, 2, 3, 4, 5],
    })
40+
41+
1342
@pytest.mark.parametrize('columns', [['colA', 'colB', 'colC']])
1443
def test_generate_features_with_default_parameters(columns):
1544
"""
16-
Tests generating features from classes with default init arguments
45+
Tests generating features from classes with default init arguments.
1746
"""
1847
feature_defs = gen_features(columns=columns, classes=[MockClass])
1948
assert len(feature_defs) == len(columns)
@@ -28,6 +57,9 @@ def test_generate_features_with_default_parameters(columns):
2857

2958

3059
def test_generate_features_with_several_classes():
60+
"""
61+
Tests generating features pipeline with different transformers parameters.
62+
"""
3163
feature_defs = gen_features(
3264
columns=['colA', 'colB', 'colC'],
3365
classes=[
@@ -44,6 +76,10 @@ def test_generate_features_with_several_classes():
4476

4577

4678
def test_generate_features_with_none_transformers():
79+
"""
80+
Tests generating "dummy" feature definition which doesn't apply any
81+
transformation.
82+
"""
4783
feature_defs = gen_features(
4884
columns=['colA', 'colB', 'colC'], classes=[None])
4985

@@ -54,6 +90,28 @@ def test_generate_features_with_none_transformers():
5490
assert feature_defs == expected
5591

5692

93+
def test_compatibility_with_data_frame_mapper(simple_dataset):
    """
    Tests compatibility of generated feature definition with DataFrameMapper.

    Definitions generated for ``feat1`` and ``feat2`` use MockTransformer,
    and a hand-written ``('feat3', None)`` definition is appended to them.
    The expected output has the first two columns filled with each column's
    most frequent value (1 and 2) and ``feat3`` left unchanged.
    """
    feature_defs = gen_features(
        columns=['feat1', 'feat2'],
        classes=[MockTransformer])
    # Generated definitions come back as a list, so a manual one can be
    # appended to it directly.
    feature_defs.append(('feat3', None))

    mapper = DataFrameMapper(feature_defs)
    X = mapper.fit_transform(simple_dataset)
    expected = np.asarray([
        [1, 2, 1],
        [1, 2, 2],
        [1, 2, 3],
        [1, 2, 4],
        [1, 2, 5]
    ])

    assert_array_equal(X, expected)
113+
114+
57115
def assert_attributes(obj, **attrs):
    """
    Asserts that ``obj`` has every given attribute with the expected value.

    Each keyword argument names an attribute and gives its expected value.
    Raises ``AssertionError`` on the first mismatch and ``AttributeError``
    if ``obj`` lacks a named attribute.
    """
    for attr, value in attrs.items():
        assert getattr(obj, attr) == value

0 commit comments

Comments
 (0)