Skip to content

Commit e496027

Browse files
committed
Features generator support
1 parent b490559 commit e496027

File tree

2 files changed

+109
-0
lines changed

2 files changed

+109
-0
lines changed

sklearn_pandas/features_generator.py

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
def _make_transformer(definition):
    """Instantiate a single transformer from one entry of ``classes``."""
    if isinstance(definition, dict):
        # Work on a copy so the caller's dictionary is never mutated.
        params = definition.copy()
        klass = params.pop('class')
        return klass(**params)

    if callable(definition):
        # Covers classes as well as factory callables such as
        # functools.partial objects or plain functions.
        return definition()

    raise TypeError(
        "Unsupported transformer definition %r: expected a class, "
        "a callable, a dict with a 'class' key, or None" % (definition,))


def gen_features(columns, classes=None):
    """Generates a feature definition list which can be passed
    into DataFrameMapper

    Params:

    columns     an iterable of column names to generate features for.

    classes     a list of classes for each feature, a list of dictionaries
                with transformer class and init parameters, or None.

                If list of classes is provided, then each of them is
                instantiated with default arguments:

                    classes = [StandardScaler, LabelBinarizer]

                Any other callable taking no arguments (a factory function,
                a functools.partial object) is called the same way.

                If list of dictionaries is provided, then each of them should
                have a 'class' key with transformer class. All other keys are
                passed into 'class' value constructor:

                    classes = [
                        {'class': StandardScaler, 'with_mean': False},
                        {'class': LabelBinarizer}
                    ]

                None entries inside the list are skipped.

                If None value selected, then each feature left as is.

    Returns:

    A list of (column, transformers) tuples, one per column, where
    transformers is a list of freshly created instances (a separate set for
    every column), or None when no transformer was requested.

    Raises:

    KeyError    if a dictionary definition has no 'class' key.
    TypeError   if a definition is not a dict, a callable or None.
    """
    if classes is None:
        return [(column, None) for column in columns]

    # Materialise once so that a one-shot iterable is not exhausted after the
    # first column; None entries mean "no transformer" and are dropped.
    definitions = [
        definition for definition in classes if definition is not None]

    feature_defs = []

    for column in columns:
        # Build new instances per column so columns never share state.
        feature_transformers = [
            _make_transformer(definition) for definition in definitions]

        # An empty transformer list is reported as None ("leave as is").
        feature_defs.append((column, feature_transformers or None))

    return feature_defs

tests/test_features_generator.py

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import pytest
2+
3+
from sklearn_pandas.features_generator import gen_features
4+
5+
6+
class MockClass(object):
    """Minimal stand-in for a transformer class.

    It only records the constructor arguments so that tests can check
    which values an instance was created with.
    """

    def __init__(self, value=1, name='class'):
        # Store the arguments verbatim for later inspection.
        self.name = name
        self.value = value
11+
12+
13+
@pytest.mark.parametrize('columns', [['colA', 'colB', 'colC']])
def test_generate_features_with_default_parameters(columns):
    """
    Classes passed without parameters are instantiated with their
    default init arguments, once for every column.
    """
    generated = gen_features(columns=columns, classes=[MockClass])
    assert len(generated) == len(columns)

    # Every requested column must appear exactly once in the result.
    by_column = dict(generated)
    assert columns == sorted(by_column)

    defaults = {'value': 1, 'name': 'class'}
    for transformers in by_column.values():
        for transformer in transformers:
            assert_attributes(transformer, **defaults)
28+
29+
30+
def test_generate_features_with_several_classes():
    """
    Dictionary definitions are instantiated in order, with the extra keys
    passed to the class constructor.
    """
    definitions = [
        {'class': MockClass},
        {'class': MockClass, 'name': 'mockA'},
        {'class': MockClass, 'name': 'mockB', 'value': None},
    ]
    generated = gen_features(
        columns=['colA', 'colB', 'colC'], classes=definitions)

    # Expected attributes of the transformer found at each position.
    expected_by_position = [
        {'name': 'class', 'value': 1},
        {'name': 'mockA', 'value': 1},
        {'name': 'mockB', 'value': None},
    ]

    for transformers in dict(generated).values():
        for position, expected in enumerate(expected_by_position):
            assert_attributes(transformers[position], **expected)
44+
45+
46+
def test_generate_features_with_none_transformers():
    """
    A classes list holding only None yields None transformers for
    every column.
    """
    names = ['colA', 'colB', 'colC']

    generated = gen_features(columns=names, classes=[None])

    assert generated == [(name, None) for name in names]
55+
56+
57+
def assert_attributes(obj, **attrs):
    """Assert that every keyword names an attribute of obj with that value."""
    for name in attrs:
        actual = getattr(obj, name)
        assert actual == attrs[name]

0 commit comments

Comments
 (0)