Skip to content

Commit 9d4db3e

Browse files
datacubeR and solegalli
authored
Add code examples in preprocessing and wrappers modules' docstrings (feature-engine#643)
* Adding code examples for SkWrapper, MatchCategories, and MatchVariables * Fixing details in the examples * modify sparse parameter in ohe --------- Co-authored-by: datacubeR <[email protected]> Co-authored-by: Soledad Galli <[email protected]>
1 parent 1d9ccc4 commit 9d4db3e

File tree

3 files changed

+105
-0
lines changed

3 files changed

+105
-0
lines changed

feature_engine/preprocessing/match_categories.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,27 @@ class MatchCategories(
8888
8989
transform:
9090
Enforce the type of categorical variables as dtype `categorical`.
91+
92+
Examples
93+
--------
94+
95+
>>> import pandas as pd
96+
>>> from feature_engine.preprocessing import MatchCategories
97+
>>> X_train = pd.DataFrame(dict(x1 = ["a","b","c"], x2 = [4,5,6]))
98+
>>> X_test = pd.DataFrame(dict(x1 = ["c","b","a","d"], x2 = [5,6,4,7]))
99+
>>> mc = MatchCategories(missing_values="ignore")
100+
>>> mc.fit(X_train)
101+
>>> mc.transform(X_train)
102+
x1 x2
103+
0 a 4
104+
1 b 5
105+
2 c 6
106+
>>> mc.transform(X_test)
107+
x1 x2
108+
0 c 5
109+
1 b 6
110+
2 a 4
111+
3 NaN 7
91112
"""
92113

93114
def __init__(

feature_engine/preprocessing/match_columns.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -100,6 +100,50 @@ class MatchVariables(BaseEstimator, TransformerMixin, GetFeatureNamesOutMixin):
100100
101101
transform:
102102
Add or delete variables to match those observed in the train set.
103+
104+
Examples
105+
--------
106+
107+
>>> import pandas as pd
108+
>>> from feature_engine.preprocessing import MatchVariables
109+
>>> X_train = pd.DataFrame(dict(x1 = ["a","b","c"], x2 = [4,5,6]))
110+
>>> X_test = pd.DataFrame(dict(x1 = ["c","b","a","d"],
111+
... x2 = [5,6,4,7],
112+
... x3 = [1,1,1,1]))
113+
>>> mv = MatchVariables(missing_values="ignore")
114+
>>> mv.fit(X_train)
115+
>>> mv.transform(X_train)
116+
x1 x2
117+
0 a 4
118+
1 b 5
119+
2 c 6
120+
>>> mv.transform(X_test)
121+
The following variables are dropped from the DataFrame: ['x3']
122+
x1 x2
123+
0 c 5
124+
1 b 6
125+
2 a 4
126+
3 d 7
127+
128+
>>> import pandas as pd
129+
>>> from feature_engine.preprocessing import MatchVariables
130+
>>> X_train = pd.DataFrame(dict(x1 = ["a","b","c"],
131+
... x2 = [4,5,6], x3 = [1,1,1]))
132+
>>> X_test = pd.DataFrame(dict(x1 = ["c","b","a","d"], x2 = [5,6,4,7]))
133+
>>> mv = MatchVariables(missing_values="ignore")
134+
>>> mv.fit(X_train)
135+
>>> mv.transform(X_train)
136+
x1 x2 x3
137+
0 a 4 1
138+
1 b 5 1
139+
2 c 6 1
140+
>>> mv.transform(X_test)
141+
The following variables are added to the DataFrame: ['x3']
142+
x1 x2 x3
143+
0 c 5 NaN
144+
1 b 6 NaN
145+
2 a 4 NaN
146+
3 d 7 NaN
103147
"""
104148

105149
def __init__(

feature_engine/wrappers/wrappers.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,46 @@ class SklearnTransformerWrapper(BaseEstimator, TransformerMixin):
144144
See Also
145145
--------
146146
sklearn.compose.ColumnTransformer
147+
148+
Examples
149+
--------
150+
151+
>>> import pandas as pd
152+
>>> from feature_engine.wrappers import SklearnTransformerWrapper
153+
>>> from sklearn.preprocessing import StandardScaler
154+
>>> X = pd.DataFrame(dict(x1 = ["a","b","c"], x2 = [1,2,3], x3 = [4,5,6]))
155+
>>> skw = SklearnTransformerWrapper(StandardScaler())
156+
>>> skw.fit(X)
157+
>>> skw.transform(X)
158+
x1 x2 x3
159+
0 a -1.224745 -1.224745
160+
1 b 0.000000 0.000000
161+
2 c 1.224745 1.224745
162+
163+
>>> import pandas as pd
164+
>>> from feature_engine.wrappers import SklearnTransformerWrapper
165+
>>> from sklearn.preprocessing import OneHotEncoder
166+
>>> X = pd.DataFrame(dict(x1 = ["a","b","c"], x2 = [1,2,3], x3 = [4,5,6]))
167+
>>> skw = SklearnTransformerWrapper(
168+
... OneHotEncoder(sparse_output = False), variables = "x1")
169+
>>> skw.fit(X)
170+
>>> skw.transform(X)
171+
x2 x3 x1_a x1_b x1_c
172+
0 1 4 1.0 0.0 0.0
173+
1 2 5 0.0 1.0 0.0
174+
2 3 6 0.0 0.0 1.0
175+
176+
>>> import pandas as pd
177+
>>> from feature_engine.wrappers import SklearnTransformerWrapper
178+
>>> from sklearn.preprocessing import PolynomialFeatures
179+
>>> X = pd.DataFrame(dict(x1 = ["a","b","c"], x2 = [1,2,3], x3 = [4,5,6]))
180+
>>> skw = SklearnTransformerWrapper(PolynomialFeatures(include_bias = False))
181+
>>> skw.fit(X)
182+
>>> skw.transform(X)
183+
x1 x2 x3 x2^2 x2 x3 x3^2
184+
0 a 1.0 4.0 1.0 4.0 16.0
185+
1 b 2.0 5.0 4.0 10.0 25.0
186+
2 c 3.0 6.0 9.0 18.0 36.0
147187
"""
148188

149189
def __init__(

0 commit comments

Comments
 (0)