6
6
from sklearn_pandas import CategoricalImputer
7
7
from sklearn_pandas import DataFrameMapper
8
8
9
+ # In scikit-learn 0.18, NotFittedError was moved from sklearn.utils.validation
10
+ # to the sklearn.exceptions module; fall back to the old location if needed.
11
+ try :
12
+ from sklearn .exceptions import NotFittedError
13
+ except ImportError :
14
+ from sklearn .utils .validation import NotFittedError
15
+
9
16
10
17
@pytest .mark .parametrize ('none_value' , [None , np .nan ])
11
18
@pytest .mark .parametrize ('input_type' , ['np' , 'pd' ])
@@ -16,16 +23,65 @@ def test_unit(input_type, none_value):
16
23
if input_type == 'pd' :
17
24
X = pd .Series (data )
18
25
else :
19
- X = np .asarray (data )
26
+ X = np .asarray (data , dtype = object )
20
27
21
28
Xc = X .copy ()
22
29
23
30
Xt = CategoricalImputer ().fit_transform (X )
24
31
25
32
assert (np .asarray (X ) == np .asarray (Xc )).all ()
26
33
assert type (Xt ) == np .ndarray
27
- assert len (X ) == len (Xt )
28
- assert len (Xt [pd .isnull (Xt )]) == 0
34
+ assert (Xt == ['a' , 'b' , 'b' , 'b' ]).all ()
35
+
36
+
37
@pytest.mark.parametrize('input_type', ['np', 'pd'])
def test_missing_values_param(input_type):
    """
    A custom ``missing_values`` marker is treated as the value to impute.

    The marker 'a_missing' is expected to be replaced by 'y', which is the
    most frequent of the remaining values in the input.
    """
    raw = ['x', 'y', 'a_missing', 'y']

    # Exercise both supported containers: pandas Series and object ndarray.
    X = (
        pd.Series(raw)
        if input_type == 'pd'
        else np.asarray(raw, dtype=object)
    )

    imputer = CategoricalImputer(missing_values='a_missing')
    imputed = imputer.fit_transform(X)

    expected = np.array(['x', 'y', 'y', 'y'])
    assert (imputed == expected).all()
51
+
52
+
53
@pytest.mark.parametrize('input_type', ['np', 'pd'])
def test_copy_param(input_type):
    """
    With ``copy=False`` the input itself is expected to hold the imputed data.

    Both the returned value and the original container are compared against
    the same expected result.
    """
    raw = ['a', np.nan, 'b', 'a']

    # Exercise both supported containers: pandas Series and object ndarray.
    X = (
        pd.Series(raw)
        if input_type == 'pd'
        else np.asarray(raw, dtype=object)
    )

    imputer = CategoricalImputer(copy=False)
    Xt = imputer.fit_transform(X)

    expected = np.array(['a', 'a', 'b', 'a'])
    # The transformed output carries the filled-in value ...
    assert (Xt == expected).all()
    # ... and so does the input that was passed in.
    assert (X == expected).all()
69
+
70
+
71
@pytest.mark.parametrize('input_type', ['np', 'pd'])
def test_data_type(input_type):
    """
    Mixed string/int/float input keeps its non-missing values unchanged.

    Only the NaN entry is expected to change; it becomes 'a', the most
    frequent value in the input.
    """
    raw = ['a', np.nan, 'b', 3, 'a', 3, 'a', 4.5]

    # Exercise both supported containers: pandas Series and object ndarray.
    X = (
        pd.Series(raw)
        if input_type == 'pd'
        else np.asarray(raw, dtype=object)
    )

    imputed = CategoricalImputer().fit_transform(X)

    # dtype=object keeps the ints and floats from being coerced to strings.
    expected = np.array(['a', 'a', 'b', 3, 'a', 3, 'a', 4.5], dtype=object)
    assert (imputed == expected).all()
29
85
30
86
31
87
@pytest .mark .parametrize ('none_value' , [None , np .nan ])
@@ -50,3 +106,12 @@ def test_integration(none_value):
50
106
51
107
assert (df ['cat' ][val_idx ] == df_t ['cat' ][val_idx ]).all ()
52
108
assert (df_t ['cat' ][nan_idx ] == df ['cat' ].mode ().values [0 ]).all ()
109
+
110
+
111
def test_not_fitted():
    """
    Calling ``transform`` before ``fit`` must raise NotFittedError.
    """
    unfitted = CategoricalImputer()
    sample = np.array(['a', 'b', 'b', None])

    with pytest.raises(NotFittedError):
        unfitted.transform(sample)
0 commit comments