6
6
from sklearn_pandas import CategoricalImputer
7
7
from sklearn_pandas import DataFrameMapper
8
8
9
+ # In scikit-learn 0.18, NotFittedError was moved from utils.validation
10
+ # to exceptions module.
11
+ try :
12
+ from sklearn .exceptions import NotFittedError
13
+ except ImportError :
14
+ from sklearn .utils .validation import NotFittedError
15
+
9
16
10
17
@pytest .mark .parametrize ('none_value' , [None , np .nan ])
11
18
@pytest .mark .parametrize ('input_type' , ['np' , 'pd' ])
@@ -16,16 +23,79 @@ def test_unit(input_type, none_value):
16
23
if input_type == 'pd' :
17
24
X = pd .Series (data )
18
25
else :
19
- X = np .asarray (data )
26
+ X = np .asarray (data , dtype = object )
20
27
21
28
Xc = X .copy ()
22
29
23
30
Xt = CategoricalImputer ().fit_transform (X )
24
31
25
32
assert (np .asarray (X ) == np .asarray (Xc )).all ()
26
33
assert type (Xt ) == np .ndarray
27
- assert len (X ) == len (Xt )
28
- assert len (Xt [pd .isnull (Xt )]) == 0
34
+ assert (Xt == ['a' , 'b' , 'b' , 'b' ]).all ()
35
+
36
+
37
@pytest.mark.parametrize('input_type', ['np', 'pd'])
def test_no_mode(input_type):
    """
    Every non-missing value occurs exactly once, so the data has no
    unique mode; fit_transform is expected to raise ValueError.
    """
    values = ['a', 'b', 'c', np.nan]

    # Exercise both supported containers: pandas Series and object ndarray.
    X = (
        pd.Series(values)
        if input_type == 'pd'
        else np.asarray(values, dtype=object)
    )

    with pytest.raises(ValueError):
        CategoricalImputer().fit_transform(X)
49
+
50
+
51
@pytest.mark.parametrize('input_type', ['np', 'pd'])
def test_missing_values_param(input_type):
    """
    A custom ``missing_values`` marker ('a_missing') is treated as the
    missing entry and replaced by the most frequent value ('y').
    """
    values = ['x', 'y', 'a_missing', 'y']

    # Exercise both supported containers: pandas Series and object ndarray.
    X = (
        pd.Series(values)
        if input_type == 'pd'
        else np.asarray(values, dtype=object)
    )

    imputer = CategoricalImputer(missing_values='a_missing')
    Xt = imputer.fit_transform(X)

    expected = np.array(['x', 'y', 'y', 'y'])
    assert (Xt == expected).all()
65
+
66
+
67
@pytest.mark.parametrize('input_type', ['np', 'pd'])
def test_copy_param(input_type):
    """
    With ``copy=False`` both the returned data and the input ``X``
    itself are expected to hold the imputed values.
    """
    values = ['a', np.nan, 'b', 'a']

    # Exercise both supported containers: pandas Series and object ndarray.
    X = (
        pd.Series(values)
        if input_type == 'pd'
        else np.asarray(values, dtype=object)
    )

    imputer = CategoricalImputer(copy=False)
    Xt = imputer.fit_transform(X)

    expected = np.array(['a', 'a', 'b', 'a'])
    # The transformed output carries the imputed value ...
    assert (Xt == expected).all()
    # ... and so does the original input, since no copy was requested.
    assert (X == expected).all()
83
+
84
+
85
@pytest.mark.parametrize('input_type', ['np', 'pd'])
def test_data_type(input_type):
    """
    Input mixing strings, ints and floats: the NaN entry is replaced by
    the most frequent value ('a') and all other entries are unchanged.
    """
    values = ['a', np.nan, 'b', 3, 'a', 3, 'a', 4.5]

    # Exercise both supported containers: pandas Series and object ndarray.
    X = (
        pd.Series(values)
        if input_type == 'pd'
        else np.asarray(values, dtype=object)
    )

    Xt = CategoricalImputer().fit_transform(X)

    # dtype=object keeps the heterogeneous expected values from being
    # coerced to a single string dtype by numpy.
    expected = np.array(['a', 'a', 'b', 3, 'a', 3, 'a', 4.5], dtype=object)
    assert (Xt == expected).all()
29
99
30
100
31
101
@pytest .mark .parametrize ('none_value' , [None , np .nan ])
@@ -50,3 +120,12 @@ def test_integration(none_value):
50
120
51
121
assert (df ['cat' ][val_idx ] == df_t ['cat' ][val_idx ]).all ()
52
122
assert (df_t ['cat' ][nan_idx ] == df ['cat' ].mode ().values [0 ]).all ()
123
+
124
+
125
def test_not_fitted():
    """
    Calling transform on an imputer that was never fitted is expected
    to raise NotFittedError.
    """
    unfitted = CategoricalImputer()
    X = np.array(['a', 'b', 'b', None])

    with pytest.raises(NotFittedError):
        unfitted.transform(X)
0 commit comments