@@ -1,11 +1,9 @@
 """
-Implementation of sequential minimal optimization (SMO) for support vector machines
-(SVM).
+Sequential minimal optimization (SMO) for support vector machines (SVM)
 
-Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
-programming (QP) problem that arises during the training of support vector
-machines.
-It was invented by John Platt in 1998.
+Sequential minimal optimization (SMO) is an algorithm for solving the quadratic
+programming (QP) problem that arises during the training of SVMs. It was invented by
+John Platt in 1998.
 
 Input:
     0: type: numpy.ndarray.
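The QP referred to above is the standard soft-margin SVM dual; restated here for reference (a textbook formula, not quoted from this file), with K the kernel function and C the cost parameter used throughout the module:

    max_alpha  sum_i alpha_i - 1/2 sum_{i,j} alpha_i alpha_j y_i y_j K(x_i, x_j)
    subject to 0 <= alpha_i <= C  and  sum_i alpha_i y_i = 0

SMO exploits the equality constraint: a feasible step must change at least two multipliers at once, so the algorithm repeatedly picks a pair (alpha1, alpha2) and solves that two-variable subproblem analytically.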
@@ -124,8 +122,7 @@ def fit(self):
             b_old = self._b
             self._b = b
 
-            # 4: update error value,here we only calculate those non-bound samples'
-            # error
+            # 4: update error, here we only calculate the error for non-bound samples
             self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
             for s in self.unbound:
                 if s in (i1, i2):
@@ -136,7 +133,7 @@ def fit(self):
                     + (self._b - b_old)
                 )
 
-            # if i1 or i2 is non-bound,update there error value to zero
+            # if i1 or i2 is non-bound, update their error value to zero
             if self._is_unbound(i1):
                 self._error[i1] = 0
             if self._is_unbound(i2):
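Step 4 above is the standard SMO error-cache refresh: rather than recomputing the full decision function, the cached error of each non-bound sample s is updated incrementally. Restated in the file's own notation (a standard identity, not quoted from the source):

    e[s] += (a1_new - a1) * y1 * K(i1, s) + (a2_new - a2) * y2 * K(i2, s) + (b_new - b_old)

which matches the loop body above term by term.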
@@ -161,7 +158,7 @@ def predict(self, test_samples, classify=True):
             results.append(result)
         return np.array(results)
 
-    # Check if alpha violate KKT condition
+    # Check if alpha violates the KKT condition
     def _check_obey_kkt(self, index):
         alphas = self.alphas
         tol = self._tol
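For readers new to SMO, the conditions that _check_obey_kkt tests are the complementary-slackness cases of the soft-margin dual. A minimal sketch, assuming a hypothetical value g_i holding the decision value g(x_i) (the real method works from its own error cache and self._tol):

def violates_kkt(alpha_i, y_i, g_i, c, tol):
    # alpha == 0     requires y_i * g(x_i) >= 1  (sample outside the margin)
    # 0 < alpha < C  requires y_i * g(x_i) == 1  (sample on the margin)
    # alpha == C     requires y_i * g(x_i) <= 1  (sample inside the margin)
    r = y_i * g_i - 1
    return (alpha_i < c and r < -tol) or (alpha_i > 0 and r > tol)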
@@ -172,20 +169,19 @@ def _check_obey_kkt(self, index):
 
     # Get value calculated from kernel function
     def _k(self, i1, i2):
-        # for test samples,use Kernel function
+        # for test samples, use kernel function
         if isinstance(i2, np.ndarray):
             return self.Kernel(self.samples[i1], i2)
-        # for train samples,Kernel values have been saved in matrix
+        # for training samples, kernel values have been saved in matrix
         else:
             return self._K_matrix[i1, i2]
 
-    # Get sample's error
+    # Get error for sample
     def _e(self, index):
         """
         Two cases:
-          1:Sample[index] is non-bound,Fetch error from list: _error
-          2:sample[index] is bound,Use predicted value deduct true value: g(xi) - yi
-
+          1: Sample[index] is non-bound, fetch error from list: _error
+          2: sample[index] is bound, use predicted value minus true value: g(xi) - yi
         """
         # get from error data
         if self._is_unbound(index):
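For reference, the error used in both cases is the usual SMO prediction error E_i = g(x_i) - y_i, with the decision function (standard definition, restated rather than quoted):

    g(x) = sum_j alpha_j * y_j * K(x_j, x) + b

The _error list simply keeps these values current for non-bound samples, so case 1 is a lookup.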
@@ -196,7 +192,7 @@ def _e(self, index):
             yi = self.tags[index]
             return gx - yi
 
-    # Calculate Kernel matrix of all possible i1,i2 , saving time
+    # Calculate kernel matrix of all possible i1, i2, saving time
     def _calculate_k_matrix(self):
         k_matrix = np.zeros([self.length, self.length])
         for i in self._all_samples:
@@ -206,7 +202,7 @@ def _calculate_k_matrix(self):
             )
         return k_matrix
 
-    # Predict test sample's tag
+    # Predict tag for test sample
     def _predict(self, sample):
         k = self._k
         predicted_value = (
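Precomputing the Gram matrix with a double Python loop costs n^2 kernel calls through the interpreter. A vectorized NumPy alternative for the RBF case is sketched below; this is a suggestion, not code from this module, and it assumes the same convention as _rbf here, i.e. K(v1, v2) = exp(-gamma * ||v1 - v2||^2):

import numpy as np

def rbf_gram(samples: np.ndarray, gamma: float) -> np.ndarray:
    # ||a - b||^2 = ||a||^2 + ||b||^2 - 2 a.b, computed for all pairs at once
    sq = np.sum(samples**2, axis=1)
    d2 = sq[:, None] + sq[None, :] - 2.0 * samples @ samples.T
    return np.exp(-gamma * np.maximum(d2, 0.0))  # clamp tiny negatives from rounding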
@@ -222,30 +218,31 @@ def _predict(self, sample):
 
     # Choose alpha1 and alpha2
     def _choose_alphas(self):
-        locis = yield from self._choose_a1()
-        if not locis:
+        loci = yield from self._choose_a1()
+        if not loci:
             return None
-        return locis
+        return loci
 
     def _choose_a1(self):
         """
-        Choose first alpha ;steps:
-        1:First loop over all sample
-        2:Second loop over all non-bound samples till all non-bound samples does not
-        voilate kkt condition.
-        3:Repeat this two process endlessly,till all samples does not voilate kkt
-        condition samples after first loop.
+        Choose first alpha
+        Steps:
+        1: First loop over all samples
+        2: Second loop over all non-bound samples until no non-bound samples violate
+           the KKT condition.
+        3: Repeat these two processes until no samples violate the KKT condition
+           after the first loop.
         """
         while True:
             all_not_obey = True
             # all sample
-            print("scanning all sample!")
+            print("Scanning all samples!")
             for i1 in [i for i in self._all_samples if self._check_obey_kkt(i)]:
                 all_not_obey = False
                 yield from self._choose_a2(i1)
 
             # non-bound sample
-            print("scanning non-bound sample!")
+            print("Scanning non-bound samples!")
             while True:
                 not_obey = True
                 for i1 in [
@@ -256,20 +253,21 @@ def _choose_a1(self):
                     not_obey = False
                     yield from self._choose_a2(i1)
                 if not_obey:
-                    print("all non-bound samples fit the KKT condition!")
+                    print("All non-bound samples satisfy the KKT condition!")
                     break
             if all_not_obey:
-                print("all samples fit the KKT condition! Optimization done!")
+                print("All samples satisfy the KKT condition!")
                 break
         return False
 
     def _choose_a2(self, i1):
         """
-        Choose the second alpha by using heuristic algorithm ;steps:
-        1: Choose alpha2 which gets the maximum step size (|E1 - E2|).
-        2: Start in a random point,loop over all non-bound samples till alpha1 and
+        Choose the second alpha using a heuristic algorithm
+        Steps:
+        1: Choose alpha2 that maximizes the step size (|E1 - E2|).
+        2: Start in a random point, loop over all non-bound samples till alpha1 and
            alpha2 are optimized.
-        3: Start in a random point,loop over all samples till alpha1 and alpha2 are
+        3: Start in a random point, loop over all samples till alpha1 and alpha2 are
            optimized.
         """
         self._unbound = [i for i in self._all_samples if self._is_unbound(i)]
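Step 1 of this heuristic picks the partner that promises the largest step. A minimal sketch of that choice, using hypothetical names errors (a cached error list) and unbound (the non-bound index list) rather than this class's attributes:

def pick_a2(e1, unbound, errors):
    # maximize |E1 - E2|: the larger the error gap, the larger the
    # unconstrained step along alpha2 tends to be
    return max(unbound, key=lambda j: abs(e1 - errors[j])) if unbound else None

Steps 2 and 3 are fallbacks for when this best candidate fails to make positive progress.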
@@ -306,7 +304,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
         if i1 == i2:
             return None, None
 
-        # calculate L and H which bound the new alpha2
+        # calculate L and H which bound the new alpha2
         s = y1 * y2
         if s == -1:
             l, h = max(0.0, a2 - a1), min(self._c, self._c + a2 - a1)  # noqa: E741
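Both branches compute the standard SMO clipping box. In formulas (textbook identities, with s = y1 * y2 as in the code):

    if y1 != y2 (s == -1):  L = max(0, a2 - a1),      H = min(C, C + a2 - a1)
    if y1 == y2 (s == 1):   L = max(0, a2 + a1 - C),  H = min(C, a2 + a1)

Keeping the new alpha2 inside [L, H] keeps both multipliers in [0, C] while preserving y1*a1 + y2*a2 = constant.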
@@ -320,7 +318,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
         k22 = k(i2, i2)
         k12 = k(i1, i2)
 
-        # select the new alpha2 which could get the minimal objectives
+        # select the new alpha2 which could achieve the minimal objectives
         if (eta := k11 + k22 - 2.0 * k12) > 0.0:
             a2_new_unc = a2 + (y2 * (e1 - e2)) / eta
             # a2_new has a boundary
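When eta > 0 the two-variable objective is strictly convex along the constraint line, so the minimizer has a closed form; the branch above implements the standard update

    eta = K11 + K22 - 2*K12
    a2_new_unc = a2 + y2 * (e1 - e2) / eta
    a2_new = clip(a2_new_unc, l, h)

after which alpha1 is recovered as a1_new = a1 + s * (a2 - a2_new). The eta <= 0 case (flat or concave curvature) falls through to the boundary-objective comparison handled below.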
@@ -335,7 +333,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
             l1 = a1 + s * (a2 - l)
             h1 = a1 + s * (a2 - h)
 
-            # way 1
+            # Method 1
             f1 = y1 * (e1 + b) - a1 * k(i1, i1) - s * a2 * k(i1, i2)
             f2 = y2 * (e2 + b) - a2 * k(i2, i2) - s * a1 * k(i1, i2)
             ol = (
@@ -353,9 +351,8 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
                 + s * h * h1 * k(i1, i2)
             )
             """
-            # way 2
-            Use objective function check which alpha2 new could get the minimal
-            objectives
+            Method 2: Use objective function to check which alpha2_new could achieve the
+            minimal objectives
             """
             if ol < (oh - self._eps):
                 a2_new = l
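This comparison follows Platt's pseudocode for the eta <= 0 case: evaluate the objective at both ends of the [L, H] segment and move alpha2 to whichever end is clearly lower, with eps guarding against numerically meaningless differences. Sketched in full (only the first branch is visible in this hunk; the remaining branches are the standard continuation, restated rather than quoted from the file):

if ol < oh - eps:
    a2_new = l        # objective clearly lower at the L end
elif ol > oh + eps:
    a2_new = h        # objective clearly lower at the H end
else:
    a2_new = a2       # no clear winner: leave alpha2 unchanged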
@@ -375,7 +372,7 @@ def _get_new_alpha(self, i1, i2, a1, a2, e1, e2, y1, y2):
 
         return a1_new, a2_new
 
-    # Normalise data using min_max way
+    # Normalize data using min-max method
     def _norm(self, data):
         if self._init:
             self._min = np.min(data, axis=0)
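Min-max scaling, as the comment says, maps each feature column to [0, 1] using the training minima and maxima; a one-line NumPy sketch of the same idea (a hypothetical standalone form, not this method's exact body):

scaled = (data - data.min(axis=0)) / (data.max(axis=0) - data.min(axis=0))

Storing self._min on the first call lets the same affine map be reapplied to later samples.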
@@ -424,7 +421,7 @@ def _rbf(self, v1, v2):
 
     def _check(self):
         if self._kernel == self._rbf and self.gamma < 0:
-            raise ValueError("gamma value must greater than 0")
+            raise ValueError("gamma value must be non-negative")
 
     def _get_kernel(self, kernel_name):
         maps = {"linear": self._linear, "poly": self._polynomial, "rbf": self._rbf}
@@ -444,27 +441,27 @@ def call_func(*args, **kwargs):
         start_time = time.time()
         func(*args, **kwargs)
         end_time = time.time()
-        print(f"smo algorithm cost {end_time - start_time} seconds")
+        print(f"SMO algorithm cost {end_time - start_time} seconds")
 
     return call_func
 
 
 @count_time
-def test_cancel_data():
-    print("Hello!\nStart test svm by smo algorithm!")
+def test_cancer_data():
+    print("Hello!\nStart test SVM using the SMO algorithm!")
     # 0: download dataset and load into pandas' dataframe
-    if not os.path.exists(r"cancel_data.csv"):
+    if not os.path.exists(r"cancer_data.csv"):
         request = urllib.request.Request(  # noqa: S310
             CANCER_DATASET_URL,
             headers={"User-Agent": "Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)"},
         )
         response = urllib.request.urlopen(request)  # noqa: S310
         content = response.read().decode("utf-8")
-        with open(r"cancel_data.csv", "w") as f:
+        with open(r"cancer_data.csv", "w") as f:
            f.write(content)
 
    data = pd.read_csv(
-        "cancel_data.csv",
+        "cancer_data.csv",
        header=None,
        dtype={0: str},  # Assuming the first column contains string data
    )
@@ -479,14 +476,14 @@ def test_cancel_data():
     train_data, test_data = samples[:328, :], samples[328:, :]
     test_tags, test_samples = test_data[:, 0], test_data[:, 1:]
 
-    # 3: choose kernel function,and set initial alphas to zero(optional)
-    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
+    # 3: choose kernel function, and set initial alphas to zero (optional)
+    my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
     al = np.zeros(train_data.shape[0])
 
     # 4: calculating best alphas using SMO algorithm and predict test_data samples
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         alpha_list=al,
         cost=0.4,
         b=0.0,
@@ -501,30 +498,30 @@ def test_cancel_data():
     for i in range(test_tags.shape[0]):
         if test_tags[i] == predict[i]:
             score += 1
-    print(f"\nall: {test_num}\nright: {score}\nfalse: {test_num - score}")
+    print(f"\nAll: {test_num}\nCorrect: {score}\nIncorrect: {test_num - score}")
     print(f"Rough Accuracy: {score / test_tags.shape[0]}")
 
 
 def test_demonstration():
     # change stdout
-    print("\nStart plot,please wait!!!")
+    print("\nStarting plot, please wait!")
     sys.stdout = open(os.devnull, "w")
 
     ax1 = plt.subplot2grid((2, 2), (0, 0))
     ax2 = plt.subplot2grid((2, 2), (0, 1))
     ax3 = plt.subplot2grid((2, 2), (1, 0))
     ax4 = plt.subplot2grid((2, 2), (1, 1))
-    ax1.set_title("linear svm, cost: 0.1")
+    ax1.set_title("Linear SVM, cost = 0.1")
     test_linear_kernel(ax1, cost=0.1)
-    ax2.set_title("linear svm, cost: 500")
+    ax2.set_title("Linear SVM, cost = 500")
     test_linear_kernel(ax2, cost=500)
-    ax3.set_title("rbf kernel svm, cost: 0.1")
+    ax3.set_title("RBF kernel SVM, cost = 0.1")
     test_rbf_kernel(ax3, cost=0.1)
-    ax4.set_title("rbf kernel svm, cost: 500")
+    ax4.set_title("RBF kernel SVM, cost = 500")
     test_rbf_kernel(ax4, cost=500)
 
     sys.stdout = sys.__stdout__
-    print("Plot done!!!")
+    print("Plot done!")
 
 
 def test_linear_kernel(ax, cost):
@@ -535,10 +532,10 @@ def test_linear_kernel(ax, cost):
     scaler = StandardScaler()
     train_x_scaled = scaler.fit_transform(train_x, train_y)
     train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
-    mykernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
+    my_kernel = Kernel(kernel="linear", degree=5, coef0=1, gamma=0.5)
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         cost=cost,
         tolerance=0.001,
         auto_norm=False,
@@ -555,10 +552,10 @@ def test_rbf_kernel(ax, cost):
     scaler = StandardScaler()
     train_x_scaled = scaler.fit_transform(train_x, train_y)
     train_data = np.hstack((train_y.reshape(500, 1), train_x_scaled))
-    mykernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
+    my_kernel = Kernel(kernel="rbf", degree=5, coef0=1, gamma=0.5)
     mysvm = SmoSVM(
         train=train_data,
-        kernel_func=mykernel,
+        kernel_func=my_kernel,
         cost=cost,
         tolerance=0.001,
         auto_norm=False,
@@ -571,11 +568,11 @@ def plot_partition_boundary(
     model, train_data, ax, resolution=100, colors=("b", "k", "r")
 ):
     """
-    We can not get the optimum w of our kernel svm model which is different from linear
-    svm. For this reason, we generate randomly distributed points with high desity and
-    prediced values of these points are calculated by using our trained model. Then we
-    could use this prediced values to draw contour map.
-    And this contour map can represent svm's partition boundary.
+    We cannot get the optimal w of our kernel SVM model, which is different from a
+    linear SVM. For this reason, we generate randomly distributed points with high
+    density, and the predicted values of these points are calculated using our
+    trained model. Then we can use these predicted values to draw a contour map,
+    and this contour map represents the SVM's partition boundary.
     """
     train_data_x = train_data[:, 1]
     train_data_y = train_data[:, 2]
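A sketch of the approach the docstring describes, using the names already in scope here and assuming, as elsewhere in this file, that predict(..., classify=False) returns raw decision values rather than labels; this is an illustration, not necessarily the body that follows:

xs = np.linspace(train_data_x.min(), train_data_x.max(), resolution)
ys = np.linspace(train_data_y.min(), train_data_y.max(), resolution)
grid_x, grid_y = np.meshgrid(xs, ys)
# predict a dense grid of points, then contour the decision values;
# the zero level set is the partition boundary
values = model.predict(np.c_[grid_x.ravel(), grid_y.ravel()], classify=False)
ax.contour(grid_x, grid_y, values.reshape(grid_x.shape), levels=[-1, 0, 1], colors=colors)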
@@ -620,6 +617,6 @@ def plot_partition_boundary(
 
 
 if __name__ == "__main__":
-    test_cancel_data()
+    test_cancer_data()
     test_demonstration()
     plt.show()