3
3
from sklearn .linear_model import LinearRegression
4
4
import matplotlib .pyplot as plt
5
5
6
+
6
7
def collect_dataset ():
7
8
"""Collect dataset of CSGO
8
9
The dataset contains ADR vs Rating of a Player
@@ -21,6 +22,7 @@ def collect_dataset():
21
22
dataset = np .array (data , dtype = float )
22
23
return dataset
23
24
25
+
24
26
def run_gradient_descent(X, y, learning_rate=0.0001550, iterations=100000):
    """Approximate linear-regression coefficients via batch gradient descent.

    :param X: feature matrix of shape (n_samples, n_features); an intercept
        column should already be included if one is wanted
    :param y: target vector of shape (n_samples,)
    :param learning_rate: step size applied to each gradient update
    :param iterations: number of full-batch update steps to perform
    :return: learned coefficient vector ``theta`` of shape (n_features,)
    """
    sample_count = X.shape[0]
    theta = np.zeros(X.shape[1])

    for step in range(iterations):
        predictions = X.dot(theta)
        # Average gradient of the squared-error loss with respect to theta.
        theta = theta - learning_rate * (X.T.dot(predictions - y) / sample_count)

        if step % 10000 == 0:
            # Periodic progress report; MSE uses the predictions computed
            # before this step's update (same as the original behavior).
            mse = np.mean((predictions - y) ** 2)
            print(f"Iteration {step}: MSE = {mse:.5f}")

    return theta
45
47
48
+
46
49
def calculate_ols_coefficients(X, y):
    """Calculate optimal coefficients using the normal equation.

    Solves the linear system (X^T X) theta = X^T y with ``np.linalg.solve``
    instead of forming ``np.linalg.inv(X.T @ X)`` explicitly: computing the
    inverse and multiplying is both slower and less numerically stable than
    solving the system directly, while yielding the same coefficients.

    :param X: feature matrix of shape (n_samples, n_features); an intercept
        column should already be included if one is wanted
    :param y: target vector of shape (n_samples,)
    :return: coefficient vector ``theta`` of shape (n_features,)
    :raises numpy.linalg.LinAlgError: if X^T X is singular
    """
    return np.linalg.solve(X.T.dot(X), X.T.dot(y))
53
56
57
+
54
58
def main():
    """Driver: fit the ADR-vs-Rating data three ways, report, and plot."""
    dataset = collect_dataset()

    features = dataset[:, 0].reshape(-1, 1)
    target = dataset[:, 1]

    # Prepend a column of ones so theta[0] acts as the intercept term.
    design = np.c_[np.ones(features.shape[0]), features]

    def mse_of(predictions):
        # Mean squared error of a prediction vector against the target.
        return np.mean((predictions - target) ** 2)

    # Gradient Descent
    gd_theta = run_gradient_descent(design, target)
    print(
        f"Gradient Descent coefficients: intercept = {gd_theta[0]:.5f}, slope = {gd_theta[1]:.5f}"
    )

    # Ordinary Least Squares (Normal Equation)
    ols_theta = calculate_ols_coefficients(design, target)
    print(
        f"OLS coefficients: intercept = {ols_theta[0]:.5f}, slope = {ols_theta[1]:.5f}"
    )

    # Sklearn for comparison
    reg = LinearRegression().fit(features, target)
    print(
        f"Sklearn coefficients: intercept = {reg.intercept_:.5f}, slope = {reg.coef_[0]:.5f}"
    )

    # Report goodness of fit for each method.
    gd_mse = mse_of(np.dot(design, gd_theta))
    ols_mse = mse_of(np.dot(design, ols_theta))
    sklearn_mse = mse_of(reg.predict(features))

    print(f"Gradient Descent MSE: {gd_mse:.5f}")
    print(f"OLS MSE: {ols_mse:.5f}")
    print(f"Sklearn MSE: {sklearn_mse:.5f}")

    # Overlay all three fitted lines on the raw data.
    plt.scatter(features, target, color="lightgray", label="Data points")
    plt.plot(
        features, np.dot(design, gd_theta), color="red", label="Gradient Descent"
    )
    plt.plot(
        features,
        np.dot(design, ols_theta),
        color="green",
        label="OLS (Normal Equation)",
    )
    plt.plot(features, reg.predict(features), color="blue", label="Sklearn")
    plt.legend()
    plt.xlabel("ADR")
    plt.ylabel("Rating")
    plt.title("Linear Regression: ADR vs Rating")
    plt.show()
95
112
113
+
96
114
if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    main()