import matplotlib.pyplot as plt
import numpy as np
import requests
from sklearn.linear_model import LinearRegression


def collect_dataset():
    """Collect the CS:GO dataset of ADR vs Rating of a player.

    :return: dataset obtained from the link, as a numpy array
    """
    response = requests.get(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
        "master/Week1/ADRvsRating.csv",
        timeout=10,
    )
    lines = response.text.splitlines()
    data = []
    for item in lines[1:]:  # Skip the header row
        item = item.split(",")
        data.append(item)
    dataset = np.array(data, dtype=float)
    return dataset
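

# A hedged alternative (illustrative, not part of the original module): the same
# CSV could be parsed in one call with np.genfromtxt instead of the manual split
# loop. The helper name below is hypothetical.
def collect_dataset_with_genfromtxt():
    """Parse the ADR vs Rating CSV directly with np.genfromtxt."""
    import io  # local import, so the module-level imports stay unchanged

    response = requests.get(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
        "master/Week1/ADRvsRating.csv",
        timeout=10,
    )
    # skip_header=1 drops the column-name row, matching lines[1:] above
    return np.genfromtxt(io.StringIO(response.text), delimiter=",", skip_header=1)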


def run_gradient_descent(X, y, learning_rate=0.0001550, iterations=100000):
    """Run gradient descent to find approximate coefficients.

    :param X: feature matrix (first column is the intercept term)
    :param y: target vector
    :param learning_rate: step size for each gradient update
    :param iterations: number of iterations
    :return: coefficients (intercept and slope)
    """
    m = X.shape[0]
    theta = np.zeros(X.shape[1])
    for i in range(iterations):
        # Predictions under the current parameters
        h = np.dot(X, theta)
        # Gradient of the cost J(theta) = ||X theta - y||^2 / (2m)
        gradient = np.dot(X.T, (h - y)) / m
        theta -= learning_rate * gradient
        if i % 10000 == 0:
            mse = np.mean((h - y) ** 2)
            print(f"Iteration {i}: MSE = {mse:.5f}")
    return theta
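

# A minimal sanity check (illustrative, not part of the original module): on
# noiseless data y = 2 + 3 * x, gradient descent should recover an intercept
# near 2 and a slope near 3. The helper name and the hyperparameters are
# assumptions chosen for this tiny example.
def _demo_gradient_descent():
    x = np.arange(10, dtype=float)
    features = np.c_[np.ones(x.shape[0]), x]  # prepend an intercept column
    targets = 2.0 + 3.0 * x
    theta = run_gradient_descent(
        features, targets, learning_rate=0.01, iterations=10000
    )
    print(theta)  # expected: approximately [2.0, 3.0]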


def calculate_ols_coefficients(X, y):
    """Calculate optimal coefficients using the normal equation.

    :param X: feature matrix
    :param y: target vector
    :return: coefficients (intercept and slope)
    """
    return np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
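

# A hedged alternative (illustrative, not part of the original module): forming
# and inverting X.T @ X can be numerically unstable when the feature matrix is
# ill-conditioned. np.linalg.lstsq solves the same least-squares problem with a
# more stable SVD-based routine. The function name below is hypothetical.
def calculate_lstsq_coefficients(X, y):
    """Solve the same least-squares problem with np.linalg.lstsq."""
    coefficients, _residuals, _rank, _singular_values = np.linalg.lstsq(
        X, y, rcond=None
    )
    return coefficients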


def main():
    """Driver function"""
    data = collect_dataset()

    X = data[:, 0].reshape(-1, 1)
    y = data[:, 1]

    # Add intercept term to X
    X_with_intercept = np.c_[np.ones(X.shape[0]), X]

    # Gradient Descent
    gd_theta = run_gradient_descent(X_with_intercept, y)
    print(
        f"Gradient Descent coefficients: "
        f"intercept = {gd_theta[0]:.5f}, slope = {gd_theta[1]:.5f}"
    )

    # Ordinary Least Squares (Normal Equation)
    ols_theta = calculate_ols_coefficients(X_with_intercept, y)
    print(
        f"OLS coefficients: intercept = {ols_theta[0]:.5f}, slope = {ols_theta[1]:.5f}"
    )

    # Sklearn for comparison
    reg = LinearRegression().fit(X, y)
    print(
        f"Sklearn coefficients: intercept = {reg.intercept_:.5f}, "
        f"slope = {reg.coef_[0]:.5f}"
    )

    # Calculate and print MSE for each method
    gd_mse = np.mean((np.dot(X_with_intercept, gd_theta) - y) ** 2)
    ols_mse = np.mean((np.dot(X_with_intercept, ols_theta) - y) ** 2)
    sklearn_mse = np.mean((reg.predict(X) - y) ** 2)
    print(f"Gradient Descent MSE: {gd_mse:.5f}")
    print(f"OLS MSE: {ols_mse:.5f}")
    print(f"Sklearn MSE: {sklearn_mse:.5f}")
    # Plotting
    plt.scatter(X, y, color="lightgray", label="Data points")
    plt.plot(
        X, np.dot(X_with_intercept, gd_theta), color="red", label="Gradient Descent"
    )
    plt.plot(
        X,
        np.dot(X_with_intercept, ols_theta),
        color="green",
        label="OLS (Normal Equation)",
    )
    plt.plot(X, reg.predict(X), color="blue", label="Sklearn")
    plt.legend()
    plt.xlabel("ADR")
    plt.ylabel("Rating")
    plt.title("Linear Regression: ADR vs Rating")
    plt.show()
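    # Illustrative note (not part of the original module): in a headless
    # environment the figure could be saved instead of shown, e.g. with
    # plt.savefig("adr_vs_rating.png"); the filename here is hypothetical.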


if __name__ == "__main__":
    main()