
Commit 3a07610

Committed Oct 2, 2024
[pre-commit.ci] auto fixes from pre-commit.com hooks
for more information, see https://pre-commit.ci
1 parent ece859d · commit 3a07610

File tree: 1 file changed (+35, -17 lines)

machine_learning/linear_regression.py (+35, -17)
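The diff below is consistent with standard formatter behavior (an inference from the changes, not stated in the commit beyond the pre-commit.ci message): long print(...) and plt.plot(...) calls are wrapped to fit a line-length limit, blank lines between top-level definitions are normalized to two per PEP 8, and trailing whitespace is stripped.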
@@ -3,6 +3,7 @@
 from sklearn.linear_model import LinearRegression
 import matplotlib.pyplot as plt
 
+
 def collect_dataset():
     """Collect dataset of CSGO
     The dataset contains ADR vs Rating of a Player
@@ -21,6 +22,7 @@ def collect_dataset():
     dataset = np.array(data, dtype=float)
     return dataset
 
+
 def run_gradient_descent(X, y, learning_rate=0.0001550, iterations=100000):
     """Run gradient descent to find approximate coefficients
     :param X: feature matrix
@@ -31,18 +33,19 @@ def run_gradient_descent(X, y, learning_rate=0.0001550, iterations=100000):
     """
     m = X.shape[0]
     theta = np.zeros(X.shape[1])
-
+
    for i in range(iterations):
        h = np.dot(X, theta)
        gradient = np.dot(X.T, (h - y)) / m
        theta -= learning_rate * gradient
-
+
        if i % 10000 == 0:
            mse = np.mean((h - y) ** 2)
            print(f"Iteration {i}: MSE = {mse:.5f}")
-
+
    return theta
 
+
 def calculate_ols_coefficients(X, y):
     """Calculate optimal coefficients using the normal equation
     :param X: feature matrix
@@ -51,47 +54,62 @@ def calculate_ols_coefficients(X, y):
     """
     return np.linalg.inv(X.T.dot(X)).dot(X.T).dot(y)
 
+
 def main():
     """Driver function"""
     data = collect_dataset()
-
+
     X = data[:, 0].reshape(-1, 1)
     y = data[:, 1]
-
+
     # Add intercept term to X
     X_with_intercept = np.c_[np.ones(X.shape[0]), X]
-
+
     # Gradient Descent
     gd_theta = run_gradient_descent(X_with_intercept, y)
-    print(f"Gradient Descent coefficients: intercept = {gd_theta[0]:.5f}, slope = {gd_theta[1]:.5f}")
-
+    print(
+        f"Gradient Descent coefficients: intercept = {gd_theta[0]:.5f}, slope = {gd_theta[1]:.5f}"
+    )
+
     # Ordinary Least Squares (Normal Equation)
     ols_theta = calculate_ols_coefficients(X_with_intercept, y)
-    print(f"OLS coefficients: intercept = {ols_theta[0]:.5f}, slope = {ols_theta[1]:.5f}")
-
+    print(
+        f"OLS coefficients: intercept = {ols_theta[0]:.5f}, slope = {ols_theta[1]:.5f}"
+    )
+
     # Sklearn for comparison
     reg = LinearRegression().fit(X, y)
-    print(f"Sklearn coefficients: intercept = {reg.intercept_:.5f}, slope = {reg.coef_[0]:.5f}")
-
+    print(
+        f"Sklearn coefficients: intercept = {reg.intercept_:.5f}, slope = {reg.coef_[0]:.5f}"
+    )
+
     # Calculate and print MSE for each method
     gd_mse = np.mean((np.dot(X_with_intercept, gd_theta) - y) ** 2)
     ols_mse = np.mean((np.dot(X_with_intercept, ols_theta) - y) ** 2)
     sklearn_mse = np.mean((reg.predict(X) - y) ** 2)
-
+
     print(f"Gradient Descent MSE: {gd_mse:.5f}")
     print(f"OLS MSE: {ols_mse:.5f}")
     print(f"Sklearn MSE: {sklearn_mse:.5f}")
-
+
     # Plotting
     plt.scatter(X, y, color="lightgray", label="Data points")
-    plt.plot(X, np.dot(X_with_intercept, gd_theta), color="red", label="Gradient Descent")
-    plt.plot(X, np.dot(X_with_intercept, ols_theta), color="green", label="OLS (Normal Equation)")
+    plt.plot(
+        X, np.dot(X_with_intercept, gd_theta), color="red", label="Gradient Descent"
+    )
+    plt.plot(
+        X,
+        np.dot(X_with_intercept, ols_theta),
+        color="green",
+        label="OLS (Normal Equation)",
+    )
     plt.plot(X, reg.predict(X), color="blue", label="Sklearn")
     plt.legend()
     plt.xlabel("ADR")
     plt.ylabel("Rating")
     plt.title("Linear Regression: ADR vs Rating")
     plt.show()
 
+
 if __name__ == "__main__":
-    main()
+    main()
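
For background on what the reformatted file computes, here is a minimal, self-contained sketch of the two estimators it compares, gradient descent and the normal equation, on synthetic data. The data, seed, learning rate, and iteration count are invented for this illustration and kept small-scale so gradient descent converges without feature scaling; they are not the repository's values.

import numpy as np

rng = np.random.default_rng(42)
X = rng.uniform(0, 2, size=(100, 1))                     # toy feature
y = 3.0 * X[:, 0] + 1.0 + rng.normal(0, 0.1, size=100)   # true slope 3, intercept 1

X_b = np.c_[np.ones(X.shape[0]), X]  # prepend an intercept column, as in main()

# Gradient descent on the MSE: theta <- theta - lr * X^T (X theta - y) / m
m = X_b.shape[0]
theta = np.zeros(X_b.shape[1])
for _ in range(5000):
    theta -= 0.1 * X_b.T.dot(X_b.dot(theta) - y) / m

# Normal equation: theta = (X^T X)^-1 X^T y, as in calculate_ols_coefficients
ols_theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y)

print(theta)      # approximately [1.0, 3.0]
print(ols_theta)  # matches gradient descent to several decimals

On the repository's real ADR data the feature scale is much larger, which is presumably why run_gradient_descent defaults to the tiny learning rate 0.0001550 and 100,000 iterations, and why main() prints the MSE of all three fits side by side.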

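A side note on calculate_ols_coefficients: forming inv(X^T X) explicitly can be numerically fragile when X is ill-conditioned. A sketch of a more stable drop-in alternative using NumPy's least-squares solver (hypothetical, not something this commit introduces):

import numpy as np

def calculate_ols_coefficients_stable(X, y):
    """Hypothetical variant of the repository's calculate_ols_coefficients:
    solves min ||X theta - y||^2 via a stable factorization instead of
    forming inv(X^T X) explicitly."""
    theta, _residuals, _rank, _singular_values = np.linalg.lstsq(X, y, rcond=None)
    return theta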