"""Linear regression via mini-batch gradient descent on the CS:GO ADR vs Rating dataset."""

import numpy as np
import requests


def collect_dataset():
    """Collect the CS:GO dataset.

    The dataset contains ADR vs Rating of a player.

    :return: dataset obtained from the link, as a matrix
    """
    response = requests.get(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/master/Week1/ADRvsRating.csv",
        timeout=10,
    )
    response.raise_for_status()  # Fail loudly on an HTTP error instead of parsing an error page
    data = np.loadtxt(response.text.splitlines()[1:], delimiter=",")  # Skip the header row
    return data


def normalize_features(data):
    """Normalize each feature column (all but the last) to zero mean and unit
    variance, in place. The last column is treated as the target and is left
    untouched.
    """
    means = np.mean(data[:, :-1], axis=0)
    stds = np.std(data[:, :-1], axis=0)
    data[:, :-1] = (data[:, :-1] - means) / stds
    return data


def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
    """Run one step of gradient descent and update the feature vector.

    :param data_x: contains the dataset
    :param data_y: contains the output associated with each data entry
    :param len_data: length of the data
    :param alpha: learning rate of the model
    :param theta: feature vector (weights for our model)
    :return: updated feature vector, curr_features - alpha * gradient (w.r.t. the features)
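
    A tiny worked example (values invented for illustration): with one sample
    x = [1, 2], target y = 3, theta = [0, 0] and alpha = 0.1, the residual is
    -3, the gradient is [-3, -6], and the update yields [0.3, 0.6].

    >>> x = np.array([[1.0, 2.0]])
    >>> y = np.array([3.0])
    >>> run_steep_gradient_descent(x, y, 1, 0.1, np.zeros((1, 2)))
    array([[0.3, 0.6]])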
"""
n = len_data
prod = np.dot(theta, data_x.transpose())
prod -= data_y.transpose()
sum_grad = np.dot(prod, data_x)
theta = theta - (alpha / n) * sum_grad
return theta


def sum_of_square_error(data_x, data_y, len_data, theta):
    """Return the sum-of-squares error for the given feature vector.

    :param data_x: contains our dataset
    :param data_y: contains the output (result vector)
    :param len_data: length of the dataset
    :param theta: contains the feature vector
    :return: sum of square error computed from given features
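
    A small illustrative check (values invented for demonstration): one sample
    x = [1, 1] with theta = [1, 1] predicts 2; against a target of 1 the error
    is (2 - 1)^2 / (2 * 1) = 0.5. float() keeps the doctest output stable
    across NumPy versions.

    >>> x = np.array([[1.0, 1.0]])
    >>> y = np.array([1.0])
    >>> float(sum_of_square_error(x, y, 1, np.array([[1.0, 1.0]])))
    0.5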
"""
prod = np.dot(theta, data_x.transpose())
prod -= data_y.transpose()
sum_elem = np.sum(np.square(prod))
error = sum_elem / (2 * len_data)
return error


def run_linear_regression(data_x, data_y):
    """Implement linear regression over the dataset using mini-batch gradient descent.

    :param data_x: contains our dataset
    :param data_y: contains the output (result vector)
    :return: feature vector for the line of best fit
    """
    iterations = 100000
    alpha = 0.0001550
    no_features = data_x.shape[1]
    len_data = data_x.shape[0]
    theta = np.zeros((1, no_features))
    rng = np.random.default_rng()  # Random generator for mini-batch sampling
    batch_size = min(32, len_data)  # Guard against datasets smaller than one batch
    for i in range(iterations):
        # Sample a mini-batch without replacement and take one descent step on it
        indices = rng.choice(len_data, size=batch_size, replace=False)
        x_batch = data_x[indices]
        y_batch = data_y[indices]
        theta = run_steep_gradient_descent(x_batch, y_batch, len(x_batch), alpha, theta)
        error = sum_of_square_error(x_batch, y_batch, len(x_batch), theta)
        print(f"At Iteration {i + 1} - Error is {error:.5f}")
    return theta


def mean_absolute_error(predicted_y, original_y):
    """Return the mean absolute error between predictions and expected values.

    :param predicted_y: contains the output of prediction (result vector)
    :param original_y: contains values of expected outcome
    :return: mean absolute error computed from given features
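
    A quick illustrative check with hand-picked values:

    >>> mean_absolute_error([3, -0.5, 2, 7], [2.5, 0.0, 2, 8])
    0.5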
"""
total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y))
return total / len(original_y)


def main():
    """Driver function."""
    data = collect_dataset()
    data = normalize_features(data)  # Normalize the features
    len_data = data.shape[0]
    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)  # Prepend a column of ones as the bias term
    data_y = data[:, -1].astype(float)

    theta = run_linear_regression(data_x, data_y)
    len_result = theta.shape[1]
    print("Resultant feature vector:")
    for i in range(len_result):
        print(f"{theta[0, i]:.5f}")


if __name__ == "__main__":
    main()