Commit 1486a54

modified linear regression to use OLS
1 parent 95398e0 commit 1486a54
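
This commit replaces the iterative gradient-descent fit (run_steep_gradient_descent plus a 100,000-iteration training loop) with the closed-form ordinary least squares (OLS) solution, i.e. the normal equation theta = (X^T X)^-1 X^T y evaluated in a single NumPy expression. As a quick sanity check — a sketch of mine, not part of the commit — the normal equation can be verified against NumPy's least-squares solver on a tiny synthetic dataset:

```python
# Sketch only, not from the commit: confirm that the normal equation the new
# code uses agrees with NumPy's least-squares solver on synthetic data.
import numpy as np

rng = np.random.default_rng(0)
x = rng.random((50, 2))                    # 50 samples, 2 features
y = 3.0 + 2.0 * x[:, 0] - 1.5 * x[:, 1]    # exact linear target: bias 3.0, weights 2.0, -1.5

X = np.c_[np.ones(x.shape[0]), x]          # prepend a ones column for the bias term
theta = np.linalg.inv(X.T @ X) @ X.T @ y   # normal equation: (X^T X)^-1 X^T y
theta_lstsq, *_ = np.linalg.lstsq(X, y, rcond=None)

print(theta)                               # approximately [3.0, 2.0, -1.5]
print(np.allclose(theta, theta_lstsq))     # True
```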

File tree

1 file changed: +14 −67 lines changed


Diff for: machine_learning/linear_regression.py

+14 −67
@@ -31,85 +31,32 @@ def collect_dataset():
     return dataset


-def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta):
-    """Run steep gradient descent and update the feature vector accordingly
-    :param data_x   : contains the dataset
-    :param data_y   : contains the output associated with each data-entry
-    :param len_data : length of the data
-    :param alpha    : learning rate of the model
-    :param theta    : feature vector (weights for our model)
-    :return         : updated features, using
-                      curr_features - alpha * gradient (w.r.t. feature)
-    """
-    n = len_data
-
-    prod = np.dot(theta, data_x.transpose())
-    prod -= data_y.transpose()
-    sum_grad = np.dot(prod, data_x)
-    theta = theta - (alpha / n) * sum_grad
-    return theta
-
-
-def sum_of_square_error(data_x, data_y, len_data, theta):
-    """Return sum of square error for error calculation
-    :param data_x   : contains our dataset
-    :param data_y   : contains the output (result vector)
-    :param len_data : length of the dataset
-    :param theta    : contains the feature vector
-    :return         : sum of square error computed from given features
-    """
-    prod = np.dot(theta, data_x.transpose())
-    prod -= data_y.transpose()
-    sum_elem = np.sum(np.square(prod))
-    error = sum_elem / (2 * len_data)
-    return error
-
-
-def run_linear_regression(data_x, data_y):
-    """Implement linear regression over the dataset
-    :param data_x : contains our dataset
-    :param data_y : contains the output (result vector)
+def run_linear_regression_ols(data_x, data_y):
+    """Implement linear regression using OLS over the dataset
+    :param data_x : contains our dataset
+    :param data_y : contains the output (result vector)
     :return       : feature for line of best fit (feature vector)
     """
-    iterations = 100000
-    alpha = 0.0001550
-
-    no_features = data_x.shape[1]
-    len_data = data_x.shape[0] - 1
+    # Add a column of ones to data_x for the bias term
+    data_x = np.c_[np.ones(data_x.shape[0]), data_x].astype(float)

-    theta = np.zeros((1, no_features))
-
-    for i in range(iterations):
-        theta = run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta)
-        error = sum_of_square_error(data_x, data_y, len_data, theta)
-        print(f"At Iteration {i + 1} - Error is {error:.5f}")
+    # Solve the normal equation: theta = (X^T X)^-1 X^T y
+    theta = np.linalg.inv(data_x.T.dot(data_x)).dot(data_x.T).dot(data_y)

     return theta


-def mean_absolute_error(predicted_y, original_y):
-    """Return mean absolute error for error calculation
-    :param predicted_y : contains the output of prediction (result vector)
-    :param original_y  : contains values of expected outcome
-    :return            : mean absolute error computed from given values
-    """
-    total = sum(abs(y - predicted_y[i]) for i, y in enumerate(original_y))
-    return total / len(original_y)
-
-
 def main():
     """Driver function"""
     data = collect_dataset()
-
-    len_data = data.shape[0]
-    data_x = np.c_[np.ones(len_data), data[:, :-1]].astype(float)
+    data_x = data[:, :-1].astype(float)
     data_y = data[:, -1].astype(float)

-    theta = run_linear_regression(data_x, data_y)
-    len_result = theta.shape[1]
-    print("Resultant Feature vector : ")
-    for i in range(len_result):
-        print(f"{theta[0, i]:.5f}")
+    theta = run_linear_regression_ols(data_x, data_y)
+    print("Resultant feature vector (weights):")
+    theta_list = np.asarray(theta).ravel().tolist()  # flatten; works for np.matrix too
+    for value in theta_list:
+        print(f"{value:.5f}")


 if __name__ == "__main__":
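
One caveat the commit does not address: np.linalg.inv(X^T X) raises LinAlgError when the feature columns are exactly collinear (X^T X singular) and is numerically fragile when they are nearly so. A more robust variant, sketched below as an illustration rather than code from the repository (the function name is mine), delegates the solve to np.linalg.lstsq:

```python
import numpy as np


def run_linear_regression_ols_stable(data_x, data_y):
    """Hypothetical variant of run_linear_regression_ols: the same OLS fit,
    but solved with np.linalg.lstsq, which tolerates a singular or
    ill-conditioned X^T X that would break np.linalg.inv."""
    # Add a column of ones for the bias term, as in the committed version
    x = np.c_[np.ones(data_x.shape[0]), np.asarray(data_x, dtype=float)]
    y = np.asarray(data_y, dtype=float).ravel()
    theta, *_ = np.linalg.lstsq(x, y, rcond=None)
    return theta
```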

0 commit comments
