Skip to content

Add Ridge Regression to Machine Learning #12111

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
125 changes: 125 additions & 0 deletions machine_learning/ridge_regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
import numpy as np
import requests


def collect_dataset():
    """Download the CSGO "ADR vs Rating" dataset and return it as a matrix.

    The CSV is fetched over HTTP, the header row is discarded and every
    remaining non-blank line is split on commas. Values are kept as strings;
    numeric conversion is left to the caller.

    :return : dataset obtained from the link, as matrix (header row removed)
    :raises requests.HTTPError: if the server answers with an error status
    """
    response = requests.get(
        "https://raw.githubusercontent.com/yashLadha/The_Math_of_Intelligence/"
        "master/Week1/ADRvsRating.csv",
        timeout=10,
    )
    # Fail loudly on 4xx/5xx instead of parsing an error page as CSV rows.
    response.raise_for_status()

    lines = response.text.splitlines()
    # lines[0] holds the column labels; blank lines would yield ragged rows.
    data = [line.split(",") for line in lines[1:] if line.strip()]

    # np.matrix is kept on purpose: slices of it stay 2-D for the callers.
    return np.matrix(data)


def run_steep_gradient_descent(data_x, data_y, len_data, alpha, theta, lambda_reg):
    """Perform a single ridge-regularised gradient-descent step.

    :param data_x    : feature matrix, one row per sample
    :param data_y    : target values, one row per sample
    :param len_data  : number of samples (used to average the gradient)
    :param alpha     : learning rate
    :param theta     : current weights as a row vector; theta[0, 0] is
                       treated as the bias
    :param lambda_reg: regularization strength
    :return          : new weights,
                       theta - (alpha / len_data) * (gradient + penalty)
    """
    # Prediction error for every sample: theta . x^T - y^T
    residual = np.dot(theta, data_x.T)
    residual -= data_y.T

    # Summed (not yet averaged) gradient of the squared error
    gradient = np.dot(residual, data_x)

    # L2 penalty term; the bias weight is zeroed so it is not shrunk
    penalised_weights = np.copy(theta)
    penalised_weights[0, 0] = 0
    gradient += lambda_reg * penalised_weights

    return theta - (alpha / len_data) * gradient


def sum_of_square_error(data_x, data_y, len_data, theta, lambda_reg):
    """Compute the ridge cost: halved mean squared error plus L2 penalty.

    :param data_x    : feature matrix, one row per sample
    :param data_y    : target values, one row per sample
    :param len_data  : number of samples
    :param theta     : weights as a row vector; column 0 is the bias
    :param lambda_reg: regularization strength
    :return          : (squared error + lambda * sum of squared non-bias
                       weights) / (2 * len_data)
    """
    denominator = 2 * len_data

    # Squared prediction error accumulated over all samples
    residual = np.dot(theta, data_x.T)
    residual -= data_y.T
    squared_error = np.sum(np.square(residual))

    # Penalty over every weight except the bias in column 0
    non_bias_weights = theta[:, 1:]
    penalty = lambda_reg * np.sum(np.square(non_bias_weights))

    return (squared_error / denominator) + (penalty / denominator)


def run_ridge_regression(
    data_x, data_y, lambda_reg=1.0, *, iterations=100000, alpha=0.0001550
):
    """Implement Ridge Regression over the dataset using gradient descent.

    Starting from all-zero weights, the weights are updated ``iterations``
    times and the regularised error is printed after every update.

    :param data_x    : contains our dataset (one row per sample)
    :param data_y    : contains the output (result vector)
    :param lambda_reg: Regularization parameter
    :param iterations: number of gradient-descent updates to perform
    :param alpha     : learning rate used for every update
    :return          : feature for line of best fit (Feature vector)
    :raises ValueError: if the dataset contains no samples
    """
    len_data = data_x.shape[0]
    no_features = data_x.shape[1]

    # The update averages over len_data; an empty dataset would otherwise
    # surface as an unexplained ZeroDivisionError inside the helper.
    if len_data == 0:
        raise ValueError("data_x must contain at least one sample")

    theta = np.zeros((1, no_features))

    for i in range(iterations):
        theta = run_steep_gradient_descent(
            data_x, data_y, len_data, alpha, theta, lambda_reg
        )
        error = sum_of_square_error(data_x, data_y, len_data, theta, lambda_reg)
        print(f"At Iteration {i + 1} - Error is {error:.5f}")

    return theta


def mean_absolute_error(predicted_y, original_y):
    """Average the absolute differences between expected and predicted values.

    :param predicted_y : predicted values, indexed by position
    :param original_y  : expected values; its length sets the number of terms
    :return            : sum of |original - predicted| divided by
                         len(original_y)
    """
    total = 0
    for position, expected in enumerate(original_y):
        deviation = abs(expected - predicted_y[position])
        total = total + deviation
    return total / len(original_y)


def main():
    """Fetch the dataset, fit ridge regression and print the learned weights."""
    data = collect_dataset()
    len_data = data.shape[0]

    # Every column but the last is a feature; a column of ones is prepended
    # so the first weight acts as the bias.
    bias_column = np.ones(len_data)
    feature_columns = data[:, :-1]
    data_x = np.c_[bias_column, feature_columns].astype(float)

    # The last column holds the value to predict.
    data_y = data[:, -1].astype(float)

    lambda_reg = 1.0  # Set your desired regularization parameter
    theta = run_ridge_regression(data_x, data_y, lambda_reg)

    print("Resultant Feature vector : ")
    for i in range(theta.shape[1]):
        print(f"{theta[0, i]:.5f}")


# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()