Skip to content

Commit c300b02

Browse files
committed
Added Ridge Regression To Machine Learning
1 parent 03a4251 commit c300b02

File tree

1 file changed

+182
-0
lines changed

1 file changed

+182
-0
lines changed

machine_learning/ridge_regression.py

+182
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,182 @@
1+
"""
2+
Ridge Regression is a type of linear regression that includes an L2 regularization term
3+
to prevent overfitting and improve generalization. It is commonly used when multicollinearity
4+
occurs, as it helps to reduce the model's complexity by penalizing large coefficients,
5+
resulting in better prediction performance on unseen data.
6+
7+
This implementation uses gradient descent to optimize the weights, with an L2 penalty to
8+
regularize the weight vector. The code reads a dataset with Average Damage per Round (ADR)
9+
and player ratings, processes the data, and applies ridge regression to predict ADR
10+
based on player ratings.
11+
12+
WIKI: https://en.wikipedia.org/wiki/Ridge_regression
13+
"""
14+
15+
import numpy as np
16+
import pandas as pd
17+
from sklearn.metrics import mean_absolute_error
18+
19+
class RidgeRegression:
    """
    Linear regression with an L2 (ridge) penalty, trained by batch gradient descent.

    The objective being minimised is

        (1 / (2 * n)) * (sum((X @ w + b - y) ** 2) + lambda * sum(w ** 2))

    where ``n`` is the number of samples. Only the weights are penalised;
    the bias term is left unregularised.

    Attributes:
        learning_rate (float): Step size for gradient descent optimization.
        regularization_param (float): Regularization strength (lambda),
            penalizing large weights.
        num_iterations (int): Number of gradient descent iterations.
        weights (np.ndarray | None): Feature weights; ``None`` until the model
            is fitted or the weights are assigned manually.
        bias (float): Bias (intercept) term of the regression model.
    """

    def __init__(
        self,
        learning_rate: float = 0.01,
        regularization_param: float = 0.1,
        num_iterations: int = 1000,
    ) -> None:
        self.learning_rate = learning_rate
        self.regularization_param = regularization_param
        self.num_iterations = num_iterations
        self.weights = None
        self.bias = 0.0

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        """
        Fit the model to the data using batch gradient descent.

        Every call starts from zero weights and zero bias, so fitting the same
        data twice yields the same parameters.

        Args:
            X (np.ndarray): Input features of shape (num_samples, num_features).
            y (np.ndarray): Target values of shape (num_samples,).

        Raises:
            ValueError: If X is not 2-dimensional, contains no samples, or y
                does not hold exactly one value per sample.

        The data below follow y = x + 1, so the learned slope approaches 1:

        >>> model = RidgeRegression(
        ...     learning_rate=0.01, regularization_param=0.1, num_iterations=1000
        ... )
        >>> X = np.array([[1], [2], [3], [4]])
        >>> y = np.array([2, 3, 4, 5])
        >>> model.fit(X, y)
        >>> bool(abs(model.weights[0] - 1.0) < 0.1)
        True
        >>> first_bias = model.bias
        >>> model.fit(X, y)
        >>> bool(model.bias == first_bias)
        True
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        if X.ndim != 2:
            raise ValueError("X must be a 2-dimensional array")
        num_samples, num_features = X.shape
        if num_samples == 0:
            raise ValueError("X must contain at least one sample")
        # A column-vector y would silently broadcast against the 1-D
        # predictions and produce an (n, n) error matrix, so reject it early.
        if y.shape != (num_samples,):
            raise ValueError("y must be 1-dimensional with one value per sample")

        # Reset BOTH parameters; resetting only the weights would make a second
        # call to fit() continue from the previously learned bias.
        self.weights = np.zeros(num_features)
        self.bias = 0.0

        for _ in range(self.num_iterations):
            error = self.predict(X) - y

            # Gradients of the penalised squared-error objective.
            dw = (1 / num_samples) * (
                X.T.dot(error) + self.regularization_param * self.weights
            )
            db = (1 / num_samples) * np.sum(error)

            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db

    def predict(self, X: np.ndarray) -> np.ndarray:
        """
        Predict target values for the input data X with the current parameters.

        Args:
            X (np.ndarray): Input features of shape (num_samples, num_features).

        Returns:
            np.ndarray: Predicted target values, one per sample.

        Raises:
            ValueError: If the model has no weights yet.

        >>> model = RidgeRegression()
        >>> model.weights, model.bias = np.array([0.5]), 1
        >>> X = np.array([[1], [2], [3]])
        >>> model.predict(X)
        array([1.5, 2. , 2.5])
        """
        if self.weights is None:
            raise ValueError("Model has no weights; call fit() before predict()")
        return np.asarray(X).dot(self.weights) + self.bias

    def calculate_error(self, X: np.ndarray, y: np.ndarray) -> float:
        """
        Calculate the Mean Squared Error (MSE) between predictions and targets.

        Args:
            X (np.ndarray): Input features.
            y (np.ndarray): Actual target values.

        Returns:
            float: Mean Squared Error (MSE).

        Predictions are [1.5, 2.0, 2.5], so the squared errors are
        [0, 0.25, 1]:

        >>> model = RidgeRegression()
        >>> model.weights, model.bias = np.array([0.5]), 1
        >>> X = np.array([[1], [2], [3]])
        >>> y = np.array([1.5, 2.5, 3.5])
        >>> float(round(model.calculate_error(X, y), 2))
        0.42
        """
        residuals = np.asarray(y) - self.predict(X)
        return np.mean(residuals**2)

    def calculate_mae(self, X: np.ndarray, y: np.ndarray) -> float:
        """
        Calculate the Mean Absolute Error (MAE) between predictions and targets.

        Args:
            X (np.ndarray): Input features.
            y (np.ndarray): Actual target values.

        Returns:
            float: Mean Absolute Error (MAE).

        Predictions are [1.5, 2.0, 2.5], so the absolute errors are
        [0, 0.5, 1]:

        >>> model = RidgeRegression()
        >>> model.weights, model.bias = np.array([0.5]), 1
        >>> X = np.array([[1], [2], [3]])
        >>> y = np.array([1.5, 2.5, 3.5])
        >>> float(round(model.calculate_mae(X, y), 2))
        0.5
        """
        residuals = np.asarray(y) - self.predict(X)
        # Plain NumPy keeps this consistent with calculate_error and avoids
        # needing scikit-learn for a one-line metric.
        return np.mean(np.abs(residuals))
126+
127+
# Load data
128+
def load_data(filepath):
    """
    Read a CSV file and split it into the regression feature and target.

    The 'PlayerRating' column becomes the single-column feature matrix and
    the 'ADR' column becomes the target vector.

    Args:
        filepath (str): Path to the CSV file containing data (passed
            straight to ``pandas.read_csv``, so an open text buffer
            works as well).

    Returns:
        tuple: (X, y) where X is the 2-D feature array and y is the
        1-D target array.

    >>> from io import StringIO
    >>> X, y = load_data(StringIO("PlayerRating,ADR\\n1.0,70.5\\n1.2,85.0\\n"))
    >>> isinstance(X, np.ndarray) and isinstance(y, np.ndarray)
    True
    >>> X.shape, y.shape
    ((2, 1), (2,))
    """
    frame = pd.read_csv(filepath)
    # Selecting with a list keeps the feature 2-D: (num_samples, 1).
    features = frame[['PlayerRating']].to_numpy()
    target = frame['ADR'].to_numpy()
    return features, target
147+
148+
# Example usage
149+
if __name__ == "__main__":
    # Demo script: fit a ridge regression model that predicts Average Damage
    # per Round (ADR) from player ratings.
    #
    # The model is built with a learning rate, a regularization parameter and
    # a fixed number of gradient descent iterations. After training, the
    # script prints the learned weights and bias together with the Mean
    # Squared Error (MSE) and Mean Absolute Error (MAE) on the training data.
    #
    # Illustrative session (requires a 'player_data.csv' file with
    # 'PlayerRating' and 'ADR' columns):
    #
    #   model = RidgeRegression(learning_rate=0.01, regularization_param=0.5,
    #                           num_iterations=1000)
    #   X, y = load_data('player_data.csv')
    #   model.fit(X, y)
    #   isinstance(model.weights, np.ndarray) and isinstance(model.bias, float)
    #   # -> True
    import doctest

    # Run the doctests embedded in this module's docstrings first.
    doctest.testmod()

    # Read the feature matrix and target vector from disk.
    filepath = 'player_data.csv'  # Replace with actual file path
    X, y = load_data(filepath)

    # Build the model and train it on the full dataset.
    model = RidgeRegression(learning_rate=0.01, regularization_param=0.5, num_iterations=1000)
    model.fit(X, y)

    # Evaluate on the training data itself.
    mse, mae = model.calculate_error(X, y), model.calculate_mae(X, y)

    # Report the learned parameters and the error metrics.
    print(f"Optimized weights: {model.weights}")
    print(f"Bias: {model.bias}")
    print(f"Mean Squared Error: {mse}")
    print(f"Mean Absolute Error: {mae}")

0 commit comments

Comments
 (0)