
Commit 0fec92b

Add Ridge Regression To Machine Learning
1 parent c300b02 commit 0fec92b

File tree

1 file changed: +78 -156 lines changed

machine_learning/ridge_regression.py

@@ -1,182 +1,104 @@
 """
-Ridge Regression is a type of linear regression that includes an L2 regularization term
-to prevent overfitting and improve generalization. It is commonly used when multicollinearity
-occurs, as it helps to reduce the model's complexity by penalizing large coefficients,
-resulting in better prediction performance on unseen data.
+Ridge Regression with L2 Regularization using Gradient Descent.
 
-This implementation uses gradient descent to optimize the weights, with an L2 penalty to
-regularize the feature vector. The code reads a dataset with Average Damage per Round (ADR)
-and player ratings, processes the data, and applies ridge regression to predict ADR
-based on player ratings.
+Ridge Regression is a type of linear regression that includes an L2 regularization
+term to prevent overfitting and improve generalization. It is commonly used when
+multicollinearity is present in the data.
 
-WIKI: https://en.wikipedia.org/wiki/Ridge_regression
+More on Ridge Regression: https://en.wikipedia.org/wiki/Tikhonov_regularization
 """
 
+from typing import Tuple
 import numpy as np
 import pandas as pd
-from sklearn.metrics import mean_absolute_error
 
-class RidgeRegression:
+def load_data(file_path: str) -> Tuple[np.ndarray, np.ndarray]:
     """
-    A Ridge Regression model with L2 regularization.
-
-    Attributes:
-        learning_rate (float): Step size for gradient descent optimization.
-        regularization_param (float): Regularization strength (lambda), penalizing large weights.
-        num_iterations (int): Number of iterations for gradient descent.
-        weights (np.ndarray): Feature weights.
-        bias (float): Bias term for the regression model.
-    """
-    def __init__(self, learning_rate=0.01, regularization_param=0.1, num_iterations=1000):
-        self.learning_rate = learning_rate
-        self.regularization_param = regularization_param
-        self.num_iterations = num_iterations
-        self.weights = None
-        self.bias = 0
-
-    def fit(self, X, y):
-        """
-        Fits the ridge regression model to the data using gradient descent.
-
-        Args:
-            X (np.ndarray): Input features.
-            y (np.ndarray): Target variable.
-
-        >>> model = RidgeRegression(learning_rate=0.01, regularization_param=0.1, num_iterations=1000)
-        >>> X = np.array([[1], [2], [3], [4]])
-        >>> y = np.array([2, 3, 4, 5])
-        >>> model.fit(X, y)
-        >>> round(model.weights[0], 2)
-        0.86
-        """
-        num_samples, num_features = X.shape
-        self.weights = np.zeros(num_features)
-
-        for i in range(self.num_iterations):
-            y_pred = self.predict(X)
-            error = y_pred - y
-
-            # Calculate gradients with L2 regularization
-            dw = (1 / num_samples) * (X.T.dot(error) + self.regularization_param * self.weights)
-            db = (1 / num_samples) * np.sum(error)
-
-            # Update weights and bias
-            self.weights -= self.learning_rate * dw
-            self.bias -= self.learning_rate * db
-
-    def predict(self, X):
-        """
-        Predicts target values for the input data X using the trained model.
-
-        Args:
-            X (np.ndarray): Input features for which to predict target values.
-
-        Returns:
-            np.ndarray: Predicted target values.
-
-        >>> model = RidgeRegression()
-        >>> model.weights, model.bias = np.array([0.5]), 1
-        >>> X = np.array([[1], [2], [3]])
-        >>> model.predict(X)
-        array([1.5, 2. , 2.5])
-        """
-        return X.dot(self.weights) + self.bias
-
-    def calculate_error(self, X, y):
-        """
-        Calculates the Mean Squared Error (MSE) between the predicted and actual target values.
-
-        Args:
-            X (np.ndarray): Input features.
-            y (np.ndarray): Actual target values.
-
-        Returns:
-            float: Mean Squared Error (MSE).
-
-        >>> model = RidgeRegression()
-        >>> model.weights, model.bias = np.array([0.5]), 1
-        >>> X = np.array([[1], [2], [3]])
-        >>> y = np.array([1.5, 2.5, 3.5])
-        >>> round(model.calculate_error(X, y), 2)
-        0.0
-        """
-        y_pred = self.predict(X)
-        return np.mean((y - y_pred) ** 2)  # Mean squared error
-
-    def calculate_mae(self, X, y):
-        """
-        Calculates the Mean Absolute Error (MAE) between the predicted and actual target values.
-
-        Args:
-            X (np.ndarray): Input features.
-            y (np.ndarray): Actual target values.
-
-        Returns:
-            float: Mean Absolute Error (MAE).
-
-        >>> model = RidgeRegression()
-        >>> model.weights, model.bias = np.array([0.5]), 1
-        >>> X = np.array([[1], [2], [3]])
-        >>> y = np.array([1.5, 2.5, 3.5])
-        >>> round(model.calculate_mae(X, y), 2)
-        0.0
-        """
-        y_pred = self.predict(X)
-        return mean_absolute_error(y, y_pred)
-
-# Load data
-def load_data(filepath):
-    """
-    Loads data from a CSV file, extracting 'PlayerRating' as the feature
-    and 'ADR' as the target variable.
+    Load data from a CSV file and return features and target arrays.
 
     Args:
-        filepath (str): Path to the CSV file containing data.
+        file_path: Path to the CSV file.
 
     Returns:
-        tuple: (X, y) where X is the feature array and y is the target array.
+        A tuple containing features (X) and target (y) as numpy arrays.
 
-    >>> data = load_data('player_data.csv')
-    >>> isinstance(data[0], np.ndarray) and isinstance(data[1], np.ndarray)
+    Example:
+    >>> data = pd.DataFrame({'ADR': [200, 220], 'Rating': [1.2, 1.4]})
+    >>> data.to_csv('sample.csv', index=False)
+    >>> X, y = load_data('sample.csv')
+    >>> X.shape == (2, 1) and y.shape == (2,)
     True
     """
-    data = pd.read_csv(filepath)
-    X = data[['PlayerRating']].values  # Feature
-    y = data['ADR'].values  # Target
+    data = pd.read_csv(file_path)
+    X = data[['Rating']].to_numpy()  # Use .to_numpy() instead of .values (PD011)
+    y = data['ADR'].to_numpy()
     return X, y
 
-# Example usage
-if __name__ == "__main__":
+def ridge_gradient_descent(
+    X: np.ndarray, y: np.ndarray, reg_lambda: float, learning_rate: float,
+    num_iters: int = 1000
+) -> np.ndarray:
     """
-    Ridge Regression model for predicting Average Damage per Round (ADR) based on player ratings.
+    Perform Ridge Regression using gradient descent.
 
-    The model is initialized with a learning rate, regularization parameter, and a specified
-    number of gradient descent iterations. After training, it outputs the optimized weights
-    and bias, and displays the Mean Squared Error (MSE) and Mean Absolute Error (MAE).
+    Args:
+        X: Feature matrix.
+        y: Target vector.
+        reg_lambda: Regularization parameter (lambda).
+        learning_rate: Learning rate for gradient descent.
+        num_iters: Number of iterations for gradient descent.
+
+    Returns:
+        Optimized weights (coefficients) for predicting ADR from Rating.
 
-    >>> model = RidgeRegression(learning_rate=0.01, regularization_param=0.5, num_iterations=1000)
-    >>> X, y = load_data('player_data.csv')
-    >>> model.fit(X, y)
-    >>> isinstance(model.weights, np.ndarray) and isinstance(model.bias, float)
+    Example:
+    >>> X = np.array([[1.2], [1.4]])
+    >>> y = np.array([200, 220])
+    >>> ridge_gradient_descent(X, y, reg_lambda=0.1, learning_rate=0.01).shape == (1,)
     True
     """
-    import doctest
+    weights = np.zeros(X.shape[1])
+    m = len(y)
+
+    for _ in range(num_iters):
+        predictions = X @ weights
+        error = predictions - y
+        gradient = (X.T @ error + reg_lambda * weights) / m
+        weights -= learning_rate * gradient
+
+    return weights
+
+def mean_absolute_error(y_true: np.ndarray, y_pred: np.ndarray) -> float:
+    """
+    Calculate the Mean Absolute Error (MAE) between true and predicted values.
 
+    Args:
+        y_true: Actual values.
+        y_pred: Predicted values.
+
+    Returns:
+        Mean absolute error.
+
+    Example:
+    >>> mean_absolute_error(np.array([200, 220]), np.array([205, 215]))
+    5.0
+    """
+    return np.mean(np.abs(y_true - y_pred))
+
+if __name__ == "__main__":
+    import doctest
     doctest.testmod()
 
-    # Load and preprocess the data
-    filepath = 'player_data.csv'  # Replace with actual file path
-    X, y = load_data(filepath)
-
-    # Initialize and train the model
-    model = RidgeRegression(learning_rate=0.01, regularization_param=0.5, num_iterations=1000)
-    model.fit(X, y)
-
-    # Calculate and display errors
-    mse = model.calculate_error(X, y)
-    mae = model.calculate_mae(X, y)
-
-    print(f"Optimized weights: {model.weights}")
-    print(f"Bias: {model.bias}")
-    print(f"Mean Squared Error: {mse}")
-    print(f"Mean Absolute Error: {mae}")
+    # Load the data
+    X, y = load_data("sample.csv")
+
+    # Fit the Ridge Regression model
+    optimized_weights = ridge_gradient_descent(X, y, reg_lambda=0.1, learning_rate=0.01)
+
+    # Make predictions
+    y_pred = X @ optimized_weights
+
+    # Calculate Mean Absolute Error
+    mae = mean_absolute_error(y, y_pred)
+    print("Optimized Weights:", optimized_weights)
+    print("Mean Absolute Error:", mae)
