
Commit 6991fb0

added ridge_regression.py

1 parent 6c92c5a commit 6991fb0

File tree

1 file changed: +100 −0 lines changed


machine_learning/ridge_regression.py

@@ -0,0 +1,100 @@
import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets


# Ridge Regression
# Reference: https://en.wikipedia.org/wiki/Ridge_regression
def ridge_cost_function(
    X: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float
) -> float:
    """
    Compute the Ridge regression cost function with L2 regularization.

    J(θ) = (1/(2m)) * Σ (hθ(x_i) - y_i)^2 + (α/(2m)) * Σ θ_j^2   (for j = 1 to n)

    Where:
        - J(θ) is the cost function we aim to minimize
        - m is the number of training examples
        - hθ(x) = X · θ (the prediction)
        - y_i is the actual target value for example i
        - α is the regularization parameter
        - the bias term θ_0 is excluded from the penalty

    @param X: The feature matrix (m x n)
    @param y: The target vector (m,)
    @param theta: The parameters (weights) of the model (n,)
    @param alpha: The regularization parameter

    @returns: The computed cost value
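
    A small hand-checkable doctest (the example values below are illustrative,
    not taken from the original commit):

    >>> X = np.array([[1.0, 2.0], [1.0, 3.0], [1.0, 4.0]])
    >>> y = np.array([1.0, 2.0, 3.0])
    >>> theta = np.array([0.5, 0.5])
    >>> round(float(ridge_cost_function(X, y, theta, alpha=1.0)), 4)
    0.125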
    """
    m = len(y)
    predictions = np.dot(X, theta)
    # Mean squared error plus the L2 penalty; the bias term theta[0] is not
    # regularized, so the penalty sum starts at index 1. The α/(2m) scaling
    # matches the (α/m)·θ term used in the gradient below.
    cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2) + (
        alpha / (2 * m)
    ) * np.sum(theta[1:] ** 2)
    return cost


def ridge_gradient_descent(
    X: np.ndarray,
    y: np.ndarray,
    theta: np.ndarray,
    alpha: float,
    learning_rate: float,
    max_iterations: int,
) -> np.ndarray:
    """
    Perform gradient descent to minimize the cost function and fit the Ridge
    regression model.
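
    Each iteration applies the batch gradient-descent update implemented below,
    with the bias term θ_0 excluded from the regularization penalty:

        θ := θ - learning_rate * ∇J(θ)
        ∇J(θ) = (1/m) * Xᵀ(Xθ - y) + (α/m) * [0, θ_1, ..., θ_n]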

    @param X: The feature matrix (m x n)
    @param y: The target vector (m,)
    @param theta: The initial parameters (weights) of the model (n,)
    @param alpha: The regularization parameter
    @param learning_rate: The learning rate for gradient descent
    @param max_iterations: The number of iterations for gradient descent

    @returns: The optimized parameters (weights) of the model (n,)
    """
    m = len(y)

    for iteration in range(max_iterations):
        predictions = np.dot(X, theta)
        error = predictions - y

        # Gradient of the mean-squared-error term
        gradient = (1 / m) * np.dot(X.T, error)
        # Gradient of the L2 penalty (the bias term is not regularized)
        gradient[1:] += (alpha / m) * theta[1:]
        theta -= learning_rate * gradient

        # Report the cost every 100 iterations to track convergence
        if iteration % 100 == 0:
            cost = ridge_cost_function(X, y, theta, alpha)
            print(f"Iteration {iteration}, Cost: {cost}")

    return theta


if __name__ == "__main__":
    import doctest

    doctest.testmod()

    # Load the California Housing dataset
    california_housing = datasets.fetch_california_housing()
    X = california_housing.data[:, :2]  # 2 features for simplicity
    y = california_housing.target
    # Standardize the features to zero mean and unit variance
    X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

    # Add a bias column (intercept) to X
    X = np.c_[np.ones(X.shape[0]), X]

    # Initialize parameters (theta)
    theta_initial = np.zeros(X.shape[1])

    # Set hyperparameters
    alpha = 0.1
    learning_rate = 0.01
    max_iterations = 1000

    optimized_theta = ridge_gradient_descent(
        X, y, theta_initial, alpha, learning_rate, max_iterations
    )
    print(f"Optimized theta: {optimized_theta}")

    # Prediction
    def predict(X, theta):
        return np.dot(X, theta)

    y_pred = predict(X, optimized_theta)
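
    # Optional metrics (not part of the original commit): quantify the fit on
    # the training data with mean squared error and R^2.
    mse = np.mean((y - y_pred) ** 2)
    r2 = 1 - np.sum((y - y_pred) ** 2) / np.sum((y - np.mean(y)) ** 2)
    print(f"MSE: {mse:.4f}, R^2: {r2:.4f}")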

    # Plotting the results (predicted vs actual values)
    plt.figure(figsize=(10, 6))
    plt.scatter(y, y_pred, color='b', label='Predictions vs Actual')
    plt.plot([min(y), max(y)], [min(y), max(y)], color='r', label='Perfect Fit')
    plt.xlabel("Actual values")
    plt.ylabel("Predicted values")
    plt.title("Ridge Regression: Actual vs Predicted Values")
    plt.legend()
    plt.show()
