import numpy as np
from matplotlib import pyplot as plt
from sklearn import datasets


# Ridge Regression
# reference: https://en.wikipedia.org/wiki/Ridge_regression
def ridge_cost_function(
    x: np.ndarray, y: np.ndarray, theta: np.ndarray, alpha: float
) -> float:
    """
    Compute the Ridge regression cost function with L2 regularization.

    J(θ) = (1/(2m)) * Σ (hθ(x_i) - y_i)^2 + (α/(2m)) * Σ θ_j^2   (for j = 1 to n)

    Where:
        - J(θ) is the cost function we aim to minimize
        - m is the number of training examples
        - hθ(x) = x @ θ (the prediction)
        - y_i is the actual target value for example i
        - α is the regularization parameter
        - the bias weight θ_0 is not regularized

    @param x: The feature matrix (m x n)
    @param y: The target vector (m,)
    @param theta: The parameters (weights) of the model (n,)
    @param alpha: The regularization parameter
    @returns: The computed cost value
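
    Example (a minimal hand-checked case: with θ = 0 the prediction is 0, the
    regularization term vanishes, and the cost is the mean of y_i^2 divided by 2):

    >>> x = np.array([[1.0], [2.0]])
    >>> y = np.array([1.0, 2.0])
    >>> float(ridge_cost_function(x, y, np.zeros(1), alpha=0.5))
    1.25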
"""
m = len(y)
predictions = np.dot(x, theta)
    # the penalty skips the bias weight theta[0] and uses alpha / (2 * m) so the
    # cost is consistent with the gradient used in ridge_gradient_descent
    cost = (1 / (2 * m)) * np.sum((predictions - y) ** 2)
    cost += (alpha / (2 * m)) * np.sum(theta[1:] ** 2)
return cost


def ridge_gradient_descent(
    x: np.ndarray,
    y: np.ndarray,
    theta: np.ndarray,
    alpha: float,
    learning_rate: float,
    max_iterations: int,
) -> np.ndarray:
    """
    Perform gradient descent to minimize the cost function and fit the
    Ridge regression model.

    @param x: The feature matrix (m x n)
    @param y: The target vector (m,)
    @param theta: The initial parameters (weights) of the model (n,)
    @param alpha: The regularization parameter
    @param learning_rate: The learning rate for gradient descent
    @param max_iterations: The number of iterations for gradient descent
    @returns: The optimized parameters (weights) of the model (n,)
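
    Example (a tiny single-step sketch chosen so the numbers can be checked by
    hand: a bias-only column, no regularization, learning rate 0.5):

    >>> x = np.array([[1.0], [1.0]])
    >>> y = np.array([1.0, 1.0])
    >>> ridge_gradient_descent(x, y, np.zeros(1), 0.0, 0.5, 1)
    Iteration 0, Cost: 0.125
    array([0.5])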
"""
m = len(y)
for iteration in range(max_iterations):
predictions = np.dot(x, theta)
error = predictions - y
# calculate the gradient
gradient = (1 / m) * np.dot(x.T, error)
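        # L2 penalty gradient; the bias weight theta[0] is left unpenalized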
gradient[1:] += (alpha / m) * theta[1:]
theta -= learning_rate * gradient
if iteration % 100 == 0:
cost = ridge_cost_function(x, y, theta, alpha)
print(f"Iteration {iteration}, Cost: {cost}")
return theta
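

# Optional cross-check: the same ridge objective also has a closed-form
# solution; this is a minimal sketch for verifying the gradient-descent
# result on small, well-conditioned problems.
def ridge_closed_form(x: np.ndarray, y: np.ndarray, alpha: float) -> np.ndarray:
    """
    Solve the stationarity condition of the objective used above,
    (x.T @ x + α·D) @ θ = x.T @ y, where D is the identity matrix with a zero
    in its top-left entry so that the bias weight θ_0 stays unpenalized,
    mirroring ridge_gradient_descent.

    >>> x = np.array([[1.0, 0.0], [1.0, 1.0], [1.0, 2.0]])
    >>> y = np.array([0.0, 1.0, 2.0])
    >>> bool(np.allclose(ridge_closed_form(x, y, alpha=0.0), [0.0, 1.0]))
    True
    """
    penalty = alpha * np.eye(x.shape[1])
    penalty[0, 0] = 0.0  # do not penalize the bias/intercept weight
    return np.linalg.solve(x.T @ x + penalty, x.T @ y)

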
if __name__ == "__main__":
import doctest
doctest.testmod()
# Load California Housing dataset
california_housing = datasets.fetch_california_housing()
x = california_housing.data[:, :2] # 2 features for simplicity
y = california_housing.target
x = (x - np.mean(x, axis=0)) / np.std(x, axis=0)
# Add a bias column (intercept) to X
x = np.c_[np.ones(x.shape[0]), x]
# Initialize parameters (theta)
theta_initial = np.zeros(x.shape[1])
# Set hyperparameters
alpha = 0.1
learning_rate = 0.01
max_iterations = 1000
    optimized_theta = ridge_gradient_descent(
        x, y, theta_initial, alpha, learning_rate, max_iterations
    )
print(f"Optimized theta: {optimized_theta}")
    # Prediction
    def predict(x: np.ndarray, theta: np.ndarray) -> np.ndarray:
        return np.dot(x, theta)
y_pred = predict(x, optimized_theta)
# Plotting the results (here we visualize predicted vs actual values)
plt.figure(figsize=(10, 6))
    plt.scatter(y, y_pred, color="b", label="Predictions vs Actual")
    plt.plot([min(y), max(y)], [min(y), max(y)], color="r", label="Perfect Fit")
plt.xlabel("Actual values")
plt.ylabel("Predicted values")
plt.title("Ridge Regression: Actual vs Predicted Values")
plt.legend()
plt.show()