|
| 1 | +# Required imports to run this file |
| 2 | +import matplotlib.pyplot as plt |
| 3 | +import numpy as np |
| 4 | + |
| 5 | + |
| 6 | +# weighted matrix |
def weighted_matrix(point: np.mat, training_data_x: np.mat, bandwidth: float) -> np.mat:
    """
    Calculate the Gaussian weight for every point in the data set
    relative to the query point.
    point --> the x where we want to make predictions
    training_data_x --> training data, one sample per row
    bandwidth --> tau; not a fixed value, it can be varied depending on output
    Returns an (m x m) diagonal weight matrix, m = number of training samples.
    >>> weighted_matrix(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],
    ... [24.59,25.69]]), 0.6)
    matrix([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],
            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
    """
    # m is the number of training samples; the column count is unused, so
    # index shape directly instead of unpacking a dead variable.
    m = np.shape(training_data_x)[0]
    # Initializing weights as identity matrix keeps off-diagonal entries zero
    weights = np.mat(np.eye(m))
    # Gaussian kernel: weight decays with squared distance from the query point
    for j in range(m):
        diff = point - training_data_x[j]
        weights[j, j] = np.exp(diff * diff.T / (-2.0 * bandwidth ** 2))
    return weights
| 30 | + |
| 31 | + |
def local_weight(
    point: np.mat, training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
) -> np.mat:
    """
    Calculate the local weights using the weight_matrix function on training data.
    Return the weighted matrix.
    >>> local_weight(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],
    ... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
    matrix([[0.00873174],
            [0.08272556]])
    """
    # Diagonal matrix of per-sample kernel weights for this query point
    sample_weights = weighted_matrix(point, training_data_x, bandwidth)
    # Weighted normal equations: solve (X^T W X) beta = (X^T W y)
    gram = training_data_x.T * (sample_weights * training_data_x)
    moment = training_data_x.T * sample_weights * training_data_y.T
    return gram.I * moment
| 49 | + |
| 50 | + |
def local_weight_regression(
    training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
) -> np.ndarray:
    """
    Calculate predictions for each data point on axis.
    training_data_x --> training data, one sample per row
    training_data_y --> row vector of target values
    bandwidth --> tau, kernel bandwidth forwarded to local_weight
    Returns a 1-D numpy array with one prediction per training sample.
    (The previous ``-> np.mat`` annotation was wrong: the result is the
    ndarray created by ``np.zeros``.)
    >>> local_weight_regression(np.mat([[16.99, 10.34], [21.01,23.68],
    ... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
    array([1.07173261, 1.65970737, 3.50160179])
    """
    # One prediction slot per training sample; column count is unused
    m = np.shape(training_data_x)[0]
    ypred = np.zeros(m)

    # Fit a separate weighted regression at every sample and evaluate it there
    for i, item in enumerate(training_data_x):
        ypred[i] = item * local_weight(
            item, training_data_x, training_data_y, bandwidth
        )

    return ypred
| 69 | + |
| 70 | + |
def load_data(
    dataset_name: str, cola_name: str, colb_name: str
) -> tuple[np.mat, np.mat, np.ndarray, np.ndarray]:
    """
    Load a seaborn example dataset and split it into x and y points.
    dataset_name --> name of the seaborn dataset (e.g. "tips")
    cola_name --> column used as the feature (e.g. "total_bill")
    colb_name --> column used as the target (e.g. "tip")
    Returns (training_data_x, mcol_b, col_a, col_b); training_data_x carries a
    leading column of ones for the intercept term.
    (The previous ``-> np.mat`` annotation was wrong: a 4-tuple is returned.)
    """
    # Imported locally so the rest of the module works without seaborn installed
    import seaborn as sns

    data = sns.load_dataset(dataset_name)
    col_a = np.array(data[cola_name])  # total_bill
    col_b = np.array(data[colb_name])  # tip

    mcol_a = np.mat(col_a)
    mcol_b = np.mat(col_b)

    # Column of ones provides the intercept term of the linear model
    m = np.shape(mcol_b)[1]
    one = np.ones((1, m), dtype=int)

    # horizontal stacking: each row of training_data_x is [1, x]
    training_data_x = np.hstack((one.T, mcol_a.T))

    return training_data_x, mcol_b, col_a, col_b
| 92 | + |
| 93 | + |
def get_preds(training_data_x: np.mat, mcol_b: np.mat, tau: float) -> np.ndarray:
    """
    Get predictions with minimum error for each training data
    >>> get_preds(np.mat([[16.99, 10.34], [21.01,23.68],
    ... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
    array([1.07173261, 1.65970737, 3.50160179])
    """
    # Thin wrapper: tau is the kernel bandwidth for the regression routine
    return local_weight_regression(training_data_x, mcol_b, tau)
| 103 | + |
| 104 | + |
def plot_preds(
    training_data_x: np.mat,
    predictions: np.ndarray,
    col_x: np.ndarray,
    col_y: np.ndarray,
    cola_name: str,
    colb_name: str,
) -> None:
    """
    Plot predictions over the raw data and display the graph.
    training_data_x --> design matrix whose second column is the raw feature
    predictions --> per-sample predictions from get_preds
    col_x, col_y --> raw data points for the scatter plot
    cola_name, colb_name --> axis labels
    (The previous ``-> plt.plot`` annotation was wrong: nothing is returned,
    the function only draws via matplotlib.)
    """
    # Sort the feature values so the fitted curve is drawn left to right
    xsort = training_data_x.copy()
    xsort.sort(axis=0)
    plt.scatter(col_x, col_y, color="blue")
    plt.plot(
        xsort[:, 1],
        # Reorder predictions to match the sorted feature values
        predictions[training_data_x[:, 1].argsort(0)],
        color="yellow",
        linewidth=5,
    )
    plt.title("Local Weighted Regression")
    plt.xlabel(cola_name)
    plt.ylabel(colb_name)
    plt.show()
| 130 | + |
| 131 | + |
if __name__ == "__main__":
    # Demo: locally weighted regression of tip against total_bill
    design_matrix, targets, bills, tips = load_data("tips", "total_bill", "tip")
    fitted = get_preds(design_matrix, targets, 0.5)
    plot_preds(design_matrix, fitted, bills, tips, "total_bill", "tip")
0 commit comments