- # Required imports to run this file
import matplotlib.pyplot as plt
import numpy as np


- # weighted matrix
- def weighted_matrix(point: np.mat, training_data_x: np.mat, bandwidth: float) -> np.mat:
+ def weighted_matrix(
+     point: np.array, training_data_x: np.array, bandwidth: float
+ ) -> np.array:
    """
-     Calculate the weight for every point in the
-     data set. It takes training_point, query_point, and tau
-     Here Tau is not a fixed value it can be varied depends on output.
-     tau --> bandwidth
-     xmat --> Training data
-     point --> the x where we want to make predictions
-     >>> weighted_matrix(np.array([1., 1.]), np.mat([[16.99, 10.34], [21.01, 23.68],
-     ... [24.59, 25.69]]), 0.6)
-     matrix([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],
-             [0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
-             [0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
+     Calculate the weight for every point in the data set.
+     point --> the x value at which we want to make predictions
+     >>> weighted_matrix(
+     ...     np.array([1., 1.]),
+     ...     np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
+     ...     0.6
+     ... )
+     array([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],
+            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
+            [0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
    """
-     # m is the number of training samples
-     m, n = np.shape(training_data_x)
-     # Initializing weights as identity matrix
-     weights = np.mat(np.eye(m))
+     m, _ = np.shape(training_data_x)  # m is the number of training samples
+     weights = np.eye(m)  # Initializing weights as identity matrix
+
    # calculating weights for all training examples [x(i)'s]
    for j in range(m):
        diff = point - training_data_x[j]
-         weights[j, j] = np.exp(diff * diff.T / (-2.0 * bandwidth**2))
+         weights[j, j] = np.exp(diff @ diff.T / (-2.0 * bandwidth**2))
    return weights
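
For a 1-D diff, diff @ diff.T is just the squared Euclidean distance, so each diagonal
entry above is the Gaussian kernel weight exp(-||point - x_j||^2 / (2 * bandwidth^2)).
A minimal check of that reading (the names below are illustrative, not part of the file):

import numpy as np

point = np.array([1.0, 1.0])
x_j = np.array([16.99, 10.34])  # one training row
bandwidth = 0.6
diff = point - x_j
# Both expressions compute the same Gaussian kernel weight.
assert np.isclose(
    np.exp(diff @ diff.T / (-2.0 * bandwidth**2)),
    np.exp(-np.sum((point - x_j) ** 2) / (2.0 * bandwidth**2)),
)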


def local_weight(
-     point: np.mat, training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
- ) -> np.mat:
+     point: np.array,
+     training_data_x: np.array,
+     training_data_y: np.array,
+     bandwidth: float,
+ ) -> np.array:
    """
    Calculate the local weights using the weight_matrix function on training data.
    Return the weighted matrix.
-     >>> local_weight(np.array([1., 1.]), np.mat([[16.99, 10.34], [21.01, 23.68],
-     ... [24.59, 25.69]]), np.mat([[1.01, 1.66, 3.5]]), 0.6)
-     matrix([[0.00873174],
-             [0.08272556]])
+     >>> local_weight(
+     ...     np.array([1., 1.]),
+     ...     np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
+     ...     np.array([[1.01, 1.66, 3.5]]),
+     ...     0.6
+     ... )
+     array([[0.00873174],
+            [0.08272556]])
    """
    weight = weighted_matrix(point, training_data_x, bandwidth)
-     w = (training_data_x.T * (weight * training_data_x)).I * (
-         training_data_x.T * weight * training_data_y.T
+     w = np.linalg.inv(training_data_x.T @ (weight @ training_data_x)) @ (
+         training_data_x.T @ weight @ training_data_y.T
    )

    return w
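
The new expression is the closed-form weighted least-squares solution
theta = inv(X.T @ W @ X) @ (X.T @ W @ y). An equivalent formulation avoids building the
explicit inverse by going through np.linalg.solve; the sketch below reuses weighted_matrix
from this file, and the helper name local_weight_solve is hypothetical, not what the commit does:

def local_weight_solve(point, training_data_x, training_data_y, bandwidth):
    # Same weighted normal equations, solved without forming the matrix inverse.
    weight = weighted_matrix(point, training_data_x, bandwidth)
    a = training_data_x.T @ (weight @ training_data_x)
    b = training_data_x.T @ weight @ training_data_y.T
    return np.linalg.solve(a, b)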


def local_weight_regression(
-     training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
- ) -> np.mat:
+     training_data_x: np.array, training_data_y: np.array, bandwidth: float
+ ) -> np.array:
    """
-     Calculate predictions for each data point on axis.
-     >>> local_weight_regression(np.mat([[16.99, 10.34], [21.01, 23.68],
-     ... [24.59, 25.69]]), np.mat([[1.01, 1.66, 3.5]]), 0.6)
+     Calculate predictions for each data point on axis
+     >>> local_weight_regression(
+     ...     np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
+     ...     np.array([[1.01, 1.66, 3.5]]),
+     ...     0.6
+     ... )
    array([1.07173261, 1.65970737, 3.50160179])
    """
-     m, n = np.shape(training_data_x)
+     m, _ = np.shape(training_data_x)
    ypred = np.zeros(m)

    for i, item in enumerate(training_data_x):
-         ypred[i] = item * local_weight(
+         ypred[i] = item @ local_weight(
            item, training_data_x, training_data_y, bandwidth
        )

    return ypred
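
The loop above only evaluates the fit at the training inputs themselves. Predicting at a new
x value reuses the same machinery with a [1, x] design row; the predict_at helper below is a
hypothetical sketch using local_weight from this file, assuming training_data_x already
carries the leading column of ones that load_data adds:

def predict_at(x0: float, training_data_x, training_data_y, bandwidth: float) -> float:
    # Locally weighted prediction at an arbitrary query value x0.
    query = np.array([1.0, x0])  # [intercept, feature], matching the rows of training_data_x
    return (query @ local_weight(query, training_data_x, training_data_y, bandwidth)).item()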


- def load_data(dataset_name: str, cola_name: str, colb_name: str) -> np.mat:
+ def load_data(
+     dataset_name: str, cola_name: str, colb_name: str
+ ) -> tuple[np.array, np.array, np.array, np.array]:
    """
-     Function used for loading data from the seaborn splitting into x and y points
+     Load data from seaborn and split it into x and y points
    """
    import seaborn as sns

    data = sns.load_dataset(dataset_name)
    col_a = np.array(data[cola_name])  # total_bill
    col_b = np.array(data[colb_name])  # tip

-     mcol_a = np.mat(col_a)
-     mcol_b = np.mat(col_b)
+     mcol_a = col_a.copy()
+     mcol_b = col_b.copy()

-     m = np.shape(mcol_b)[1]
-     one = np.ones((1, m), dtype=int)
+     one = np.ones(np.shape(mcol_b)[0], dtype=int)

-     # horizontal stacking
-     training_data_x = np.hstack((one.T, mcol_a.T))
+     # pairing elements of one and mcol_a
+     training_data_x = np.column_stack((one, mcol_a))

    return training_data_x, mcol_b, col_a, col_b
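
np.column_stack pairs the column of ones with the feature column, so training_data_x is the
usual design matrix with an intercept term. A small illustration with made-up values:

import numpy as np

one = np.ones(3, dtype=int)
mcol_a = np.array([16.99, 21.01, 24.59])  # illustrative total_bill values
design = np.column_stack((one, mcol_a))
# design[:, 0] is the intercept column of ones; design[:, 1] holds the raw feature values.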


- def get_preds(training_data_x: np.mat, mcol_b: np.mat, tau: float) -> np.ndarray:
+ def get_preds(training_data_x: np.array, mcol_b: np.array, tau: float) -> np.array:
    """
    Get predictions with minimum error for each training data
-     >>> get_preds(np.mat([[16.99, 10.34], [21.01, 23.68],
-     ... [24.59, 25.69]]), np.mat([[1.01, 1.66, 3.5]]), 0.6)
+     >>> get_preds(
+     ...     np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
+     ...     np.array([[1.01, 1.66, 3.5]]),
+     ...     0.6
+     ... )
    array([1.07173261, 1.65970737, 3.50160179])
    """
    ypred = local_weight_regression(training_data_x, mcol_b, tau)
    return ypred


def plot_preds(
-     training_data_x: np.mat,
-     predictions: np.ndarray,
-     col_x: np.ndarray,
-     col_y: np.ndarray,
+     training_data_x: np.array,
+     predictions: np.array,
+     col_x: np.array,
+     col_y: np.array,
    cola_name: str,
    colb_name: str,
) -> plt.plot:
    """
-     This function used to plot predictions and display the graph
+     Plot predictions and display the graph
    """
    xsort = training_data_x.copy()
    xsort.sort(axis=0)
@@ -128,6 +140,10 @@ def plot_preds(


if __name__ == "__main__":
+     import doctest
+
+     doctest.testmod()
+
    training_data_x, mcol_b, col_a, col_b = load_data("tips", "total_bill", "tip")
    predictions = get_preds(training_data_x, mcol_b, 0.5)
    plot_preds(training_data_x, predictions, col_a, col_b, "total_bill", "tip")
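
The bandwidth passed to get_preds (0.5 here) controls how local each fit is: smaller values
weight only very near neighbours, larger values approach ordinary linear regression. A quick,
illustrative sweep over the training data, reusing the objects created above (not part of the
committed file):

for tau in (0.5, 1.0, 2.0, 5.0):
    preds = get_preds(training_data_x, mcol_b, tau)
    print(tau, np.mean((preds - col_b) ** 2))  # training mean squared error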