Skip to content

Commit 3e73c87

Browse files
tianyizheng02 authored and github-actions committed
Refactor local_weighted_learning.py to use np.array (TheAlgorithms#8069)
* updating DIRECTORY.md
* Format local_weighted_learning.py doctests for clarity
* Refactor local_weighted_learning.py to use np.array instead of np.mat

  The np.matrix class is planned to be eventually deprecated in favor of np.array, and current use of the class raises warnings in pytest
* Update local_weighted_learning.py documentation

Co-authored-by: github-actions <${GITHUB_ACTOR}@users.noreply.github.com>
1 parent f66e62b commit 3e73c87

File tree

2 files changed

+68
-51
lines changed

2 files changed

+68
-51
lines changed

Diff for: DIRECTORY.md

+2-1
Original file line number | Diff line number | Diff line change
@@ -123,6 +123,7 @@
123123
* [Huffman](compression/huffman.py)
124124
* [Lempel Ziv](compression/lempel_ziv.py)
125125
* [Lempel Ziv Decompress](compression/lempel_ziv_decompress.py)
126+
* [Lz77](compression/lz77.py)
126127
* [Peak Signal To Noise Ratio](compression/peak_signal_to_noise_ratio.py)
127128
* [Run Length Encoding](compression/run_length_encoding.py)
128129

@@ -1164,7 +1165,7 @@
11641165
* [Get Amazon Product Data](web_programming/get_amazon_product_data.py)
11651166
* [Get Imdb Top 250 Movies Csv](web_programming/get_imdb_top_250_movies_csv.py)
11661167
* [Get Imdbtop](web_programming/get_imdbtop.py)
1167-
* [Get Top Billioners](web_programming/get_top_billioners.py)
1168+
* [Get Top Billionaires](web_programming/get_top_billionaires.py)
11681169
* [Get Top Hn Posts](web_programming/get_top_hn_posts.py)
11691170
* [Get User Tweets](web_programming/get_user_tweets.py)
11701171
* [Giphy](web_programming/giphy.py)
Diff for: local_weighted_learning.py

Original file line number | Diff line number | Diff line change
@@ -1,116 +1,128 @@
1-
# Required imports to run this file
21
import matplotlib.pyplot as plt
32
import numpy as np
43

54

6-
# weighted matrix
7-
def weighted_matrix(point: np.mat, training_data_x: np.mat, bandwidth: float) -> np.mat:
5+
def weighted_matrix(
6+
point: np.array, training_data_x: np.array, bandwidth: float
7+
) -> np.array:
88
"""
9-
Calculate the weight for every point in the
10-
data set. It takes training_point , query_point, and tau
11-
Here Tau is not a fixed value it can be varied depends on output.
12-
tau --> bandwidth
13-
xmat -->Training data
14-
point --> the x where we want to make predictions
15-
>>> weighted_matrix(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],
16-
... [24.59,25.69]]), 0.6)
17-
matrix([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],
18-
[0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
19-
[0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
9+
Calculate the weight for every point in the data set.
10+
point --> the x value at which we want to make predictions
11+
>>> weighted_matrix(
12+
... np.array([1., 1.]),
13+
... np.array([[16.99, 10.34], [21.01,23.68], [24.59,25.69]]),
14+
... 0.6
15+
... )
16+
array([[1.43807972e-207, 0.00000000e+000, 0.00000000e+000],
17+
[0.00000000e+000, 0.00000000e+000, 0.00000000e+000],
18+
[0.00000000e+000, 0.00000000e+000, 0.00000000e+000]])
2019
"""
21-
# m is the number of training samples
22-
m, n = np.shape(training_data_x)
23-
# Initializing weights as identity matrix
24-
weights = np.mat(np.eye(m))
20+
m, _ = np.shape(training_data_x) # m is the number of training samples
21+
weights = np.eye(m) # Initializing weights as identity matrix
22+
2523
# calculating weights for all training examples [x(i)'s]
2624
for j in range(m):
2725
diff = point - training_data_x[j]
28-
weights[j, j] = np.exp(diff * diff.T / (-2.0 * bandwidth**2))
26+
weights[j, j] = np.exp(diff @ diff.T / (-2.0 * bandwidth**2))
2927
return weights
3028

3129

3230
def local_weight(
33-
point: np.mat, training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
34-
) -> np.mat:
31+
point: np.array,
32+
training_data_x: np.array,
33+
training_data_y: np.array,
34+
bandwidth: float,
35+
) -> np.array:
3536
"""
3637
Calculate the local weights using the weight_matrix function on training data.
3738
Return the weighted matrix.
38-
>>> local_weight(np.array([1., 1.]),np.mat([[16.99, 10.34], [21.01,23.68],
39-
... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
40-
matrix([[0.00873174],
41-
[0.08272556]])
39+
>>> local_weight(
40+
... np.array([1., 1.]),
41+
... np.array([[16.99, 10.34], [21.01,23.68], [24.59,25.69]]),
42+
... np.array([[1.01, 1.66, 3.5]]),
43+
... 0.6
44+
... )
45+
array([[0.00873174],
46+
[0.08272556]])
4247
"""
4348
weight = weighted_matrix(point, training_data_x, bandwidth)
44-
w = (training_data_x.T * (weight * training_data_x)).I * (
45-
training_data_x.T * weight * training_data_y.T
49+
w = np.linalg.inv(training_data_x.T @ (weight @ training_data_x)) @ (
50+
training_data_x.T @ weight @ training_data_y.T
4651
)
4752

4853
return w
4954

5055

5156
def local_weight_regression(
52-
training_data_x: np.mat, training_data_y: np.mat, bandwidth: float
53-
) -> np.mat:
57+
training_data_x: np.array, training_data_y: np.array, bandwidth: float
58+
) -> np.array:
5459
"""
55-
Calculate predictions for each data point on axis.
56-
>>> local_weight_regression(np.mat([[16.99, 10.34], [21.01,23.68],
57-
... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
60+
Calculate predictions for each data point on axis
61+
>>> local_weight_regression(
62+
... np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
63+
... np.array([[1.01, 1.66, 3.5]]),
64+
... 0.6
65+
... )
5866
array([1.07173261, 1.65970737, 3.50160179])
5967
"""
60-
m, n = np.shape(training_data_x)
68+
m, _ = np.shape(training_data_x)
6169
ypred = np.zeros(m)
6270

6371
for i, item in enumerate(training_data_x):
64-
ypred[i] = item * local_weight(
72+
ypred[i] = item @ local_weight(
6573
item, training_data_x, training_data_y, bandwidth
6674
)
6775

6876
return ypred
6977

7078

71-
def load_data(dataset_name: str, cola_name: str, colb_name: str) -> np.mat:
79+
def load_data(
80+
dataset_name: str, cola_name: str, colb_name: str
81+
) -> tuple[np.array, np.array, np.array, np.array]:
7282
"""
73-
Function used for loading data from the seaborn splitting into x and y points
83+
Load data from seaborn and split it into x and y points
7484
"""
7585
import seaborn as sns
7686

7787
data = sns.load_dataset(dataset_name)
7888
col_a = np.array(data[cola_name]) # total_bill
7989
col_b = np.array(data[colb_name]) # tip
8090

81-
mcol_a = np.mat(col_a)
82-
mcol_b = np.mat(col_b)
91+
mcol_a = col_a.copy()
92+
mcol_b = col_b.copy()
8393

84-
m = np.shape(mcol_b)[1]
85-
one = np.ones((1, m), dtype=int)
94+
one = np.ones(np.shape(mcol_b)[0], dtype=int)
8695

87-
# horizontal stacking
88-
training_data_x = np.hstack((one.T, mcol_a.T))
96+
# pairing elements of one and mcol_a
97+
training_data_x = np.column_stack((one, mcol_a))
8998

9099
return training_data_x, mcol_b, col_a, col_b
91100

92101

93-
def get_preds(training_data_x: np.mat, mcol_b: np.mat, tau: float) -> np.ndarray:
102+
def get_preds(training_data_x: np.array, mcol_b: np.array, tau: float) -> np.array:
94103
"""
95104
Get predictions with minimum error for each training data
96-
>>> get_preds(np.mat([[16.99, 10.34], [21.01,23.68],
97-
... [24.59,25.69]]),np.mat([[1.01, 1.66, 3.5]]), 0.6)
105+
>>> get_preds(
106+
... np.array([[16.99, 10.34], [21.01, 23.68], [24.59, 25.69]]),
107+
... np.array([[1.01, 1.66, 3.5]]),
108+
... 0.6
109+
... )
98110
array([1.07173261, 1.65970737, 3.50160179])
99111
"""
100112
ypred = local_weight_regression(training_data_x, mcol_b, tau)
101113
return ypred
102114

103115

104116
def plot_preds(
105-
training_data_x: np.mat,
106-
predictions: np.ndarray,
107-
col_x: np.ndarray,
108-
col_y: np.ndarray,
117+
training_data_x: np.array,
118+
predictions: np.array,
119+
col_x: np.array,
120+
col_y: np.array,
109121
cola_name: str,
110122
colb_name: str,
111123
) -> plt.plot:
112124
"""
113-
This function used to plot predictions and display the graph
125+
Plot predictions and display the graph
114126
"""
115127
xsort = training_data_x.copy()
116128
xsort.sort(axis=0)
@@ -128,6 +140,10 @@ def plot_preds(
128140

129141

130142
if __name__ == "__main__":
143+
import doctest
144+
145+
doctest.testmod()
146+
131147
training_data_x, mcol_b, col_a, col_b = load_data("tips", "total_bill", "tip")
132148
predictions = get_preds(training_data_x, mcol_b, 0.5)
133149
plot_preds(training_data_x, predictions, col_a, col_b, "total_bill", "tip")

0 commit comments

Comments
 (0)