1
"""
Name  - LSTM - Long Short-Term Memory Network For Sequence Prediction
Goal  - Predict sequences of data
Detail: Total 3 layers neural network
        * Input layer
        * LSTM layer
        * Output layer
Author: Shashank Tyagi
Github: LEVII007
"""
1
13
##### Explanation #####
# This script implements a Long Short-Term Memory (LSTM) network to learn and
# predict sequences of characters.
# It uses numpy for numerical operations and tqdm for progress visualization.
# The script initializes the LSTM network with the specified hyperparameters and
# trains it on the input data.
# Finally, it tests the trained network and prints the accuracy of the predictions.

##### Imports #####
28
38
from tqdm import tqdm
29
39
import numpy as np
30
40
31
41
class LSTM :
32
- def __init__ (self , data , hidden_dim = 25 , epochs = 1000 , lr = 0.05 ):
42
+ def __init__ (self , data : str , hidden_dim : int = 25 , epochs : int = 1000 , lr : float = 0.05 ) -> None :
43
+ """
44
+ Initialize the LSTM network with the given data and hyperparameters.
45
+
46
+ :param data: The input data as a string.
47
+ :param hidden_dim: The number of hidden units in the LSTM layer.
48
+ :param epochs: The number of training epochs.
49
+ :param lr: The learning rate.
50
+ """
33
51
self .data = data .lower ()
34
52
self .hidden_dim = hidden_dim
35
53
self .epochs = epochs
@@ -48,12 +66,21 @@ def __init__(self, data, hidden_dim=25, epochs=1000, lr=0.05):
48
66
self .initialize_weights ()
49
67
50
68
##### Helper Functions #####
51
- def one_hot_encode (self , char ):
69
+ def one_hot_encode (self , char : str ) -> np .ndarray :
70
+ """
71
+ One-hot encode a character.
72
+
73
+ :param char: The character to encode.
74
+ :return: A one-hot encoded vector.
75
+ """
52
76
vector = np .zeros ((self .char_size , 1 ))
53
77
vector [self .char_to_idx [char ]] = 1
54
78
return vector
55
79
56
- def initialize_weights (self ):
80
+ def initialize_weights (self ) -> None :
81
+ """
82
+ Initialize the weights and biases for the LSTM network.
83
+ """
57
84
self .wf = self .init_weights (self .char_size + self .hidden_dim , self .hidden_dim )
58
85
self .bf = np .zeros ((self .hidden_dim , 1 ))
59
86
@@ -69,26 +96,56 @@ def initialize_weights(self):
69
96
self .wy = self .init_weights (self .hidden_dim , self .char_size )
70
97
self .by = np .zeros ((self .char_size , 1 ))
71
98
72
- def init_weights (self , input_dim , output_dim ):
99
+ def init_weights (self , input_dim : int , output_dim : int ) -> np .ndarray :
100
+ """
101
+ Initialize weights with random values.
102
+
103
+ :param input_dim: The input dimension.
104
+ :param output_dim: The output dimension.
105
+ :return: A matrix of initialized weights.
106
+ """
73
107
return np .random .uniform (- 1 , 1 , (output_dim , input_dim )) * np .sqrt (6 / (input_dim + output_dim ))
74
108
75
109
##### Activation Functions #####
76
- def sigmoid (self , x , derivative = False ):
110
+ def sigmoid (self , x : np .ndarray , derivative : bool = False ) -> np .ndarray :
111
+ """
112
+ Sigmoid activation function.
113
+
114
+ :param x: The input array.
115
+ :param derivative: Whether to compute the derivative.
116
+ :return: The sigmoid activation or its derivative.
117
+ """
77
118
if derivative :
78
119
return x * (1 - x )
79
120
return 1 / (1 + np .exp (- x ))
80
121
81
- def tanh (self , x , derivative = False ):
122
+ def tanh (self , x : np .ndarray , derivative : bool = False ) -> np .ndarray :
123
+ """
124
+ Tanh activation function.
125
+
126
+ :param x: The input array.
127
+ :param derivative: Whether to compute the derivative.
128
+ :return: The tanh activation or its derivative.
129
+ """
82
130
if derivative :
83
131
return 1 - x ** 2
84
132
return np .tanh (x )
85
133
86
- def softmax (self , x ):
134
+ def softmax (self , x : np .ndarray ) -> np .ndarray :
135
+ """
136
+ Softmax activation function.
137
+
138
+ :param x: The input array.
139
+ :return: The softmax activation.
140
+ """
87
141
exp_x = np .exp (x - np .max (x ))
88
142
return exp_x / exp_x .sum (axis = 0 )
89
143
90
144
##### LSTM Network Methods #####
91
- def reset (self ):
145
+ def reset (self ) -> None :
146
+ """
147
+ Reset the LSTM network states.
148
+ """
92
149
self .concat_inputs = {}
93
150
94
151
self .hidden_states = {- 1 : np .zeros ((self .hidden_dim , 1 ))}
@@ -101,7 +158,13 @@ def reset(self):
101
158
self .input_gates = {}
102
159
self .outputs = {}
103
160
104
- def forward (self , inputs ):
161
+ def forward (self , inputs : list ) -> list :
162
+ """
163
+ Perform forward propagation through the LSTM network.
164
+
165
+ :param inputs: The input data as a list of one-hot encoded vectors.
166
+ :return: The outputs of the network.
167
+ """
105
168
self .reset ()
106
169
107
170
outputs = []
@@ -120,7 +183,13 @@ def forward(self, inputs):
120
183
121
184
return outputs
122
185
123
- def backward (self , errors , inputs ):
186
+ def backward (self , errors : list , inputs : list ) -> None :
187
+ """
188
+ Perform backpropagation through time to compute gradients and update weights.
189
+
190
+ :param errors: The errors at each time step.
191
+ :param inputs: The input data as a list of one-hot encoded vectors.
192
+ """
124
193
d_wf , d_bf = 0 , 0
125
194
d_wi , d_bi = 0 , 0
126
195
d_wc , d_bc = 0 , 0
@@ -186,7 +255,10 @@ def backward(self, errors, inputs):
186
255
self .wy += d_wy * self .lr
187
256
self .by += d_by * self .lr
188
257
189
- def train (self ):
258
+ def train (self ) -> None :
259
+ """
260
+ Train the LSTM network on the input data.
261
+ """
190
262
inputs = [self .one_hot_encode (char ) for char in self .train_X ]
191
263
192
264
for _ in tqdm (range (self .epochs )):
@@ -199,7 +271,10 @@ def train(self):
199
271
200
272
self .backward (errors , self .concat_inputs )
201
273
202
- def test (self ):
274
+ def test (self ) -> None :
275
+ """
276
+ Test the trained LSTM network on the input data and print the accuracy.
277
+ """
203
278
accuracy = 0
204
279
probabilities = self .forward ([self .one_hot_encode (char ) for char in self .train_X ])
205
280
@@ -229,6 +304,14 @@ def test(self):
229
304
##### Testing #####
# lstm.test()

if __name__ == "__main__":
    # A Python `if` body must contain at least one statement; the example
    # calls below are intentionally commented out, so `pass` keeps this
    # guard syntactically valid until they are uncommented.
    pass

    # Initialize Network
    # lstm = LSTM(data=data, hidden_dim=25, epochs=1000, lr=0.05)

    ##### Training #####
    # lstm.train()

    ##### Testing #####
    # lstm.test()

# testing can be done by uncommenting the above lines of code.
0 commit comments