"""

##### Explanation #####
-# This script implements a Long Short-Term Memory (LSTM) network to learn
+# This script implements a Long Short-Term Memory (LSTM) network to learn
# and predict sequences of characters.
# It uses numpy for numerical operations and tqdm for progress visualization.

-# The data is a paragraph about LSTM, converted to lowercase and split into
+# The data is a paragraph about LSTM, converted to lowercase and split into
# characters. Each character is one-hot encoded for training.

-# The LSTM class initializes weights and biases for the forget, input, candidate,
+# The LSTM class initializes weights and biases for the forget, input, candidate,
# and output gates. It also initializes weights and biases for the final output layer.

-# The forward method performs forward propagation through the LSTM network,
-# computing hidden and cell states. It uses sigmoid and tanh activation
+# The forward method performs forward propagation through the LSTM network,
+# computing hidden and cell states. It uses sigmoid and tanh activation
# functions for the gates and cell states.

-# The backward method performs backpropagation through time, computing gradients
-# for the weights and biases. It updates the weights and biases using
+# The backward method performs backpropagation through time, computing gradients
+# for the weights and biases. It updates the weights and biases using
# the computed gradients and the learning rate.

-# The train method trains the LSTM network on the input data for a specified
-# number of epochs. It uses one-hot encoded inputs and computes errors
+# The train method trains the LSTM network on the input data for a specified
+# number of epochs. It uses one-hot encoded inputs and computes errors
# using the softmax function.

-# The test method evaluates the trained LSTM network on the input data,
+# The test method evaluates the trained LSTM network on the input data,
# computing accuracy based on predictions.

-# The script initializes the LSTM network with specified hyperparameters
-# and trains it on the input data. Finally, it tests the trained network
+# The script initializes the LSTM network with specified hyperparameters
+# and trains it on the input data. Finally, it tests the trained network
# and prints the accuracy of the predictions.

##### Imports #####
from tqdm import tqdm
import numpy as np

+
class LSTM:
-    def __init__(self, data: str, hidden_dim: int = 25,
-                 epochs: int = 1000, lr: float = 0.05) -> None:
+    def __init__(
+        self, data: str, hidden_dim: int = 25, epochs: int = 1000, lr: float = 0.05
+    ) -> None:
        """
        Initialize the LSTM network with the given data and hyperparameters.
-
+
        :param data: The input data as a string.
        :param hidden_dim: The number of hidden units in the LSTM layer.
        :param epochs: The number of training epochs.
@@ -63,7 +65,7 @@ def __init__(self, data: str, hidden_dim: int = 25,
        self.chars = set(self.data)
        self.data_size, self.char_size = len(self.data), len(self.chars)

-        print(f'Data size: {self.data_size}, Char Size: {self.char_size}')
+        print(f"Data size: {self.data_size}, Char Size: {self.char_size}")

        self.char_to_idx = {c: i for i, c in enumerate(self.chars)}
        self.idx_to_char = {i: c for i, c in enumerate(self.chars)}
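
For readers following along: the two lookup tables above are what make the one-hot encoding below work. A minimal standalone sketch of the same idea (the toy string and names here are illustrative, not from the script):

import numpy as np

text = "hello"  # toy stand-in for the training paragraph
chars = sorted(set(text))  # ['e', 'h', 'l', 'o']
char_to_idx = {c: i for i, c in enumerate(chars)}
idx_to_char = {i: c for i, c in enumerate(chars)}

def one_hot(char: str) -> np.ndarray:
    # Column vector of shape (char_size, 1), matching the class's convention
    v = np.zeros((len(chars), 1))
    v[char_to_idx[char]] = 1
    return v

print(one_hot("l").ravel())  # [0. 0. 1. 0.]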
@@ -76,7 +78,7 @@ def __init__(self, data: str, hidden_dim: int = 25,
    def one_hot_encode(self, char: str) -> np.ndarray:
        """
        One-hot encode a character.
-
+
        :param char: The character to encode.
        :return: A one-hot encoded vector.
        """
@@ -88,20 +90,16 @@ def initialize_weights(self) -> None:
        """
        Initialize the weights and biases for the LSTM network.
        """
-        self.wf = self.init_weights(self.char_size + self.hidden_dim,
-                                    self.hidden_dim)
+        self.wf = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim)
        self.bf = np.zeros((self.hidden_dim, 1))

-        self.wi = self.init_weights(self.char_size + self.hidden_dim,
-                                    self.hidden_dim)
+        self.wi = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim)
        self.bi = np.zeros((self.hidden_dim, 1))

-        self.wc = self.init_weights(self.char_size + self.hidden_dim,
-                                    self.hidden_dim)
+        self.wc = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim)
        self.bc = np.zeros((self.hidden_dim, 1))

-        self.wo = self.init_weights(self.char_size + self.hidden_dim,
-                                    self.hidden_dim)
+        self.wo = self.init_weights(self.char_size + self.hidden_dim, self.hidden_dim)
        self.bo = np.zeros((self.hidden_dim, 1))

        self.wy = self.init_weights(self.hidden_dim, self.char_size)
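
Each gate reads the concatenation of the previous hidden state and the current input, which is why every gate matrix is created with input width char_size + hidden_dim (while wy maps the hidden state back to vocabulary size). A quick shape check, as a sketch with illustrative sizes and the same Glorot-style scaling that init_weights applies in the next hunk:

import numpy as np

hidden_dim, char_size = 25, 40  # illustrative sizes
fan_in, fan_out = char_size + hidden_dim, hidden_dim
wf = np.random.uniform(-1, 1, (fan_out, fan_in)) * np.sqrt(6 / (fan_in + fan_out))

h_prev = np.zeros((hidden_dim, 1))
x_t = np.zeros((char_size, 1))
z_t = np.concatenate((h_prev, x_t))  # shape (65, 1)
print((wf @ z_t).shape)  # (25, 1): one pre-activation per hidden unit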
@@ -110,19 +108,20 @@ def initialize_weights(self) -> None:
    def init_weights(self, input_dim: int, output_dim: int) -> np.ndarray:
        """
        Initialize weights with random values.
-
+
        :param input_dim: The input dimension.
        :param output_dim: The output dimension.
        :return: A matrix of initialized weights.
        """
-        return np.random.uniform(-1, 1, (output_dim, input_dim)) * \
-            np.sqrt(6 / (input_dim + output_dim))
+        return np.random.uniform(-1, 1, (output_dim, input_dim)) * np.sqrt(
+            6 / (input_dim + output_dim)
+        )

    ##### Activation Functions #####
    def sigmoid(self, x: np.ndarray, derivative: bool = False) -> np.ndarray:
        """
        Sigmoid activation function.
-
+
        :param x: The input array.
        :param derivative: Whether to compute the derivative.
        :return: The sigmoid activation or its derivative.
@@ -134,19 +133,19 @@ def sigmoid(self, x: np.ndarray, derivative: bool = False) -> np.ndarray:
    def tanh(self, x: np.ndarray, derivative: bool = False) -> np.ndarray:
        """
        Tanh activation function.
-
+
        :param x: The input array.
        :param derivative: Whether to compute the derivative.
        :return: The tanh activation or its derivative.
        """
        if derivative:
-            return 1 - x ** 2
+            return 1 - x**2
        return np.tanh(x)

    def softmax(self, x: np.ndarray) -> np.ndarray:
        """
        Softmax activation function.
-
+
        :param x: The input array.
        :return: The softmax activation.
        """
@@ -173,7 +172,7 @@ def reset(self) -> None:
    def forward(self, inputs: list) -> list:
        """
        Perform forward propagation through the LSTM network.
-
+
        :param inputs: The input data as a list of one-hot encoded vectors.
        :return: The outputs of the network.
        """
@@ -182,21 +181,29 @@ def forward(self, inputs: list) -> list:
        outputs = []
        for t in range(len(inputs)):
            self.concat_inputs[t] = np.concatenate(
-                (self.hidden_states[t - 1], inputs[t]))
-
-            self.forget_gates[t] = self.sigmoid(np.dot(self.wf,
-                self.concat_inputs[t]) + self.bf)
-            self.input_gates[t] = self.sigmoid(np.dot(self.wi,
-                self.concat_inputs[t]) + self.bi)
-            self.candidate_gates[t] = self.tanh(np.dot(self.wc,
-                self.concat_inputs[t]) + self.bc)
-            self.output_gates[t] = self.sigmoid(np.dot(self.wo,
-                self.concat_inputs[t]) + self.bo)
-
-            self.cell_states[t] = self.forget_gates[t] * self.cell_states[t - 1] + \
-                self.input_gates[t] * self.candidate_gates[t]
-            self.hidden_states[t] = self.output_gates[t] * \
-                self.tanh(self.cell_states[t])
+                (self.hidden_states[t - 1], inputs[t])
+            )
+
+            self.forget_gates[t] = self.sigmoid(
+                np.dot(self.wf, self.concat_inputs[t]) + self.bf
+            )
+            self.input_gates[t] = self.sigmoid(
+                np.dot(self.wi, self.concat_inputs[t]) + self.bi
+            )
+            self.candidate_gates[t] = self.tanh(
+                np.dot(self.wc, self.concat_inputs[t]) + self.bc
+            )
+            self.output_gates[t] = self.sigmoid(
+                np.dot(self.wo, self.concat_inputs[t]) + self.bo
+            )
+
+            self.cell_states[t] = (
+                self.forget_gates[t] * self.cell_states[t - 1]
+                + self.input_gates[t] * self.candidate_gates[t]
+            )
+            self.hidden_states[t] = self.output_gates[t] * self.tanh(
+                self.cell_states[t]
+            )

            outputs.append(np.dot(self.wy, self.hidden_states[t]) + self.by)
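
The block above is the standard LSTM cell update: with z_t = [h_{t-1}; x_t], the gates are f_t = sigmoid(Wf z_t + bf), i_t = sigmoid(Wi z_t + bi), c~_t = tanh(Wc z_t + bc), o_t = sigmoid(Wo z_t + bo), followed by c_t = f_t * c_{t-1} + i_t * c~_t and h_t = o_t * tanh(c_t), with element-wise products. One step outside the class, as a sketch with random weights and biases omitted for brevity:

import numpy as np

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

rng = np.random.default_rng(0)
hidden_dim, char_size = 4, 3
wf, wi, wc, wo = (rng.uniform(-1, 1, (hidden_dim, char_size + hidden_dim)) for _ in range(4))

h_prev = np.zeros((hidden_dim, 1))
c_prev = np.zeros((hidden_dim, 1))
x_t = np.eye(char_size)[:, [0]]  # one-hot input

z_t = np.concatenate((h_prev, x_t))
f_t, i_t, o_t = sigmoid(wf @ z_t), sigmoid(wi @ z_t), sigmoid(wo @ z_t)
c_t = f_t * c_prev + i_t * np.tanh(wc @ z_t)
h_t = o_t * np.tanh(c_t)
print(h_t.shape)  # (4, 1)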
@@ -205,7 +212,7 @@ def forward(self, inputs: list) -> list:
    def backward(self, errors: list, inputs: list) -> None:
        """
        Perform backpropagation through time to compute gradients and update weights.
-
+
        :param errors: The errors at each time step.
        :param inputs: The input data as a list of one-hot encoded vectors.
        """
@@ -215,8 +222,10 @@ def backward(self, errors: list, inputs: list) -> None:
        d_wo, d_bo = 0, 0
        d_wy, d_by = 0, 0

-        dh_next, dc_next = np.zeros_like(self.hidden_states[0]), \
-            np.zeros_like(self.cell_states[0])
+        dh_next, dc_next = (
+            np.zeros_like(self.hidden_states[0]),
+            np.zeros_like(self.cell_states[0]),
+        )
        for t in reversed(range(len(inputs))):
            error = errors[t]

@@ -228,45 +237,69 @@ def backward(self, errors: list, inputs: list) -> None:
            d_hs = np.dot(self.wy.T, error) + dh_next

            # Output Gate Weights and Biases Errors
-            d_o = self.tanh(self.cell_states[t]) * d_hs * \
-                self.sigmoid(self.output_gates[t], derivative=True)
+            d_o = (
+                self.tanh(self.cell_states[t])
+                * d_hs
+                * self.sigmoid(self.output_gates[t], derivative=True)
+            )
            d_wo += np.dot(d_o, inputs[t].T)
            d_bo += d_o

            # Cell State Error
-            d_cs = self.tanh(self.tanh(self.cell_states[t]),
-                derivative=True) * self.output_gates[t] * d_hs + dc_next
+            d_cs = (
+                self.tanh(self.tanh(self.cell_states[t]), derivative=True)
+                * self.output_gates[t]
+                * d_hs
+                + dc_next
+            )

            # Forget Gate Weights and Biases Errors
-            d_f = d_cs * self.cell_states[t - 1] * \
-                self.sigmoid(self.forget_gates[t], derivative=True)
+            d_f = (
+                d_cs
+                * self.cell_states[t - 1]
+                * self.sigmoid(self.forget_gates[t], derivative=True)
+            )
            d_wf += np.dot(d_f, inputs[t].T)
            d_bf += d_f

            # Input Gate Weights and Biases Errors
-            d_i = d_cs * self.candidate_gates[t] * \
-                self.sigmoid(self.input_gates[t], derivative=True)
+            d_i = (
+                d_cs
+                * self.candidate_gates[t]
+                * self.sigmoid(self.input_gates[t], derivative=True)
+            )
            d_wi += np.dot(d_i, inputs[t].T)
            d_bi += d_i

            # Candidate Gate Weights and Biases Errors
-            d_c = d_cs * self.input_gates[t] * self.tanh(self.candidate_gates[t],
-                derivative=True)
+            d_c = (
+                d_cs
+                * self.input_gates[t]
+                * self.tanh(self.candidate_gates[t], derivative=True)
+            )
            d_wc += np.dot(d_c, inputs[t].T)
            d_bc += d_c

            # Update the next hidden and cell state errors
-            dh_next = np.dot(self.wf.T, d_f) + np.dot(self.wi.T, d_i) + \
-                np.dot(self.wo.T, d_o) + np.dot(self.wc.T, d_c)
+            dh_next = (
+                np.dot(self.wf.T, d_f)
+                + np.dot(self.wi.T, d_i)
+                + np.dot(self.wo.T, d_o)
+                + np.dot(self.wc.T, d_c)
+            )
            dc_next = d_cs * self.forget_gates[t]

        # Apply gradients to weights and biases
-        for param, grad in zip([self.wf, self.wi, self.wc, self.wo, self.wy],
-                               [d_wf, d_wi, d_wc, d_wo, d_wy]):
+        for param, grad in zip(
+            [self.wf, self.wi, self.wc, self.wo, self.wy],
+            [d_wf, d_wi, d_wc, d_wo, d_wy],
+        ):
            param -= self.lr * grad

-        for param, grad in zip([self.bf, self.bi, self.bc, self.bo, self.by],
-                               [d_bf, d_bi, d_bc, d_bo, d_by]):
+        for param, grad in zip(
+            [self.bf, self.bi, self.bc, self.bo, self.by],
+            [d_bf, d_bi, d_bc, d_bo, d_by],
+        ):
            param -= self.lr * grad

    def train(self) -> None:
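
The pattern in this hunk: each gate's delta multiplies the upstream error by that gate's local derivative, the per-step gradients are accumulated over all time steps, and only then applied. A subtlety in the two closing loops is that param -= self.lr * grad updates each numpy array in place, so assigning through the temporary lists really does update the network's weights. A sketch of that behavior:

import numpy as np

lr = 0.05
weights = [np.ones((2, 2)), np.zeros((2, 2))]
grads = [np.full((2, 2), 0.1), np.full((2, 2), 0.2)]
for param, grad in zip(weights, grads):
    param -= lr * grad  # in-place on the array the list element refers to
print(weights[0][0, 0])  # 0.995 -- the original array was mutated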
@@ -289,7 +322,7 @@ def train(self) -> None:
    def predict(self, inputs: list) -> str:
        """
        Predict the next character in the sequence.
-
+
        :param inputs: The input data as a list of one-hot encoded vectors.
        :return: The predicted character.
        """
@@ -301,11 +334,13 @@ def test(self) -> None:
        Test the LSTM network on the input data and compute accuracy.
        """
        inputs = [self.one_hot_encode(char) for char in self.train_X]
-        correct_predictions = sum(self.idx_to_char[np.argmax(self.softmax(output))] == target
-                                  for output, target in zip(self.forward(inputs), self.train_y))
+        correct_predictions = sum(
+            self.idx_to_char[np.argmax(self.softmax(output))] == target
+            for output, target in zip(self.forward(inputs), self.train_y)
+        )

        accuracy = (correct_predictions / len(self.train_y)) * 100
-        print(f'Accuracy: {accuracy:.2f}%')
+        print(f"Accuracy: {accuracy:.2f}%")


if __name__ == "__main__":
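
The body of the __main__ block falls outside this diff. Based on the constructor and methods above, usage presumably looks like the following sketch (the sample text is illustrative):

data = """Long Short-Term Memory networks are a kind of recurrent neural
network capable of learning long-term dependencies.""".lower()

lstm = LSTM(data=data, hidden_dim=25, epochs=1000, lr=0.05)
lstm.train()
lstm.test()  # prints the accuracy, e.g. "Accuracy: 95.00%" (value illustrative)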