
Commit 9e9a313

authored
Add files via upload
1 parent b3c2a73 commit 9e9a313

1 file changed: +18 −14 lines


neural_network/sliding_window_attention.py

+18 −14
@@ -1,7 +1,8 @@
 """
 - - - - - -- - - - - - - - - - - - - - - - - - - - - - -
 Name - - sliding_window_attention.py
-Goal - - Implement a neural network architecture using sliding window attention for sequence modeling tasks.
+Goal - - Implement a neural network architecture using sliding window attention for sequence
+         modeling tasks.
 Detail: Total 5 layers neural network
     * Input layer
     * Sliding Window Attention Layer
@@ -11,8 +12,10 @@
 
 Date: 2024.10.20
 References:
-1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*.
-2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers with Linear Attention." *arXiv preprint arXiv:2006.16236*.
+1. Choromanska, A., et al. (2020). "On the Importance of Initialization and Momentum in
+   Deep Learning." *Proceedings of the 37th International Conference on Machine Learning*.
+2. Dai, Z., et al. (2020). "Transformers are RNNs: Fast Autoregressive Transformers
+   with Linear Attention." *arXiv preprint arXiv:2006.16236*.
 3. [Attention Mechanisms in Neural Networks](https://en.wikipedia.org/wiki/Attention_(machine_learning))
 - - - - - -- - - - - - - - - - - - - - - - - - - - - - -
 """
@@ -31,7 +34,7 @@ class SlidingWindowAttention:
         embed_dim (int): The dimensionality of the input embeddings.
     """
 
-    def __init__(self, embed_dim: int, window_size: int):
+    def __init__(self, embed_dim: int, window_size: int) -> None:
         """
         Initialize the SlidingWindowAttention module.
 
@@ -41,14 +44,16 @@ def __init__(self, embed_dim: int, window_size: int):
         """
         self.window_size = window_size
         self.embed_dim = embed_dim
-        self.attention_weights = np.random.randn(embed_dim, embed_dim)
+        rng = np.random.default_rng()
+        self.attention_weights = rng.standard_normal((embed_dim, embed_dim))
 
-    def forward(self, x: np.ndarray) -> np.ndarray:
+    def forward(self, input_tensor: np.ndarray) -> np.ndarray:
         """
         Forward pass for the sliding window attention.
 
         Args:
-            x (np.ndarray): Input tensor of shape (batch_size, seq_length, embed_dim).
+            input_tensor (np.ndarray): Input tensor of shape (batch_size, seq_length,
+                embed_dim).
 
         Returns:
             np.ndarray: Output tensor of shape (batch_size, seq_length, embed_dim).
@@ -61,16 +66,16 @@ def forward(self, x: np.ndarray) -> np.ndarray:
         >>> (output.sum() != 0).item()  # Check if output is non-zero
         True
         """
-        batch_size, seq_length, _ = x.shape
-        output = np.zeros_like(x)
+        batch_size, seq_length, _ = input_tensor.shape
+        output = np.zeros_like(input_tensor)
 
         for i in range(seq_length):
             # Define the window range
             start = max(0, i - self.window_size // 2)
             end = min(seq_length, i + self.window_size // 2 + 1)
 
             # Extract the local window
-            local_window = x[:, start:end, :]
+            local_window = input_tensor[:, start:end, :]
 
             # Compute attention scores
             attention_scores = np.matmul(local_window, self.attention_weights)
@@ -86,10 +91,9 @@ def forward(self, x: np.ndarray) -> np.ndarray:
 
     doctest.testmod()
 
-    # Example usage
-    x = np.random.randn(
-        2, 10, 4
-    )  # Batch size 2, sequence length 10, embedding dimension 4
+    # usage
+    rng = np.random.default_rng()
+    x = rng.standard_normal((2, 10, 4))  # Batch size 2, sequence length 10, embedding dimension 4
     attention = SlidingWindowAttention(embed_dim=4, window_size=3)
     output = attention.forward(x)
     print(output)
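
The hunks shown above end at the attention-score computation, so the softmax/aggregation tail of `forward` is not visible on this page. For reference, here is a minimal, self-contained sketch of the updated module in plain Python; the class name `SlidingWindowAttentionSketch` and the normalize-then-weighted-sum step are illustrative assumptions, not lines taken from the commit.

import numpy as np


class SlidingWindowAttentionSketch:
    """Sketch of the committed module; the aggregation step below is assumed, not from the diff."""

    def __init__(self, embed_dim: int, window_size: int) -> None:
        self.window_size = window_size
        self.embed_dim = embed_dim
        rng = np.random.default_rng()
        # Random projection used to score tokens inside each local window (as in the diff).
        self.attention_weights = rng.standard_normal((embed_dim, embed_dim))

    def forward(self, input_tensor: np.ndarray) -> np.ndarray:
        batch_size, seq_length, _ = input_tensor.shape
        output = np.zeros_like(input_tensor)
        for i in range(seq_length):
            # Window of at most `window_size` positions centered on i (as in the diff).
            start = max(0, i - self.window_size // 2)
            end = min(seq_length, i + self.window_size // 2 + 1)
            local_window = input_tensor[:, start:end, :]
            attention_scores = np.matmul(local_window, self.attention_weights)
            # Assumed continuation: softmax over the window positions, then a weighted sum,
            # which preserves the documented (batch_size, seq_length, embed_dim) output shape.
            weights = np.exp(attention_scores - attention_scores.max(axis=1, keepdims=True))
            weights /= weights.sum(axis=1, keepdims=True)
            output[:, i, :] = (weights * local_window).sum(axis=1)
        return output


if __name__ == "__main__":
    rng = np.random.default_rng(0)
    x = rng.standard_normal((2, 10, 4))  # batch 2, sequence length 10, embed_dim 4
    print(SlidingWindowAttentionSketch(embed_dim=4, window_size=3).forward(x).shape)  # (2, 10, 4)

The sketch only pins down the shape contract documented in the docstring; the committed file may aggregate the scores differently.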

0 commit comments
