
Commit 37555cf

fix: max_past default value must be -1, not 0 (#1348)
1 parent: 9b78a6e

File tree: 3 files changed (+5, -2 lines)


server/text_generation_server/models/custom_modeling/flash_mistral_modeling.py

Lines changed: 1 addition & 1 deletion
@@ -149,7 +149,7 @@ def __init__(
     ):
         super().__init__()
         self.max_past = (
-            config.sliding_window if config.sliding_window is not None else 0
+            config.sliding_window if config.sliding_window is not None else -1
         )
         self.num_heads = config.num_attention_heads
         self.hidden_size = config.hidden_size
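
Why -1 rather than 0: `max_past` is fed onward as the attention kernel's `window_size_left`, where -1 is the sentinel for "sliding window disabled" while 0 would describe a zero-width window (now rejected outright by the `flash_attn.py` hunk below). A minimal sketch of the sentinel pattern, using a hypothetical `clamp_past` helper rather than the model's actual forward-pass code:

import torch

# Illustrative only: clamp_past is a hypothetical helper, not TGI code.
def clamp_past(input_lengths: torch.Tensor, max_past: int) -> torch.Tensor:
    # -1 disables the window: sequences may attend to their full past.
    if max_past == -1:
        return input_lengths
    # Otherwise cap how many past tokens each sequence can attend to.
    return torch.clamp(input_lengths, max=max_past)

# With the old default of 0, a model whose config.sliding_window is None
# would have clamped every sequence to zero past tokens instead of
# disabling the window.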

server/text_generation_server/models/custom_modeling/flash_mixtral_modeling.py

Lines changed: 1 addition & 1 deletion
@@ -204,7 +204,7 @@ def __init__(
     ):
         super().__init__()
         self.max_past = (
-            config.sliding_window if config.sliding_window is not None else 0
+            config.sliding_window if config.sliding_window is not None else -1
        )
         self.num_heads = config.num_attention_heads
         self.hidden_size = config.hidden_size

server/text_generation_server/utils/flash_attn.py

Lines changed: 3 additions & 0 deletions
@@ -72,6 +72,9 @@ def attention(
     softmax_scale,
     window_size_left=-1,
 ):
+    if window_size_left <= 0 and window_size_left != -1:
+        raise ValueError("`window_size_left` must be > 0 or -1")
+
     if HAS_FLASH_ATTN_V2_CUDA:
         return flash_attn_2_cuda.varlen_fwd(
             q,
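
The guard makes the contract explicit: `window_size_left` must be a positive window size or the -1 "disabled" sentinel, so a leftover 0 default now fails fast instead of silently requesting a zero-width window. A standalone restatement of the check for illustration (the real `attention()` also takes q/k/v and other arguments, omitted here):

# Standalone restatement of the new validation, not the full function.
def validate_window(window_size_left: int) -> None:
    if window_size_left <= 0 and window_size_left != -1:
        raise ValueError("`window_size_left` must be > 0 or -1")

validate_window(-1)      # ok: sliding window disabled
validate_window(4096)    # ok: a real window size, e.g. Mistral 7B's 4096
try:
    validate_window(0)   # the old max_past default
except ValueError as err:
    print(err)           # `window_size_left` must be > 0 or -1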
