File tree 1 file changed +7
-3
lines changed 1 file changed +7
-3
lines changed Original file line number Diff line number Diff line change 14
14
spec :
15
15
containers :
16
16
- name : lora
17
- image: "seedjeffwan/vllm-cpu-env:bb392af4-20250203"
17
+ image: "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.7.2" # formal images can be found in https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo
18
18
imagePullPolicy : Always
19
19
command : ["python3", "-m", "vllm.entrypoints.openai.api_server"]
20
20
args :
23
23
- " --port"
24
24
- " 8000"
25
25
- " --enable-lora"
26
+ - " --max-loras"
27
+ - " 4"
26
28
- " --lora-modules"
27
- - '{"name": "tweet-summary-0", "path": "/adapters/hub/models--ai-blond--Qwen-Qwen2.5-Coder-1.5B-Instruct-lora/snapshots/9cde18d8ed964b0519fb481cca6acd936b2ca811"}'
28
- - '{"name": "tweet-summary-1", "path": "/adapters/hub/models--ai-blond--Qwen-Qwen2.5-Coder-1.5B-Instruct-lora/snapshots/9cde18d8ed964b0519fb481cca6acd936b2ca811"}'
29
+ - '{"name": "tweet-summary-0", "path": "/adapters/ai-blond/Qwen-Qwen2.5-Coder-1.5B-Instruct-lora_0"}'
30
+ - '{"name": "tweet-summary-1", "path": "/adapters/ai-blond/Qwen-Qwen2.5-Coder-1.5B-Instruct-lora_1"}'
29
31
env :
30
32
- name : PORT
31
33
value : " 8000"
36
38
key : token
37
39
- name : VLLM_ALLOW_RUNTIME_LORA_UPDATING
38
40
value : " true"
41
+ - name : VLLM_CPU_KVCACHE_SPACE
42
+ value : " 4"
39
43
ports :
40
44
- containerPort : 8000
41
45
name : http
You can’t perform that action at this time.
0 commit comments