2 files changed (+31 −1 lines changed)

@@ -73,6 +73,19 @@
           name: shm
         - name: adapters
           mountPath: "/adapters"
+      initContainers:
+        - name: lora-adapter-syncer
+          tty: true
+          stdin: true
+          image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/lora-syncer:main
+          restartPolicy: Always
+          imagePullPolicy: Always
+          env:
+            - name: DYNAMIC_LORA_ROLLOUT_CONFIG
+              value: "/config/configmap.yaml"
+          volumeMounts: # DO NOT USE subPath, dynamic configmap updates don't work on subPaths
+            - name: config-volume
+              mountPath: /config
       restartPolicy: Always
       schedulerName: default-scheduler
       terminationGracePeriodSeconds: 30
@@ -84,3 +97,21 @@
           medium: Memory
       - name: adapters
         emptyDir: {}
+      - name: config-volume
+        configMap:
+          name: vllm-qwen-adapters
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: vllm-qwen-adapters
+data:
+  configmap.yaml: |
+    vLLMLoRAConfig:
+      name: vllm-llama2-7b
+      port: 8000
+      ensureExist:
+        models:
+        - base-model: Qwen/Qwen2.5-1.5B
+          id: tweet-summary-1
+          source: SriSanth2345/Qwen-1.5B-Tweet-Generations
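Not part of the diff, but for orientation: the new `lora-adapter-syncer` runs as a native sidecar (an initContainer with `restartPolicy: Always`, available since Kubernetes 1.28) and reads the mounted ConfigMap at `/config/configmap.yaml` to reconcile the adapters listed under `ensureExist` against the vLLM server. A minimal verification sketch, assuming the Deployment is named `vllm-llama2-7b` (the name used in `vLLMLoRAConfig` above), that the updated manifest is saved locally as `gpu-deployment.yaml`, and that the server exposes the OpenAI-compatible `/v1/models` endpoint on port 8000:

```bash
# Apply the updated Deployment plus the new ConfigMap (local copy of the manifest above).
kubectl apply -f gpu-deployment.yaml

# Port-forward the vLLM server and check that the adapter registered by the
# syncer ("tweet-summary-1" in the ConfigMap) is listed as a served model.
kubectl port-forward deployment/vllm-llama2-7b 8000:8000 &
curl -s http://localhost:8000/v1/models | grep tweet-summary-1
```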
@@ -35,7 +35,6 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml
    ```
 
-
 === "CPU-Based Model Server"
 
    This setup is using the formal `vllm-cpu` image, which according to the documentation can run vLLM on x86 CPU platform.
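Not part of the diff: because the ConfigMap is mounted as a directory rather than via `subPath` (see the comment in the manifest), edits to `vllm-qwen-adapters` propagate into the running pod, so the syncer can pick up adapter changes without a pod restart. A hedged sketch of rolling out an additional adapter by re-applying the ConfigMap; the second entry is purely hypothetical and only illustrates the shape of the `ensureExist` list:

```bash
# Hypothetical example: append a second adapter entry and re-apply the ConfigMap.
# The syncer is expected to reconcile the updated "ensureExist" list in place.
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: ConfigMap
metadata:
  name: vllm-qwen-adapters
data:
  configmap.yaml: |
    vLLMLoRAConfig:
      name: vllm-llama2-7b
      port: 8000
      ensureExist:
        models:
        - base-model: Qwen/Qwen2.5-1.5B
          id: tweet-summary-1
          source: SriSanth2345/Qwen-1.5B-Tweet-Generations
        - base-model: Qwen/Qwen2.5-1.5B
          id: my-second-adapter                      # hypothetical adapter id
          source: example-org/example-lora-adapter   # hypothetical adapter repo
EOF
```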