2 files changed (+31 −1 lines changed)

@@ -73,6 +73,19 @@
           name: shm
         - name: adapters
           mountPath: "/adapters"
+      initContainers:
+        - name: lora-adapter-syncer
+          tty: true
+          stdin: true
+          image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/lora-syncer:main
+          restartPolicy: Always
+          imagePullPolicy: Always
+          env:
+            - name: DYNAMIC_LORA_ROLLOUT_CONFIG
+              value: "/config/configmap.yaml"
+          volumeMounts: # DO NOT USE subPath, dynamic configmap updates don't work on subPaths
+            - name: config-volume
+              mountPath: /config
       restartPolicy: Always
       schedulerName: default-scheduler
       terminationGracePeriodSeconds: 30
@@ -84,3 +97,21 @@
           medium: Memory
       - name: adapters
         emptyDir: {}
+      - name: config-volume
+        configMap:
+          name: vllm-qwen-adapters
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: vllm-qwen-adapters
+data:
+  configmap.yaml: |
+    vLLMLoRAConfig:
+      name: vllm-llama2-7b
+      port: 8000
+      ensureExist:
+        models:
+        - base-model: Qwen/Qwen2.5-1.5B
+          id: tweet-summary-1
+          source: SriSanth2345/Qwen-1.5B-Tweet-Generations
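Not part of the diff, but for orientation: the new `lora-adapter-syncer` runs as a native sidecar (an initContainer with `restartPolicy: Always`, available since Kubernetes 1.28) and reads the mounted ConfigMap at `/config/configmap.yaml` to reconcile the adapters listed under `ensureExist` against the vLLM server. A minimal verification sketch, assuming the Deployment is named `vllm-llama2-7b` (the name used in `vLLMLoRAConfig` above), that the updated manifest is saved locally as `gpu-deployment.yaml`, and that the server exposes the OpenAI-compatible `/v1/models` endpoint on port 8000:

```bash
# Apply the updated Deployment plus the new ConfigMap (local copy of the manifest above).
kubectl apply -f gpu-deployment.yaml

# Port-forward the vLLM server and check that the adapter registered by the
# syncer ("tweet-summary-1" in the ConfigMap) is listed as a served model.
kubectl port-forward deployment/vllm-llama2-7b 8000:8000 &
curl -s http://localhost:8000/v1/models | grep tweet-summary-1
```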
@@ -35,7 +35,6 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml
    ```
 
-
 === "CPU-Based Model Server"
 
    This setup is using the formal `vllm-cpu` image, which according to the documentation can run vLLM on x86 CPU platform.
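Not part of the diff: because the ConfigMap is mounted as a directory rather than via `subPath` (see the comment in the manifest), edits to `vllm-qwen-adapters` propagate into the running pod, so the syncer can pick up adapter changes without a pod restart. A hedged sketch of rolling out an additional adapter by re-applying the ConfigMap; the second entry is purely hypothetical and only illustrates the shape of the `ensureExist` list:

```bash
# Hypothetical example: append a second adapter entry and re-apply the ConfigMap.
# The syncer is expected to reconcile the updated "ensureExist" list in place.
kubectl apply -f - <<'EOF'
apiVersion: v1
kind: ConfigMap
metadata:
  name: vllm-qwen-adapters
data:
  configmap.yaml: |
    vLLMLoRAConfig:
      name: vllm-llama2-7b
      port: 8000
      ensureExist:
        models:
        - base-model: Qwen/Qwen2.5-1.5B
          id: tweet-summary-1
          source: SriSanth2345/Qwen-1.5B-Tweet-Generations
        - base-model: Qwen/Qwen2.5-1.5B
          id: my-second-adapter                      # hypothetical adapter id
          source: example-org/example-lora-adapter   # hypothetical adapter repo
EOF
```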