kubernetes-sigs · anshuman-agarwala · Feb 3, 2025 · liu-cong · Feb 3, 2025 · kfswain
diff --git a/pkg/manifests/vllm/deployment.yaml b/pkg/manifests/vllm/deployment.yaml
@@ -16,7 +16,7 @@ kind: Deployment
 metadata:
   name: vllm-llama2-7b-pool
 spec:
-  replicas: 3
+  replicas: 1
   selector:
     matchLabels:
       app: vllm-llama2-7b-pool
@@ -39,7 +39,7 @@ spec:
           - "8000"
           - "--enable-lora"
           - "--max-loras"
-          - "4"
+          - "2"
           - "--max-cpu-loras"
           - "12"
           - "--lora-modules"