From 95ddf2e07095a86b7aa0cb85481d0b54ec68c97d Mon Sep 17 00:00:00 2001 From: Anshuman Date: Mon, 3 Feb 2025 10:08:32 +0530 Subject: [PATCH] Reduced GPU requirements --- pkg/manifests/vllm/deployment.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/manifests/vllm/deployment.yaml b/pkg/manifests/vllm/deployment.yaml index 30f6f6711..09aeb002d 100644 --- a/pkg/manifests/vllm/deployment.yaml +++ b/pkg/manifests/vllm/deployment.yaml @@ -16,7 +16,7 @@ kind: Deployment metadata: name: vllm-llama2-7b-pool spec: - replicas: 3 + replicas: 1 selector: matchLabels: app: vllm-llama2-7b-pool @@ -39,7 +39,7 @@ spec: - "8000" - "--enable-lora" - "--max-loras" - - "4" + - "2" - "--max-cpu-loras" - "12" - "--lora-modules"