From 7afa7c9d92cb361305b9cf07e617d6f7315f4e37 Mon Sep 17 00:00:00 2001 From: Nir Rozenbaum Date: Mon, 17 Mar 2025 12:37:57 +0200 Subject: [PATCH 1/3] switch to formal vllm-cpu image Signed-off-by: Nir Rozenbaum --- config/manifests/vllm/cpu-deployment.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/config/manifests/vllm/cpu-deployment.yaml b/config/manifests/vllm/cpu-deployment.yaml index a0925c837..b858a8cbc 100644 --- a/config/manifests/vllm/cpu-deployment.yaml +++ b/config/manifests/vllm/cpu-deployment.yaml @@ -14,7 +14,7 @@ spec: spec: containers: - name: lora - image: "seedjeffwan/vllm-cpu-env:bb392af4-20250203" + image: "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.7.2" imagePullPolicy: Always command: ["python3", "-m", "vllm.entrypoints.openai.api_server"] args: @@ -23,6 +23,8 @@ spec: - "--port" - "8000" - "--enable-lora" + - "--max-loras" + - "4" - "--lora-modules" - '{"name": "tweet-summary-0", "path": "/adapters/hub/models--ai-blond--Qwen-Qwen2.5-Coder-1.5B-Instruct-lora/snapshots/9cde18d8ed964b0519fb481cca6acd936b2ca811"}' - '{"name": "tweet-summary-1", "path": "/adapters/hub/models--ai-blond--Qwen-Qwen2.5-Coder-1.5B-Instruct-lora/snapshots/9cde18d8ed964b0519fb481cca6acd936b2ca811"}' From ee35a933e8e23ef3fd7f5cdd4b4d9f51ce3f35e7 Mon Sep 17 00:00:00 2001 From: Nir Rozenbaum Date: Mon, 17 Mar 2025 12:41:48 +0200 Subject: [PATCH 2/3] documentation of formal vllm-cpu image Signed-off-by: Nir Rozenbaum --- config/manifests/vllm/cpu-deployment.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/config/manifests/vllm/cpu-deployment.yaml b/config/manifests/vllm/cpu-deployment.yaml index b858a8cbc..6e943b54b 100644 --- a/config/manifests/vllm/cpu-deployment.yaml +++ b/config/manifests/vllm/cpu-deployment.yaml @@ -14,7 +14,7 @@ spec: spec: containers: - name: lora - image: "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.7.2" + image: "public.ecr.aws/q9t5s3a7/vllm-cpu-release-repo:v0.7.2" # formal images can be found in https://gallery.ecr.aws/q9t5s3a7/vllm-cpu-release-repo imagePullPolicy: Always command: ["python3", "-m", "vllm.entrypoints.openai.api_server"] args: From a19e7a357f9a6ffe926f532b1511075738694758 Mon Sep 17 00:00:00 2001 From: Nir Rozenbaum Date: Mon, 17 Mar 2025 14:58:30 +0200 Subject: [PATCH 3/3] minor updates to cpu deployment Signed-off-by: Nir Rozenbaum --- config/manifests/vllm/cpu-deployment.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/config/manifests/vllm/cpu-deployment.yaml b/config/manifests/vllm/cpu-deployment.yaml index 6e943b54b..76865e4cd 100644 --- a/config/manifests/vllm/cpu-deployment.yaml +++ b/config/manifests/vllm/cpu-deployment.yaml @@ -26,8 +26,8 @@ spec: - "--max-loras" - "4" - "--lora-modules" - - '{"name": "tweet-summary-0", "path": "/adapters/hub/models--ai-blond--Qwen-Qwen2.5-Coder-1.5B-Instruct-lora/snapshots/9cde18d8ed964b0519fb481cca6acd936b2ca811"}' - - '{"name": "tweet-summary-1", "path": "/adapters/hub/models--ai-blond--Qwen-Qwen2.5-Coder-1.5B-Instruct-lora/snapshots/9cde18d8ed964b0519fb481cca6acd936b2ca811"}' + - '{"name": "tweet-summary-0", "path": "/adapters/ai-blond/Qwen-Qwen2.5-Coder-1.5B-Instruct-lora_0"}' + - '{"name": "tweet-summary-1", "path": "/adapters/ai-blond/Qwen-Qwen2.5-Coder-1.5B-Instruct-lora_1"}' env: - name: PORT value: "8000" @@ -38,6 +38,8 @@ spec: key: token - name: VLLM_ALLOW_RUNTIME_LORA_UPDATING value: "true" + - name: VLLM_CPU_KVCACHE_SPACE + value: "4" ports: - containerPort: 8000 name: http