Skip to content

Commit 5c06734

Browse files
kfirtoledo and clubanderson
authored and committed
[build]: Updating vllm deployment to the latest image and scorers (#112)
Update the vLLM P2P deployment to support KV-cache and load scorers. Signed-off-by: Kfir Toledo <[email protected]>
1 parent 012e2b3 commit 5c06734

File tree

3 files changed

+19
-8
lines changed

3 files changed

+19
-8
lines changed

deploy/components/vllm-p2p/vllm-deployment.yaml

+6-3
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,12 @@ spec:
3131
- "-c"
3232
args:
3333
- |
34-
export LMCACHE_DISTRIBUTED_URL=$${${POD_IP}}:80 && \
34+
export LMCACHE_DISTRIBUTED_URL=$${${POD_IP}} && \
3535
vllm serve ${MODEL_NAME} \
3636
--host 0.0.0.0 \
3737
--port 8000 \
38-
--enable-chunked-prefill false \
3938
--max-model-len ${MAX_MODEL_LEN} \
40-
--kv-transfer-config '{"kv_connector":"LMCacheConnector","kv_role":"kv_both"}'
39+
--kv-transfer-config '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}'
4140
ports:
4241
- name: http
4342
containerPort: 8000
@@ -78,6 +77,10 @@ spec:
7877
secretKeyRef:
7978
name: ${HF_SECRET_NAME}
8079
key: ${HF_SECRET_KEY}
80+
- name: VLLM_ENABLE_V1_MULTIPROCESSING
81+
value: "1"
82+
- name: VLLM_WORKER_MULTIPROC_METHOD
83+
value: spawn
8184
- name: LMCACHE_LOOKUP_URL
8285
value: ${REDIS_HOST}:${REDIS_PORT}
8386
- name: LMCACHE_ENABLE_DEBUG

deploy/environments/dev/kubernetes-kgateway/patch-deployments.yaml

+9-1
Original file line numberDiff line numberDiff line change
@@ -29,4 +29,12 @@ spec:
2929
valueFrom:
3030
secretKeyRef:
3131
name: hf-token
32-
key: ${HF_SECRET_KEY}
32+
key: ${HF_SECRET_KEY}
33+
- name: ENABLE_KVCACHE_AWARE_SCORER
34+
value: "true"
35+
- name: KVCACHE_AWARE_SCORER_WEIGHT
36+
value: "2.0"
37+
- name: ENABLE_LOAD_AWARE_SCORER
38+
value: "true"
39+
- name: LOAD_AWARE_SCORER_WEIGHT
40+
value: "1.0"

scripts/kubernetes-dev-env.sh

+4-4
Original file line numberDiff line numberDiff line change
@@ -65,10 +65,10 @@ case "${VLLM_MODE}" in
6565
export LORA_ADAPTER_SYNCER_TAG="${LORA_ADAPTER_SYNCER_TAG:-v20250425-ddc3d69}"
6666

6767
elif [[ "$VLLM_MODE" == "vllm-p2p" ]]; then
68-
export VLLM_IMAGE="${VLLM_IMAGE:-lmcache/vllm-openai}"
69-
export VLLM_TAG="${VLLM_TAG:-2025-03-10}"
70-
export EPP_IMAGE="${EPP_IMAGE:- quay.io/vmaroon/gateway-api-inference-extension/epp}"
71-
export EPP_TAG="${EPP_TAG:-kv-aware}"
68+
export VLLM_IMAGE="${VLLM_IMAGE:-quay.io/llm-d/llm-d-dev}"
69+
export VLLM_TAG="${VLLM_TAG:-lmcache-0.0.6-amd64}"
70+
export EPP_IMAGE="${EPP_IMAGE:-quay.io/llm-d/llm-d-gateway-api-inference-extension-dev}"
71+
export EPP_TAG="${EPP_TAG:-0.0.5-amd64}"
7272
export MAX_MODEL_LEN="${MAX_MODEL_LEN:-32768}"
7373
export PVC_NAME="${PVC_NAME:-vllm-p2p-storage-claim}"
7474
export PVC_ACCESS_MODE="${PVC_ACCESS_MODE:-ReadWriteOnce}"

0 commit comments

Comments (0)