File tree 3 files changed +19
-8
lines changed
environments/dev/kubernetes-kgateway
3 files changed +19
-8
lines changed Original file line number Diff line number Diff line change @@ -31,13 +31,12 @@ spec:
31
31
- " -c"
32
32
args :
33
33
- |
34
- export LMCACHE_DISTRIBUTED_URL=$${${POD_IP}}:80 && \
34
+ export LMCACHE_DISTRIBUTED_URL=$${${POD_IP}} && \
35
35
vllm serve ${MODEL_NAME} \
36
36
--host 0.0.0.0 \
37
37
--port 8000 \
38
- --enable-chunked-prefill false \
39
38
--max-model-len ${MAX_MODEL_LEN} \
40
- --kv-transfer-config '{"kv_connector":"LMCacheConnector ","kv_role":"kv_both"}'
39
+ --kv-transfer-config '{"kv_connector":"LMCacheConnectorV1 ","kv_role":"kv_both"}'
41
40
ports :
42
41
- name : http
43
42
containerPort : 8000
78
77
secretKeyRef :
79
78
name : ${HF_SECRET_NAME}
80
79
key : ${HF_SECRET_KEY}
80
+ - name : VLLM_ENABLE_V1_MULTIPROCESSING
81
+ value : " 1"
82
+ - name : VLLM_WORKER_MULTIPROC_METHOD
83
+ value : spawn
81
84
- name : LMCACHE_LOOKUP_URL
82
85
value : ${REDIS_HOST}:${REDIS_PORT}
83
86
- name : LMCACHE_ENABLE_DEBUG
Original file line number Diff line number Diff line change 29
29
valueFrom :
30
30
secretKeyRef :
31
31
name : hf-token
32
- key : ${HF_SECRET_KEY}
32
+ key : ${HF_SECRET_KEY}
33
+ - name : ENABLE_KVCACHE_AWARE_SCORER
34
+ value : " true"
35
+ - name : KVCACHE_AWARE_SCORER_WEIGHT
36
+ value : " 2.0"
37
+ - name : ENABLE_LOAD_AWARE_SCORER
38
+ value : " true"
39
+ - name : LOAD_AWARE_SCORER_WEIGHT
40
+ value : " 1.0"
Original file line number Diff line number Diff line change @@ -65,10 +65,10 @@ case "${VLLM_MODE}" in
65
65
export LORA_ADAPTER_SYNCER_TAG=" ${LORA_ADAPTER_SYNCER_TAG:- v20250425-ddc3d69} "
66
66
67
67
elif [[ " $VLLM_MODE " == " vllm-p2p" ]]; then
68
- export VLLM_IMAGE=" ${VLLM_IMAGE:- lmcache / vllm-openai } "
69
- export VLLM_TAG=" ${VLLM_TAG:- 2025-03-10 } "
70
- export EPP_IMAGE=" ${EPP_IMAGE:- quay.io/ vmaroon / gateway-api-inference-extension/ epp } "
71
- export EPP_TAG=" ${EPP_TAG:- kv-aware } "
68
+ export VLLM_IMAGE=" ${VLLM_IMAGE:- quay.io / llm-d / llm-d-dev } "
69
+ export VLLM_TAG=" ${VLLM_TAG:- lmcache-0.0.6-amd64 } "
70
+ export EPP_IMAGE=" ${EPP_IMAGE:- quay.io/ llm-d / llm-d- gateway-api-inference-extension-dev } "
71
+ export EPP_TAG=" ${EPP_TAG:- 0.0.5-amd64 } "
72
72
export MAX_MODEL_LEN=" ${MAX_MODEL_LEN:- 32768} "
73
73
export PVC_NAME=" ${PVC_NAME:- vllm-p2p-storage-claim} "
74
74
export PVC_ACCESS_MODE=" ${PVC_ACCESS_MODE:- ReadWriteOnce} "
You can’t perform that action at this time.
0 commit comments