We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent e2c381b, commit 6a3b25e (Copy full SHA for 6a3b25e)
config/manifests/vllm/cpu-deployment.yaml
@@ -31,6 +31,8 @@ spec:
31
value: "8000"
32
- name: VLLM_ALLOW_RUNTIME_LORA_UPDATING
33
value: "true"
34
+ - name: VLLM_CPU_KVCACHE_SPACE
35
+ value: "4"
36
ports:
37
- containerPort: 8000
38
name: http
@@ -55,6 +57,13 @@ spec:
55
57
periodSeconds: 5
56
58
successThreshold: 1
59
timeoutSeconds: 1
60
+ resources:
61
+ limits:
62
+ cpu: "12"
63
+ memory: "9000Mi"
64
+ requests:
65
66
67
volumeMounts:
68
- mountPath: /data
69
name: data
0 commit comments