Skip to content

Commit fc3f414

Browse files
helm-improvements (#590)
1 parent 2fed6ca commit fc3f414

File tree

6 files changed

+103
-87
lines changed

6 files changed

+103
-87
lines changed

config/charts/inferencepool/README.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,18 @@ To install an InferencePool named `vllm-llama2-7b` that selects from endpoints
1010
```txt
1111
$ helm install vllm-llama2-7b ./config/charts/inferencepool \
1212
--set inferencePool.name=vllm-llama2-7b \
13-
--set inferencePool.selector.app=vllm-llama2-7b \
13+
--set inferencePool.modelServers.matchLabels.app=vllm-llama2-7b \
1414
--set inferencePool.targetPortNumber=8000
1515
```
1616

17-
where `inferencePool.targetPortNumber` is the pod that vllm backends served on and `inferencePool.selector` is the selector to match the vllm backends.
17+
where `inferencePool.targetPortNumber` is the port that the vllm backends serve on and `inferencePool.modelServers.matchLabels` is the label selector that matches the vllm backends.
1818

1919
To install via the latest published chart in staging (--version v0 indicates latest dev version), you can run the following command:
2020

2121
```txt
2222
$ helm install vllm-llama2-7b \
2323
--set inferencePool.name=vllm-llama2-7b \
24-
--set inferencePool.selector.app=vllm-llama2-7b \
24+
--set inferencePool.modelServers.matchLabels.app=vllm-llama2-7b \
2525
--set inferencePool.targetPortNumber=8000 \
2626
oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
2727
```
@@ -42,7 +42,7 @@ The following table list the configurable parameters of the chart.
4242
|---------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
4343
| `inferencePool.name` | Name for the InferencePool; the inference extension will be named `${inferencePool.name}-epp`. |
4444
| `inferencePool.targetPortNumber` | Target port number for the vllm backends; the inference extension uses it to scrape metrics. |
45-
| `inferencePool.selector` | Label selector to match vllm backends managed by the inference pool. |
45+
| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. |
4646
| `inferenceExtension.replicas` | Number of replicas for the inference extension service. Defaults to `1`. |
4747
| `inferenceExtension.image.name` | Name of the container image used for the inference extension. |
4848
| `inferenceExtension.image.hub` | Registry URL where the inference extension image is hosted. |
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{{/*
2+
common validations
3+
*/}}
4+
{{- define "gateway-api-inference-extension.validations.inferencepool.common" -}}
5+
{{- if not $.Values.inferencePool.name }}
6+
{{- fail "missing .Values.inferencePool.name" }}
7+
{{- end }}
8+
9+
10+
{{- if or (empty $.Values.inferencePool.modelServers) (not $.Values.inferencePool.modelServers.matchLabels) }}
11+
{{- fail ".Values.inferencePool.modelServers.matchLabels is required" }}
12+
{{- end }}
13+
{{- end -}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: {{ include "gateway-api-inference-extension.name" . }}
5+
namespace: {{ .Release.Namespace }}
6+
labels:
7+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
8+
spec:
9+
replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
10+
selector:
11+
matchLabels:
12+
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
13+
template:
14+
metadata:
15+
labels:
16+
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
17+
spec:
18+
serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
19+
containers:
20+
- name: epp
21+
image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
22+
imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
23+
args:
24+
- -poolName
25+
- {{ .Values.inferencePool.name }}
26+
- -poolNamespace
27+
- {{ .Release.Namespace }}
28+
- -v
29+
- "3"
30+
- -grpcPort
31+
- "9002"
32+
- -grpcHealthPort
33+
- "9003"
34+
- -metricsPort
35+
- "9090"
36+
env:
37+
- name: USE_STREAMING
38+
value: "true"
39+
ports:
40+
- name: grpc
41+
containerPort: 9002
42+
- name: grpc-health
43+
containerPort: 9003
44+
- name: metrics
45+
containerPort: 9090
46+
livenessProbe:
47+
grpc:
48+
port: 9003
49+
service: inference-extension
50+
initialDelaySeconds: 5
51+
periodSeconds: 10
52+
readinessProbe:
53+
grpc:
54+
port: 9003
55+
service: inference-extension
56+
initialDelaySeconds: 5
57+
periodSeconds: 10
58+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
apiVersion: v1
2+
kind: Service
3+
metadata:
4+
name: {{ include "gateway-api-inference-extension.name" . }}
5+
namespace: {{ .Release.Namespace }}
6+
labels:
7+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
8+
spec:
9+
selector:
10+
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
11+
ports:
12+
- name: grpc-ext-proc
13+
protocol: TCP
14+
port: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
15+
- name: http-metrics
16+
protocol: TCP
17+
port: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
18+
type: ClusterIP
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
{{ include "gateway-api-inference-extension.validations.inferencepool.common" $ }}
12
apiVersion: inference.networking.x-k8s.io/v1alpha2
23
kind: InferencePool
34
metadata:
@@ -8,85 +9,10 @@ metadata:
89
spec:
910
targetPortNumber: {{ .Values.inferencePool.targetPortNumber }}
1011
selector:
11-
{{- range $key, $value := .Values.inferencePool.selector }}
12-
{{ $key }}: {{ quote $value }}
13-
{{- end }}
12+
{{- if .Values.inferencePool.modelServers.matchLabels }}
13+
{{- range $key, $value := .Values.inferencePool.modelServers.matchLabels }}
14+
{{ $key }}: {{ quote $value }}
15+
{{- end }}
16+
{{- end }}
1417
extensionRef:
1518
name: {{ include "gateway-api-inference-extension.name" . }}
16-
---
17-
apiVersion: apps/v1
18-
kind: Deployment
19-
metadata:
20-
name: {{ include "gateway-api-inference-extension.name" . }}
21-
namespace: {{ .Release.Namespace }}
22-
labels:
23-
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
24-
spec:
25-
replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
26-
selector:
27-
matchLabels:
28-
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
29-
template:
30-
metadata:
31-
labels:
32-
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
33-
spec:
34-
serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
35-
containers:
36-
- name: epp
37-
image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
38-
imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
39-
args:
40-
- -poolName
41-
- {{ .Values.inferencePool.name }}
42-
- -poolNamespace
43-
- {{ .Release.Namespace }}
44-
- -v
45-
- "3"
46-
- -grpcPort
47-
- "9002"
48-
- -grpcHealthPort
49-
- "9003"
50-
- -metricsPort
51-
- "9090"
52-
env:
53-
- name: USE_STREAMING
54-
value: "true"
55-
ports:
56-
- name: grpc
57-
containerPort: 9002
58-
- name: grpc-health
59-
containerPort: 9003
60-
- name: metrics
61-
containerPort: 9090
62-
livenessProbe:
63-
grpc:
64-
port: 9003
65-
service: inference-extension
66-
initialDelaySeconds: 5
67-
periodSeconds: 10
68-
readinessProbe:
69-
grpc:
70-
port: 9003
71-
service: inference-extension
72-
initialDelaySeconds: 5
73-
periodSeconds: 10
74-
---
75-
apiVersion: v1
76-
kind: Service
77-
metadata:
78-
name: {{ include "gateway-api-inference-extension.name" . }}
79-
namespace: {{ .Release.Namespace }}
80-
labels:
81-
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
82-
spec:
83-
selector:
84-
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
85-
ports:
86-
- name: grpc-ext-proc
87-
protocol: TCP
88-
port: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
89-
- name: http-metrics
90-
protocol: TCP
91-
port: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
92-
type: ClusterIP

config/charts/inferencepool/values.yaml

+4-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,8 @@ inferenceExtension:
88
extProcPort: 9002
99

1010
inferencePool:
11-
name: pool-1
11+
# name: pool-1 # REQUIRED
1212
targetPortNumber: 8000
13-
selector:
14-
app: vllm-llama2-7b
13+
# modelServers: # REQUIRED
14+
# matchLabels:
15+
# app: vllm-llama2-7b

0 commit comments

Comments
 (0)