helm-improvements (#590)

LiorLieberman · web-flow · commit fc3f41498cac · 2025-03-27T09:42:41.000-07:00
diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md
@@ -10,18 +10,18 @@ To install an InferencePool named `vllm-llama2-7b`  that selects from endpoints
 ```txt
 $ helm install vllm-llama2-7b ./config/charts/inferencepool \
   --set inferencePool.name=vllm-llama2-7b \
-  --set inferencePool.selector.app=vllm-llama2-7b \
+  --set inferencePool.modelServers.matchLabels.app=vllm-llama2-7b \
   --set inferencePool.targetPortNumber=8000
 ```
 
-where `inferencePool.targetPortNumber` is the pod that vllm backends served on and `inferencePool.selector` is the selector to match the vllm backends.
+where `inferencePool.targetPortNumber` is the port that the vllm backend pods serve on and `inferencePool.modelServers.matchLabels` is the label selector used to match the vllm backends.
 
 To install via the latest published chart in staging  (--version v0 indicates latest dev version), you can run the following command:
 
 ```txt
 $ helm install vllm-llama2-7b \
   --set inferencePool.name=vllm-llama2-7b \
-  --set inferencePool.selector.app=vllm-llama2-7b \
+  --set inferencePool.modelServers.matchLabels.app=vllm-llama2-7b \
   --set inferencePool.targetPortNumber=8000 \
   oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
 ```
@@ -42,7 +42,7 @@ The following table list the configurable parameters of the chart.
 |---------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
 | `inferencePool.name`                        | Name for the InferencePool, and inference extension will be named as `${inferencePool.name}-epp`.                |
 | `inferencePool.targetPortNumber`            | Target port number for the vllm backends, will be used to scrape metrics by the inference extension.             |
-| `inferencePool.selector`                     | Label selector to match vllm backends managed by the inference pool.                                             |
+| `inferencePool.modelServers.matchLabels`    | Label selector to match vllm backends managed by the inference pool.                                             |
 | `inferenceExtension.replicas`               | Number of replicas for the inference extension service. Defaults to `1`.                                           |
 | `inferenceExtension.image.name`             | Name of the container image used for the inference extension.                                                    |
 | `inferenceExtension.image.hub`              | Registry URL where the inference extension image is hosted.                                                     |
diff --git a/config/charts/inferencepool/templates/_validations.tpl b/config/charts/inferencepool/templates/_validations.tpl
@@ -0,0 +1,13 @@
+{{/*
+Common validations: abort rendering with a clear message when a required InferencePool value is missing.
+*/}}
+{{- define "gateway-api-inference-extension.validations.inferencepool.common" -}}
+{{- if not $.Values.inferencePool.name }}
+{{- fail "missing .Values.inferencePool.name" }}
+{{- end }}
+
+{{- /* `default dict` keeps the matchLabels lookup nil-safe even where `or` would not short-circuit (Go < 1.18). */ -}}
+{{- if not ($.Values.inferencePool.modelServers | default dict).matchLabels }}
+{{- fail ".Values.inferencePool.modelServers.matchLabels is required" }}
+{{- end }}
+{{- end -}}
diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml
@@ -0,0 +1,58 @@
+{{- /* Deployment for the inference extension (container `epp`). Its gRPC and metrics ports read the same values as the chart's Service ports, since that Service sets no targetPort. */ -}}
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
+  selector:
+    matchLabels:
+      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
+    spec:
+      serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
+      containers:
+      - name: epp
+        image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
+        imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
+        args:
+        - -poolName
+        - {{ .Values.inferencePool.name | quote }}  # quoted: container args must render as strings
+        - -poolNamespace
+        - {{ .Release.Namespace | quote }}
+        - -v
+        - "3"
+        - -grpcPort
+        - {{ .Values.inferenceExtension.extProcPort | default 9002 | quote }}
+        - -grpcHealthPort
+        - "9003"  # health port stays fixed; both probes below target 9003
+        - -metricsPort
+        - {{ .Values.inferenceExtension.metricsPort | default 9090 | quote }}
+        env:
+        - name: USE_STREAMING
+          value: "true"
+        ports:
+        - name: grpc
+          containerPort: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
+        - name: grpc-health
+          containerPort: 9003
+        - name: metrics
+          containerPort: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
+        livenessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        readinessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
diff --git a/config/charts/inferencepool/templates/epp-service.yaml b/config/charts/inferencepool/templates/epp-service.yaml
@@ -0,0 +1,18 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  type: ClusterIP
+  selector:
+    {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
+  ports:
+  - port: {{ default 9002 .Values.inferenceExtension.extProcPort }}  # no targetPort: forwards to this same port number on the pod
+    name: grpc-ext-proc
+    protocol: TCP
+  - port: {{ default 9090 .Values.inferenceExtension.metricsPort }}  # falls back to 9090 when metricsPort is unset
+    name: http-metrics
+    protocol: TCP
diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml
@@ -1,3 +1,4 @@
+{{ include "gateway-api-inference-extension.validations.inferencepool.common" $ }}
 apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferencePool
 metadata:
@@ -8,85 +9,10 @@ metadata:
 spec:
   targetPortNumber: {{ .Values.inferencePool.targetPortNumber }}
   selector:
-      {{- range $key, $value := .Values.inferencePool.selector }}
-      {{ $key }}: {{ quote $value }}
-      {{- end }}
+    {{- if .Values.inferencePool.modelServers.matchLabels }}
+    {{- range $key, $value := .Values.inferencePool.modelServers.matchLabels }}
+    {{ $key }}: {{ quote $value }}
+    {{- end }}
+    {{- end }}
   extensionRef:
     name: {{ include "gateway-api-inference-extension.name" . }}
----
-apiVersion: apps/v1
-kind: Deployment
-metadata:
-  name: {{ include "gateway-api-inference-extension.name" . }}
-  namespace: {{ .Release.Namespace }}
-  labels:
-    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
-spec:
-  replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
-  selector:
-    matchLabels:
-      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
-  template:
-    metadata:
-      labels:
-        {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
-    spec:
-      serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
-      containers:
-      - name: epp
-        image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
-        imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
-        args:
-        - -poolName
-        - {{ .Values.inferencePool.name }}
-        - -poolNamespace
-        - {{ .Release.Namespace }}
-        - -v
-        - "3"
-        - -grpcPort
-        - "9002"
-        - -grpcHealthPort
-        - "9003"
-        - -metricsPort
-        - "9090"
-        env:
-        - name: USE_STREAMING
-          value: "true"
-        ports:
-        - name: grpc
-          containerPort: 9002
-        - name: grpc-health
-          containerPort: 9003
-        - name: metrics
-          containerPort: 9090
-        livenessProbe:
-          grpc:
-            port: 9003
-            service: inference-extension
-          initialDelaySeconds: 5
-          periodSeconds: 10
-        readinessProbe:
-          grpc:
-            port: 9003
-            service: inference-extension
-          initialDelaySeconds: 5
-          periodSeconds: 10
----
-apiVersion: v1
-kind: Service
-metadata:
-  name: {{ include "gateway-api-inference-extension.name" . }}
-  namespace: {{ .Release.Namespace }}
-  labels:
-    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
-spec:
-  selector:
-    {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
-  ports:
-    - name: grpc-ext-proc
-      protocol: TCP
-      port: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
-    - name: http-metrics
-      protocol: TCP
-      port: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
-  type: ClusterIP
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
@@ -8,7 +8,8 @@ inferenceExtension:
   extProcPort: 9002
 
 inferencePool:
-  name: pool-1
+  # name: pool-1 # REQUIRED
   targetPortNumber: 8000
-  selector:
-    app: vllm-llama2-7b
+  # modelServers: # REQUIRED
+  #   matchLabels:
+  #     app: vllm-llama2-7b