Added provider support to InferencePool helm chart #595

Merged 2 commits on Mar 28, 2025
28 changes: 11 additions & 17 deletions config/charts/inferencepool/README.md
@@ -9,20 +9,14 @@ To install an InferencePool named `vllm-llama3-8b-instruct` that selects from e

```txt
$ helm install vllm-llama3-8b-instruct ./config/charts/inferencepool \
--set inferencePool.name=vllm-llama3-8b-instruct \
--set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
--set inferencePool.targetPortNumber=8000
```

where `inferencePool.targetPortNumber` is the port on which the vllm backend pods serve traffic and `inferencePool.modelServers.matchLabels` is the label selector used to match the vllm backend pods.

To install the latest published chart from staging (`--version v0` selects the latest development version), run:

```txt
$ helm install vllm-llama3-8b-instruct \
--set inferencePool.name=vllm-llama3-8b-instruct \
--set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
--set inferencePool.targetPortNumber=8000 \
oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
```
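
After installing, you can sanity-check the release and the created resources (a sketch; assumes `kubectl` access and the release name used above):

```txt
$ helm status vllm-llama3-8b-instruct
$ kubectl get inferencepool vllm-llama3-8b-instruct
$ kubectl get deployment vllm-llama3-8b-instruct-epp
```

The `-epp` suffix comes from the chart's naming helper, which derives the endpoint picker name from the release name.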

@@ -38,17 +32,17 @@ $ helm uninstall pool-1

The following table lists the configurable parameters of the chart.

| **Parameter Name** | **Description** |
|---------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
| `inferencePool.name` | Name for the InferencePool, and inference extension will be named as `${inferencePool.name}-epp`. |
| `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. |
| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. |
| `inferenceExtension.replicas` | Number of replicas for the inference extension service. Defaults to `1`. |
| `inferenceExtension.image.name` | Name of the container image used for the inference extension. |
| `inferenceExtension.image.hub` | Registry URL where the inference extension image is hosted. |
| `inferenceExtension.image.tag` | Image tag of the inference extension. |
| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
| `inferenceExtension.extProcPort` | Port where the inference extension service is served for external processing. Defaults to `9002`. |
| **Parameter Name** | **Description** |
|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
| `inferencePool.name`                        | Name of the InferencePool; the endpoint picker deployment and service are named `{{ .Release.Name }}-epp`.             |
| `inferencePool.targetPortNumber`            | Target port number for the vllm backends; used by the inference extension to scrape metrics. Defaults to `8000`.       |
| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. |
| `inferenceExtension.replicas` | Number of replicas for the endpoint picker extension service. Defaults to `1`. |
| `inferenceExtension.image.name` | Name of the container image used for the endpoint picker. |
| `inferenceExtension.image.hub` | Registry URL where the endpoint picker image is hosted. |
| `inferenceExtension.image.tag` | Image tag of the endpoint picker. |
| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
| `inferenceExtension.extProcPort` | Port where the endpoint picker service is served for external processing. Defaults to `9002`. |
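
Instead of repeated `--set` flags, these parameters can be collected in a values file (an illustrative sketch; the label, replica, and pull-policy values here are examples, not chart defaults):

```yaml
# values.override.yaml (example file name)
inferencePool:
  targetPortNumber: 8000
  modelServers:
    matchLabels:
      app: vllm-llama3-8b-instruct
inferenceExtension:
  replicas: 2
  image:
    pullPolicy: IfNotPresent
```

and passed with `helm install pool-1 ./config/charts/inferencepool -f values.override.yaml`.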

## Notes

2 changes: 1 addition & 1 deletion config/charts/inferencepool/templates/NOTES.txt
@@ -1 +1 @@
InferencePool {{ .Values.inferencePool.name }} deployed.
InferencePool {{ .Release.Name }} deployed.
4 changes: 2 additions & 2 deletions config/charts/inferencepool/templates/_helpers.tpl
@@ -12,13 +12,13 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
Inference extension name
*/}}
{{- define "gateway-api-inference-extension.name" -}}
{{- $base := .Values.inferencePool.name | default "default-pool" | lower | trim | trunc 40 -}}
{{- $base := .Release.Name | default "default-pool" | lower | trim | trunc 40 -}}
{{ $base }}-epp
{{- end -}}

{{/*
Selector labels
*/}}
{{- define "gateway-api-inference-extension.selectorLabels" -}}
app: {{ include "gateway-api-inference-extension.name" . }}
inferencepool: {{ include "gateway-api-inference-extension.name" . }}
{{- end -}}
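
The renaming in `_helpers.tpl` above drives everything else: the endpoint picker's name is now derived from the Helm release rather than `inferencePool.name`. The naming rule can be modeled as follows (a hedged Python sketch of the template logic, not code from the chart):

```python
def epp_name(release_name: str) -> str:
    """Model of the Helm helper: default "default-pool" | lower | trim | trunc 40, then append "-epp"."""
    base = (release_name or "default-pool").lower().strip()[:40]
    return base + "-epp"
```

For example, a release named `vllm-llama3-8b-instruct` yields `vllm-llama3-8b-instruct-epp`, matching the deployment name the chart creates.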
5 changes: 0 additions & 5 deletions config/charts/inferencepool/templates/_validations.tpl
@@ -2,11 +2,6 @@
common validations
*/}}
{{- define "gateway-api-inference-extension.validations.inferencepool.common" -}}
{{- if not $.Values.inferencePool.name }}
{{- fail "missing .Values.inferencePool.name" }}
{{- end }}


{{- if or (empty $.Values.inferencePool.modelServers) (not $.Values.inferencePool.modelServers.matchLabels) }}
{{- fail ".Values.inferencePool.modelServers.matchLabels is required" }}
{{- end }}
2 changes: 1 addition & 1 deletion config/charts/inferencepool/templates/epp-deployment.yaml
@@ -22,7 +22,7 @@ spec:
imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
args:
- -poolName
- {{ .Values.inferencePool.name }}
- {{ .Release.Name }}
- -poolNamespace
- {{ .Release.Namespace }}
- -v
59 changes: 59 additions & 0 deletions config/charts/inferencepool/templates/gke.yaml
@@ -0,0 +1,59 @@
{{- if eq .Values.provider.name "gke" }}
---
kind: HealthCheckPolicy
apiVersion: networking.gke.io/v1
metadata:
name: {{ .Release.Name }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
spec:
targetRef:
group: "inference.networking.x-k8s.io"
kind: InferencePool
name: {{ .Release.Name }}
default:
config:
type: HTTP
httpHealthCheck:
requestPath: /health
port: {{ .Values.inferencePool.targetPortNumber }}
---
apiVersion: networking.gke.io/v1
kind: GCPBackendPolicy
metadata:
name: {{ .Release.Name }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
spec:
targetRef:
group: "inference.networking.x-k8s.io"
kind: InferencePool
name: {{ .Release.Name }}
default:
timeoutSec: 300 # 5-minute timeout (adjust as needed)
---
apiVersion: monitoring.googleapis.com/v1
kind: ClusterPodMonitoring
metadata:
name: {{ .Release.Namespace }}-{{ .Release.Name }}
labels:
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
spec:
endpoints:
- port: metrics
scheme: http
interval: 5s
path: /metrics
authorization:
type: Bearer
credentials:
secret:
name: {{ .Values.gke.monitoringSecret }}
key: token
namespace: {{ .Release.Namespace }}
selector:
matchLabels:
{{- include "gateway-api-inference-extension.labels" . | nindent 8 }}
{{- end }}
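
Since `provider.name` defaults to `none` in `values.yaml`, the GKE resources above (HealthCheckPolicy, GCPBackendPolicy, ClusterPodMonitoring) are only rendered when the provider is set explicitly, e.g. (a sketch following the install examples in the README):

```txt
$ helm install vllm-llama3-8b-instruct ./config/charts/inferencepool \
  --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
  --set provider.name=gke
```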
2 changes: 1 addition & 1 deletion config/charts/inferencepool/templates/inferencepool.yaml
@@ -2,7 +2,7 @@
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferencePool
metadata:
name: {{ .Values.inferencePool.name }}
name: {{ .Release.Name }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
7 changes: 6 additions & 1 deletion config/charts/inferencepool/values.yaml
@@ -8,8 +8,13 @@ inferenceExtension:
extProcPort: 9002

inferencePool:
# name: pool-1 # REQUIRED
targetPortNumber: 8000
# modelServers: # REQUIRED
# matchLabels:
# app: vllm-llama3-8b-instruct

provider:
name: none

gke:
monitoringSecret: inference-gateway-sa-metrics-reader-secret
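
The provider gating can be checked without a cluster by rendering the chart locally (a sketch; `helm template` only renders manifests):

```txt
$ helm template pool-1 ./config/charts/inferencepool \
  --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
  --set provider.name=gke | grep 'kind:'
```

With `provider.name=gke` the output should include the HealthCheckPolicy, GCPBackendPolicy, and ClusterPodMonitoring kinds; with the default `none` it should not.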