From 3d3b39a2bf88046574c42ee3b1e22fe96b7064e3 Mon Sep 17 00:00:00 2001
From: ahg-g <ahg@google.com>
Date: Thu, 27 Mar 2025 23:43:00 +0000
Subject: [PATCH 1/2] Added provider support to InferencePool helm chart

---
 .../inferencepool/templates/_helpers.tpl      |  2 +-
 .../charts/inferencepool/templates/gke.yaml   | 59 +++++++++++++++++++
 config/charts/inferencepool/values.yaml       |  6 ++
 3 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 config/charts/inferencepool/templates/gke.yaml

diff --git a/config/charts/inferencepool/templates/_helpers.tpl b/config/charts/inferencepool/templates/_helpers.tpl
index bb15f9e4e..b3883eb87 100644
--- a/config/charts/inferencepool/templates/_helpers.tpl
+++ b/config/charts/inferencepool/templates/_helpers.tpl
@@ -20,5 +20,5 @@ Inference extension name
 Selector labels
 */}}
 {{- define "gateway-api-inference-extension.selectorLabels" -}}
-app: {{ include "gateway-api-inference-extension.name" . }}
+inferencepool: {{ include "gateway-api-inference-extension.name" . }}
 {{- end -}}
diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml
new file mode 100644
index 000000000..a382451ae
--- /dev/null
+++ b/config/charts/inferencepool/templates/gke.yaml
@@ -0,0 +1,59 @@
+{{- if eq .Values.provider.name "gke" }}
+---
+kind: HealthCheckPolicy
+apiVersion: networking.gke.io/v1
+metadata:
+  name: {{ .Values.inferencePool.name }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  targetRef:
+    group: "inference.networking.x-k8s.io"
+    kind: InferencePool
+    name: {{ .Values.inferencePool.name }}
+  default:
+    config:
+      type: HTTP
+      httpHealthCheck:
+        requestPath: /health
+        port: {{ .Values.inferencePool.targetPortNumber }}
+---
+apiVersion: networking.gke.io/v1
+kind: GCPBackendPolicy
+metadata:
+  name: {{ .Values.inferencePool.name }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  targetRef:
+    group: "inference.networking.x-k8s.io"
+    kind: InferencePool
+    name: {{ .Values.inferencePool.name }}
+  default:
+    timeoutSec: 300    # 5-minute timeout (adjust as needed)
+---
+apiVersion: monitoring.googleapis.com/v1
+kind: ClusterPodMonitoring
+metadata:
+  name: {{ .Release.Namespace }}-{{ .Values.inferencePool.name }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  endpoints:
+  - port: metrics
+    scheme: http
+    interval: 5s
+    path: /metrics
+    authorization:
+      type: Bearer
+      credentials:
+        secret:
+          name: {{ .Values.gke.monitoringSecret | quote }}  # Secret read for the bearer token (key "token")
+          key: token
+          namespace: {{ .Release.Namespace }}
+  selector:
+    matchLabels:  # select pods by the chart's selector labels, not the full metadata label set
+      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
+{{- end }}
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
index 7b0c8f96f..3b3651889 100644
--- a/config/charts/inferencepool/values.yaml
+++ b/config/charts/inferencepool/values.yaml
@@ -13,3 +13,9 @@ inferencePool:
   # modelServers: # REQUIRED
     # matchLabels: 
     #   app: vllm-llama3-8b-instruct
+
+provider:
+  name: none
+
+gke:
+  monitoringSecret: inference-gateway-sa-metrics-reader-secret

From ea6a45f94cd36afeb93cda5d349b1e1822aa3dab Mon Sep 17 00:00:00 2001
From: ahg-g <ahg@google.com>
Date: Fri, 28 Mar 2025 13:16:48 +0000
Subject: [PATCH 2/2] Removed the redundant pool name flag

---
 config/charts/inferencepool/README.md         | 28 ++++++++-----------
 .../charts/inferencepool/templates/NOTES.txt  |  2 +-
 .../inferencepool/templates/_helpers.tpl      |  2 +-
 .../inferencepool/templates/_validations.tpl  |  5 ----
 .../templates/epp-deployment.yaml             |  2 +-
 .../charts/inferencepool/templates/gke.yaml   | 10 +++----
 .../templates/inferencepool.yaml              |  2 +-
 config/charts/inferencepool/values.yaml       |  1 -
 8 files changed, 20 insertions(+), 32 deletions(-)

diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md
index 30087527d..681fc7836 100644
--- a/config/charts/inferencepool/README.md
+++ b/config/charts/inferencepool/README.md
@@ -9,20 +9,14 @@ To install an InferencePool named `vllm-llama3-8b-instruct`  that selects from e
 
 ```txt
 $ helm install vllm-llama3-8b-instruct ./config/charts/inferencepool \
-  --set inferencePool.name=vllm-llama3-8b-instruct \
-  --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
-  --set inferencePool.targetPortNumber=8000
+  --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct
 ```
 
-where `inferencePool.targetPortNumber` is the pod that vllm backends served on and `inferencePool.modelServers.matchLabels` is the selector to match the vllm backends.
-
 To install via the latest published chart in staging  (--version v0 indicates latest dev version), you can run the following command:
 
 ```txt
 $ helm install vllm-llama3-8b-instruct \
-  --set inferencePool.name=vllm-llama3-8b-instruct \
   --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \
-  --set inferencePool.targetPortNumber=8000 \
   oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
 ```
 
@@ -38,17 +32,17 @@ $ helm uninstall pool-1
 
 The following table list the configurable parameters of the chart.
 
-| **Parameter Name**                          | **Description**                                                                                                   |
-|---------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
-| `inferencePool.name`                        | Name for the InferencePool, and inference extension will be named as `${inferencePool.name}-epp`.                |
-| `inferencePool.targetPortNumber`            | Target port number for the vllm backends, will be used to scrape metrics by the inference extension.             |
-| `inferencePool.modelServers.matchLabels`    | Label selector to match vllm backends managed by the inference pool.                                             |
-| `inferenceExtension.replicas`               | Number of replicas for the inference extension service. Defaults to `1`.                                           |
-| `inferenceExtension.image.name`             | Name of the container image used for the inference extension.                                                    |
-| `inferenceExtension.image.hub`              | Registry URL where the inference extension image is hosted.                                                     |
-| `inferenceExtension.image.tag`              | Image tag of the inference extension.                                                                             |
-| `inferenceExtension.image.pullPolicy`       | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
-| `inferenceExtension.extProcPort`            | Port where the inference extension service is served for external processing. Defaults to `9002`.                  |
+| **Parameter Name**                          | **Description**                                                                                                        |
+|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------|
+| `inferencePool.name`                        | Name for the InferencePool, and endpoint picker deployment and service will be named as `{.Release.Name}-epp`.         |
+| `inferencePool.targetPortNumber`            | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. |
+| `inferencePool.modelServers.matchLabels`    | Label selector to match vllm backends managed by the inference pool.                                                   |
+| `inferenceExtension.replicas`               | Number of replicas for the endpoint picker extension service. Defaults to `1`.                                         |
+| `inferenceExtension.image.name`             | Name of the container image used for the endpoint picker.                                                              |
+| `inferenceExtension.image.hub`              | Registry URL where the endpoint picker image is hosted.                                                                |
+| `inferenceExtension.image.tag`              | Image tag of the endpoint picker.                                                                                      |
+| `inferenceExtension.image.pullPolicy`       | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`.      |
+| `inferenceExtension.extProcPort`            | Port where the endpoint picker service is served for external processing. Defaults to `9002`.                          |
 
 ## Notes
 
diff --git a/config/charts/inferencepool/templates/NOTES.txt b/config/charts/inferencepool/templates/NOTES.txt
index 3d8221659..22e5c0e19 100644
--- a/config/charts/inferencepool/templates/NOTES.txt
+++ b/config/charts/inferencepool/templates/NOTES.txt
@@ -1 +1 @@
-InferencePool {{ .Values.inferencePool.name }} deployed.
+InferencePool {{ .Release.Name }} deployed.
diff --git a/config/charts/inferencepool/templates/_helpers.tpl b/config/charts/inferencepool/templates/_helpers.tpl
index b3883eb87..e011bb7c1 100644
--- a/config/charts/inferencepool/templates/_helpers.tpl
+++ b/config/charts/inferencepool/templates/_helpers.tpl
@@ -12,7 +12,7 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
 Inference extension name
 */}}
 {{- define "gateway-api-inference-extension.name" -}}
-{{- $base := .Values.inferencePool.name | default "default-pool" | lower | trim | trunc 40 -}}
+{{- $base := .Release.Name | default "default-pool" | lower | trim | trunc 40 -}}
 {{ $base }}-epp
 {{- end -}}
 
diff --git a/config/charts/inferencepool/templates/_validations.tpl b/config/charts/inferencepool/templates/_validations.tpl
index 55ed80c8d..65c743b6e 100644
--- a/config/charts/inferencepool/templates/_validations.tpl
+++ b/config/charts/inferencepool/templates/_validations.tpl
@@ -2,11 +2,6 @@
 common validations
 */}}
 {{- define "gateway-api-inference-extension.validations.inferencepool.common" -}}
-{{- if not $.Values.inferencePool.name }}
-{{- fail "missing .Values.inferencePool.name" }}
-{{- end }}
-
-
 {{- if or (empty $.Values.inferencePool.modelServers) (not $.Values.inferencePool.modelServers.matchLabels) }}
 {{- fail ".Values.inferencePool.modelServers.matchLabels is required" }}
 {{- end }}
diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml
index ded9cb12f..9faace735 100644
--- a/config/charts/inferencepool/templates/epp-deployment.yaml
+++ b/config/charts/inferencepool/templates/epp-deployment.yaml
@@ -22,7 +22,7 @@ spec:
         imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
         args:
         - -poolName
-        - {{ .Values.inferencePool.name }}
+        - {{ .Release.Name }}
         - -poolNamespace
         - {{ .Release.Namespace }}
         - -v
diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml
index a382451ae..86e8c4ff3 100644
--- a/config/charts/inferencepool/templates/gke.yaml
+++ b/config/charts/inferencepool/templates/gke.yaml
@@ -3,7 +3,7 @@
 kind: HealthCheckPolicy
 apiVersion: networking.gke.io/v1
 metadata:
-  name: {{ .Values.inferencePool.name }}
+  name: {{ .Release.Name }}
   namespace: {{ .Release.Namespace }}
   labels:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
@@ -11,7 +11,7 @@ spec:
   targetRef:
     group: "inference.networking.x-k8s.io"
     kind: InferencePool
-    name: {{ .Values.inferencePool.name }}
+    name: {{ .Release.Name }}
   default:
     config:
       type: HTTP
@@ -22,7 +22,7 @@ spec:
 apiVersion: networking.gke.io/v1
 kind: GCPBackendPolicy
 metadata:
-  name: {{ .Values.inferencePool.name }}
+  name: {{ .Release.Name }}
   namespace: {{ .Release.Namespace }}
   labels:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
@@ -30,14 +30,14 @@ spec:
   targetRef:
     group: "inference.networking.x-k8s.io"
     kind: InferencePool
-    name: {{ .Values.inferencePool.name }}
+    name: {{ .Release.Name }}
   default:
     timeoutSec: 300    # 5-minute timeout (adjust as needed)
 ---
 apiVersion: monitoring.googleapis.com/v1
 kind: ClusterPodMonitoring
 metadata:
-  name: {{ .Release.Namespace }}-{{ .Values.inferencePool.name }}
+  name: {{ .Release.Namespace }}-{{ .Release.Name }}
   labels:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
 spec:
diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml
index 2b79f3991..4b279cbda 100644
--- a/config/charts/inferencepool/templates/inferencepool.yaml
+++ b/config/charts/inferencepool/templates/inferencepool.yaml
@@ -2,7 +2,7 @@
 apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferencePool
 metadata:
-  name: {{ .Values.inferencePool.name }}
+  name: {{ .Release.Name }}
   namespace: {{ .Release.Namespace }}
   labels:
     {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml
index 3b3651889..45dd11a11 100644
--- a/config/charts/inferencepool/values.yaml
+++ b/config/charts/inferencepool/values.yaml
@@ -8,7 +8,6 @@ inferenceExtension:
   extProcPort: 9002
 
 inferencePool:
-  # name: pool-1 # REQUIRED
   targetPortNumber: 8000
   # modelServers: # REQUIRED
     # matchLabels: