From 3d3b39a2bf88046574c42ee3b1e22fe96b7064e3 Mon Sep 17 00:00:00 2001 From: ahg-g Date: Thu, 27 Mar 2025 23:43:00 +0000 Subject: [PATCH 1/2] Added provider support to InferencePool helm chart --- .../inferencepool/templates/_helpers.tpl | 2 +- .../charts/inferencepool/templates/gke.yaml | 59 +++++++++++++++++++ config/charts/inferencepool/values.yaml | 6 ++ 3 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 config/charts/inferencepool/templates/gke.yaml diff --git a/config/charts/inferencepool/templates/_helpers.tpl b/config/charts/inferencepool/templates/_helpers.tpl index bb15f9e4e..b3883eb87 100644 --- a/config/charts/inferencepool/templates/_helpers.tpl +++ b/config/charts/inferencepool/templates/_helpers.tpl @@ -20,5 +20,5 @@ Inference extension name Selector labels */}} {{- define "gateway-api-inference-extension.selectorLabels" -}} -app: {{ include "gateway-api-inference-extension.name" . }} +inferencepool: {{ include "gateway-api-inference-extension.name" . }} {{- end -}} diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml new file mode 100644 index 000000000..a382451ae --- /dev/null +++ b/config/charts/inferencepool/templates/gke.yaml @@ -0,0 +1,59 @@ +{{- if eq .Values.provider.name "gke" }} +--- +kind: HealthCheckPolicy +apiVersion: networking.gke.io/v1 +metadata: + name: {{ .Values.inferencePool.name }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . 
| nindent 4 }} +spec: + targetRef: + group: "inference.networking.x-k8s.io" + kind: InferencePool + name: {{ .Values.inferencePool.name }} + default: + config: + type: HTTP + httpHealthCheck: + requestPath: /health + port: {{ .Values.inferencePool.targetPortNumber }} +--- +apiVersion: networking.gke.io/v1 +kind: GCPBackendPolicy +metadata: + name: {{ .Values.inferencePool.name }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} +spec: + targetRef: + group: "inference.networking.x-k8s.io" + kind: InferencePool + name: {{ .Values.inferencePool.name }} + default: + timeoutSec: 300 # 5-minute timeout (adjust as needed) +--- +apiVersion: monitoring.googleapis.com/v1 +kind: ClusterPodMonitoring +metadata: + name: {{ .Release.Namespace }}-{{ .Values.inferencePool.name }} + labels: + {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} +spec: + endpoints: + - port: metrics + scheme: http + interval: 5s + path: /metrics + authorization: + type: Bearer + credentials: + secret: + name: {{ .Values.gke.monitoringSecret }} + key: token + namespace: {{ .Release.Namespace }} + selector: + matchLabels: + {{- include "gateway-api-inference-extension.labels" . 
| nindent 8 }} +{{- end }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 7b0c8f96f..3b3651889 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -13,3 +13,9 @@ inferencePool: # modelServers: # REQUIRED # matchLabels: # app: vllm-llama3-8b-instruct + +provider: + name: none + +gke: + monitoringSecret: inference-gateway-sa-metrics-reader-secret From ea6a45f94cd36afeb93cda5d349b1e1822aa3dab Mon Sep 17 00:00:00 2001 From: ahg-g Date: Fri, 28 Mar 2025 13:16:48 +0000 Subject: [PATCH 2/2] Removed the redundant pool name flag --- config/charts/inferencepool/README.md | 28 ++++++++----------- .../charts/inferencepool/templates/NOTES.txt | 2 +- .../inferencepool/templates/_helpers.tpl | 2 +- .../inferencepool/templates/_validations.tpl | 5 ---- .../templates/epp-deployment.yaml | 2 +- .../charts/inferencepool/templates/gke.yaml | 10 +++---- .../templates/inferencepool.yaml | 2 +- config/charts/inferencepool/values.yaml | 1 - 8 files changed, 20 insertions(+), 32 deletions(-) diff --git a/config/charts/inferencepool/README.md b/config/charts/inferencepool/README.md index 30087527d..681fc7836 100644 --- a/config/charts/inferencepool/README.md +++ b/config/charts/inferencepool/README.md @@ -9,20 +9,14 @@ To install an InferencePool named `vllm-llama3-8b-instruct` that selects from e ```txt $ helm install vllm-llama3-8b-instruct ./config/charts/inferencepool \ - --set inferencePool.name=vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ - --set inferencePool.targetPortNumber=8000 ``` -where `inferencePool.targetPortNumber` is the pod that vllm backends served on and `inferencePool.modelServers.matchLabels` is the selector to match the vllm backends. 
- To install via the latest published chart in staging (--version v0 indicates latest dev version), you can run the following command: ```txt $ helm install vllm-llama3-8b-instruct \ - --set inferencePool.name=vllm-llama3-8b-instruct \ --set inferencePool.modelServers.matchLabels.app=vllm-llama3-8b-instruct \ - --set inferencePool.targetPortNumber=8000 \ oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0 ``` @@ -38,17 +32,17 @@ $ helm uninstall pool-1 The following table list the configurable parameters of the chart. -| **Parameter Name** | **Description** | -|---------------------------------------------|-------------------------------------------------------------------------------------------------------------------| -| `inferencePool.name` | Name for the InferencePool, and inference extension will be named as `${inferencePool.name}-epp`. | -| `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. | -| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. | -| `inferenceExtension.replicas` | Number of replicas for the inference extension service. Defaults to `1`. | -| `inferenceExtension.image.name` | Name of the container image used for the inference extension. | -| `inferenceExtension.image.hub` | Registry URL where the inference extension image is hosted. | -| `inferenceExtension.image.tag` | Image tag of the inference extension. | -| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. | -| `inferenceExtension.extProcPort` | Port where the inference extension service is served for external processing. Defaults to `9002`. 
| +| **Parameter Name** | **Description** | +|---------------------------------------------|------------------------------------------------------------------------------------------------------------------------| +| `inferencePool.name` | Name for the InferencePool, and endpoint picker deployment and service will be named as `{.Release.Name}-epp`. | +| `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. Defaults to 8000. | +| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. | +| `inferenceExtension.replicas` | Number of replicas for the endpoint picker extension service. Defaults to `1`. | +| `inferenceExtension.image.name` | Name of the container image used for the endpoint picker. | +| `inferenceExtension.image.hub` | Registry URL where the endpoint picker image is hosted. | +| `inferenceExtension.image.tag` | Image tag of the endpoint picker. | +| `inferenceExtension.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. | +| `inferenceExtension.extProcPort` | Port where the endpoint picker service is served for external processing. Defaults to `9002`. | ## Notes diff --git a/config/charts/inferencepool/templates/NOTES.txt b/config/charts/inferencepool/templates/NOTES.txt index 3d8221659..22e5c0e19 100644 --- a/config/charts/inferencepool/templates/NOTES.txt +++ b/config/charts/inferencepool/templates/NOTES.txt @@ -1 +1 @@ -InferencePool {{ .Values.inferencePool.name }} deployed. +InferencePool {{ .Release.Name }} deployed. 
diff --git a/config/charts/inferencepool/templates/_helpers.tpl b/config/charts/inferencepool/templates/_helpers.tpl index b3883eb87..e011bb7c1 100644 --- a/config/charts/inferencepool/templates/_helpers.tpl +++ b/config/charts/inferencepool/templates/_helpers.tpl @@ -12,7 +12,7 @@ app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} Inference extension name */}} {{- define "gateway-api-inference-extension.name" -}} -{{- $base := .Values.inferencePool.name | default "default-pool" | lower | trim | trunc 40 -}} +{{- $base := .Release.Name | default "default-pool" | lower | trim | trunc 40 -}} {{ $base }}-epp {{- end -}} diff --git a/config/charts/inferencepool/templates/_validations.tpl b/config/charts/inferencepool/templates/_validations.tpl index 55ed80c8d..65c743b6e 100644 --- a/config/charts/inferencepool/templates/_validations.tpl +++ b/config/charts/inferencepool/templates/_validations.tpl @@ -2,11 +2,6 @@ common validations */}} {{- define "gateway-api-inference-extension.validations.inferencepool.common" -}} -{{- if not $.Values.inferencePool.name }} -{{- fail "missing .Values.inferencePool.name" }} -{{- end }} - - {{- if or (empty $.Values.inferencePool.modelServers) (not $.Values.inferencePool.modelServers.matchLabels) }} {{- fail ".Values.inferencePool.modelServers.matchLabels is required" }} {{- end }} diff --git a/config/charts/inferencepool/templates/epp-deployment.yaml b/config/charts/inferencepool/templates/epp-deployment.yaml index ded9cb12f..9faace735 100644 --- a/config/charts/inferencepool/templates/epp-deployment.yaml +++ b/config/charts/inferencepool/templates/epp-deployment.yaml @@ -22,7 +22,7 @@ spec: imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }} args: - -poolName - - {{ .Values.inferencePool.name }} + - {{ .Release.Name }} - -poolNamespace - {{ .Release.Namespace }} - -v diff --git a/config/charts/inferencepool/templates/gke.yaml b/config/charts/inferencepool/templates/gke.yaml index 
a382451ae..86e8c4ff3 100644 --- a/config/charts/inferencepool/templates/gke.yaml +++ b/config/charts/inferencepool/templates/gke.yaml @@ -3,7 +3,7 @@ kind: HealthCheckPolicy apiVersion: networking.gke.io/v1 metadata: - name: {{ .Values.inferencePool.name }} + name: {{ .Release.Name }} namespace: {{ .Release.Namespace }} labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} @@ -11,7 +11,7 @@ spec: targetRef: group: "inference.networking.x-k8s.io" kind: InferencePool - name: {{ .Values.inferencePool.name }} + name: {{ .Release.Name }} default: config: type: HTTP @@ -22,7 +22,7 @@ spec: apiVersion: networking.gke.io/v1 kind: GCPBackendPolicy metadata: - name: {{ .Values.inferencePool.name }} + name: {{ .Release.Name }} namespace: {{ .Release.Namespace }} labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} @@ -30,14 +30,14 @@ spec: targetRef: group: "inference.networking.x-k8s.io" kind: InferencePool - name: {{ .Values.inferencePool.name }} + name: {{ .Release.Name }} default: timeoutSec: 300 # 5-minute timeout (adjust as needed) --- apiVersion: monitoring.googleapis.com/v1 kind: ClusterPodMonitoring metadata: - name: {{ .Release.Namespace }}-{{ .Values.inferencePool.name }} + name: {{ .Release.Namespace }}-{{ .Release.Name }} labels: {{- include "gateway-api-inference-extension.labels" . | nindent 4 }} spec: diff --git a/config/charts/inferencepool/templates/inferencepool.yaml b/config/charts/inferencepool/templates/inferencepool.yaml index 2b79f3991..4b279cbda 100644 --- a/config/charts/inferencepool/templates/inferencepool.yaml +++ b/config/charts/inferencepool/templates/inferencepool.yaml @@ -2,7 +2,7 @@ apiVersion: inference.networking.x-k8s.io/v1alpha2 kind: InferencePool metadata: - name: {{ .Values.inferencePool.name }} + name: {{ .Release.Name }} namespace: {{ .Release.Namespace }} labels: {{- include "gateway-api-inference-extension.labels" . 
| nindent 4 }} diff --git a/config/charts/inferencepool/values.yaml b/config/charts/inferencepool/values.yaml index 3b3651889..45dd11a11 100644 --- a/config/charts/inferencepool/values.yaml +++ b/config/charts/inferencepool/values.yaml @@ -8,7 +8,6 @@ inferenceExtension: extProcPort: 9002 inferencePool: - # name: pool-1 # REQUIRED targetPortNumber: 8000 # modelServers: # REQUIRED # matchLabels: