kubernetes-sigs · k8s-ci-robot · Mar 19, 2025 · Feb 27, 2025 · Feb 28, 2025 · Mar 18, 2025
diff --git a/config/manifests/gateway-api-inference-extension/.helmignore b/config/manifests/gateway-api-inference-extension/.helmignore
@@ -0,0 +1,23 @@
+# Patterns to ignore when building packages.
+# This supports shell glob matching, relative path matching, and
+# negation (prefixed with !). Only one pattern per line.
+.DS_Store
+# Common VCS dirs
+.git/
+.gitignore
+.bzr/
+.bzrignore
+.hg/
+.hgignore
+.svn/
+# Common backup files
+*.swp
+*.bak
+*.tmp
+*.orig
+*~
+# Various IDEs
+.project
+.idea/
+*.tmproj
+.vscode/
diff --git a/config/manifests/gateway-api-inference-extension/Chart.yaml b/config/manifests/gateway-api-inference-extension/Chart.yaml
@@ -0,0 +1,9 @@
+# Chart metadata for the gateway-api-inference-extension Helm chart.
+apiVersion: v2
+name: gateway-api-inference-extension
+description: A Helm chart for gateway-api-inference-extension
+type: application
+
+# Version of the chart package.
+version: 0.1.0
+# Surfaces as the app.kubernetes.io/version label through the chart's labels
+# helper (.Chart.AppVersion); quoted so it is always read as a string.
+appVersion: '0.1.0'
diff --git a/config/manifests/gateway-api-inference-extension/templates/NOTES.txt b/config/manifests/gateway-api-inference-extension/templates/NOTES.txt
@@ -0,0 +1 @@
+Gateway api inference extension deployed.
-Gateway api inference extension deployed.
+InferencePool deployed.
-Gateway api inference extension deployed.
+InferencePool deployed.
diff --git a/config/manifests/gateway-api-inference-extension/templates/_helpers.tpl b/config/manifests/gateway-api-inference-extension/templates/_helpers.tpl
@@ -0,0 +1,16 @@
+{{/*
+Common labels: app.kubernetes.io/name, taken from
+.Values.inferenceExtension.name, plus app.kubernetes.io/version (quoted) when
+the chart defines an appVersion. The output carries no indentation of its own;
+callers in this chart indent it with nindent.
+*/}}
+{{- define "gateway-api-inference-extension.labels" -}}
+app.kubernetes.io/name: {{ .Values.inferenceExtension.name }}
+{{- if .Chart.AppVersion }}
+app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
+{{- end }}
+{{- end }}
+
+{{/*
+Selector labels: the single "app" label, set to
+.Values.inferenceExtension.name. The chart uses it for the Deployment's
+selector, the pod template labels and the Service's selector, so the three
+always agree.
+*/}}
+{{- define "gateway-api-inference-extension.selectorLabels" -}}
+app: {{ .Values.inferenceExtension.name }}
+{{- end -}}
diff --git a/config/manifests/gateway-api-inference-extension/templates/ext_proc.yaml b/config/manifests/gateway-api-inference-extension/templates/ext_proc.yaml
@@ -0,0 +1,75 @@
+# Deployment running the inference extension (ext-proc) container.
+# serviceAccountName follows .Values.inferenceExtension.name, the value this
+# chart's ServiceAccount is named after, so overriding the name cannot leave
+# the pod pointing at a ServiceAccount that was never created.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: inference-gateway-ext-proc
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
+  selector:
+    matchLabels:
+      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
+    spec:
+      serviceAccountName: {{ .Values.inferenceExtension.name }}
+      containers:
+      - name: inference-gateway-ext-proc
+        image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
+        imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
+        # Pool name and namespace are quoted so values that look like numbers
+        # or booleans still render as the strings container args require.
+        args:
+        - -poolName
+        - {{ .Values.inferencePool.name | quote }}
+        - -poolNamespace
+        - {{ .Values.inferencePool.namespace | quote }}
+        - -v
+        - "3"
+        - -grpcPort
+        - "9002"
+        - -grpcHealthPort
+        - "9003"
+        - -metricsPort
+        - "9090"
+        # NOTE(review): these ports are fixed here, while the Service in this
+        # template takes its port numbers from values - confirm that
+        # overriding the Service ports is meant to be unsupported.
+        ports:
+        - name: grpc
+          containerPort: 9002
+        - name: grpc-health
+          containerPort: 9003
+        - name: metrics
+          containerPort: 9090
+        # Both probes use the gRPC health port passed to -grpcHealthPort above.
+        livenessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        readinessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+---
+# Service in front of the ext-proc pods, which it selects through the chart's
+# selector labels. Service port names must be valid DNS labels (lowercase
+# alphanumerics and '-'), so the ext-proc port is named grpc-ext-proc; an
+# underscore in the name is rejected by the API server.
+apiVersion: v1
+kind: Service
+metadata:
+  name: {{ .Values.inferenceExtension.name }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  selector:
+    {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
+  ports:
+    - name: grpc
-    - name: grpc
+    - name: grpc-ext-proc
-    - name: grpc
+    - name: grpc-ext-proc
+      protocol: TCP
+      port: {{ .Values.inferenceExtension.grpcPort | default 9002 }}
+      targetPort: {{ .Values.inferenceExtension.grpcPort | default 9002 }}
-      port: {{ .Values.inferenceExtension.grpcPort | default 9002 }}
-      targetPort: {{ .Values.inferenceExtension.grpcPort | default 9002 }}
+      port: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
-      port: {{ .Values.inferenceExtension.grpcPort | default 9002 }}
-      targetPort: {{ .Values.inferenceExtension.grpcPort | default 9002 }}
+      port: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
+    - name: http-metrics
+      protocol: TCP
+      port: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
+      targetPort: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
+  type: ClusterIP
diff --git a/config/manifests/gateway-api-inference-extension/templates/rbac.yaml b/config/manifests/gateway-api-inference-extension/templates/rbac.yaml
@@ -0,0 +1,51 @@
+# ClusterRole for the inference extension, named after
+# .Values.inferenceExtension.name. Grants get/watch/list on inferencemodels
+# and inferencepools (inference.networking.x-k8s.io), core pods and
+# discovery.k8s.io endpointslices, plus create on tokenreviews and
+# subjectaccessreviews.
+# NOTE(review): the tokenreviews/subjectaccessreviews rules presumably back
+# authenticated access to an endpoint of the extension - confirm.
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: {{ .Values.inferenceExtension.name }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+rules:
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencemodels"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencepools"]
+  verbs: ["get", "watch", "list"]
-  resources: ["inferencemodels"]
-  verbs: ["get", "watch", "list"]
- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "watch", "list"]
- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
+  resources: ["inferencemodels", "inferencepools"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "watch", "list"]
-  resources: ["inferencemodels"]
-  verbs: ["get", "watch", "list"]
- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "watch", "list"]
- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
+  resources: ["inferencemodels", "inferencepools"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: ["discovery.k8s.io"]
+  resources: ["endpointslices"]
+  verbs: ["get", "watch", "list"]
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - authorization.k8s.io
+  resources:
+  - subjectaccessreviews
+  verbs:
+  - create
+---
+# Binds the chart's ClusterRole to its ServiceAccount in the release
+# namespace; all three objects are named .Values.inferenceExtension.name.
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: {{ .Values.inferenceExtension.name }}
+subjects:
+- kind: ServiceAccount
+  name: {{ .Values.inferenceExtension.name }}
+  namespace: {{ .Release.Namespace }}
+roleRef:
+  # apiGroup is a required field of roleRef; spell it out instead of relying
+  # on server-side defaulting, so schema validation of the manifest passes.
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: {{ .Values.inferenceExtension.name }}
+---
+# ServiceAccount in the release namespace, named after
+# .Values.inferenceExtension.name; it is the subject of the chart's
+# ClusterRoleBinding.
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: {{ .Values.inferenceExtension.name }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
diff --git a/config/manifests/gateway-api-inference-extension/values.yaml b/config/manifests/gateway-api-inference-extension/values.yaml
@@ -0,0 +1,13 @@
+# Settings for the inference extension (ext-proc) workload.
+inferenceExtension:
+  # Names the Service, ServiceAccount and RBAC objects, and is the value of
+  # the app.kubernetes.io/name and "app" selector labels.
+  name: inference-gateway-ext-proc
+  replicas: 1
+  # Rendered into the container image reference as <hub>/<name>:<tag>.
+  image:
+    hub: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension
+    name: epp
+    tag: main
+    pullPolicy: Always
+
+# InferencePool the extension is pointed at through its -poolName and
+# -poolNamespace arguments.
+inferencePool:
+  name: vllm-llama2-7b-pool
+  namespace: default
diff --git a/config/manifests/install.yaml b/config/manifests/install.yaml
@@ -0,0 +1,137 @@
+---
+# Source: gateway-api-inference-extension/templates/rbac.yaml
+# NOTE(review): this file looks like the chart rendered with its default
+# values (see the "# Source:" markers) - prefer regenerating it over
+# hand-editing; confirm the generation command.
+# ServiceAccount used by the Deployment's pod (serviceAccountName) and named
+# as the subject of the ClusterRoleBinding.
+apiVersion: v1
+kind: ServiceAccount
+metadata:
+  name: inference-gateway-ext-proc
+  namespace: default
+  labels:
+    app.kubernetes.io/name: inference-gateway-ext-proc
+    app.kubernetes.io/version: "0.1.0"
+---
+# Source: gateway-api-inference-extension/templates/rbac.yaml
+# Read access (get/watch/list) to inferencemodels, inferencepools, pods and
+# endpointslices, plus create on tokenreviews and subjectaccessreviews.
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: inference-gateway-ext-proc
+  labels:
+    app.kubernetes.io/name: inference-gateway-ext-proc
+    app.kubernetes.io/version: "0.1.0"
+rules:
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencemodels"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencepools"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: ["discovery.k8s.io"]
+  resources: ["endpointslices"]
+  verbs: ["get", "watch", "list"]
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - authorization.k8s.io
+  resources:
+  - subjectaccessreviews
+  verbs:
+  - create
+---
+# Source: gateway-api-inference-extension/templates/rbac.yaml
+# Binds the inference-gateway-ext-proc ClusterRole to the ServiceAccount of
+# the same name in the default namespace.
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: inference-gateway-ext-proc
+subjects:
+- kind: ServiceAccount
+  name: inference-gateway-ext-proc
+  namespace: default
+roleRef:
+  # apiGroup is a required field of roleRef; spell it out instead of relying
+  # on server-side defaulting, so schema validation of the manifest passes.
+  apiGroup: rbac.authorization.k8s.io
+  kind: ClusterRole
+  name: inference-gateway-ext-proc
+---
+# Source: gateway-api-inference-extension/templates/ext_proc.yaml
+# ClusterIP Service selecting pods labelled app=inference-gateway-ext-proc;
+# exposes the grpc port (9002) and the http-metrics port (9090).
+apiVersion: v1
+kind: Service
+metadata:
+  name: inference-gateway-ext-proc
+  namespace: default
+  labels:
+    app.kubernetes.io/name: inference-gateway-ext-proc
+    app.kubernetes.io/version: "0.1.0"
+spec:
+  selector:
+    app: inference-gateway-ext-proc
+  ports:
+    - name: grpc
+      protocol: TCP
+      port: 9002
+      targetPort: 9002
+    - name: http-metrics
+      protocol: TCP
+      port: 9090
+      targetPort: 9090
+  type: ClusterIP
+---
+# Source: gateway-api-inference-extension/templates/ext_proc.yaml
+# Single-replica Deployment of the ext-proc container, run under the
+# inference-gateway-ext-proc ServiceAccount and pointed at the
+# vllm-llama2-7b-pool pool in the default namespace.
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: inference-gateway-ext-proc
+  namespace: default
+  labels:
+    app.kubernetes.io/name: inference-gateway-ext-proc
+    app.kubernetes.io/version: "0.1.0"
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: inference-gateway-ext-proc
+  template:
+    metadata:
+      labels:
+        app: inference-gateway-ext-proc
+    spec:
+      serviceAccountName: inference-gateway-ext-proc
+      containers:
+      - name: inference-gateway-ext-proc
+        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
+        imagePullPolicy: Always
+        args:
+        - -poolName
+        - vllm-llama2-7b-pool
+        - -poolNamespace
+        - default
+        - -v
+        - "3"
+        - -grpcPort
+        - "9002"
+        - -grpcHealthPort
+        - "9003"
+        - -metricsPort
+        - "9090"
+        # Container ports mirror the port numbers passed in args above.
+        ports:
+        - name: grpc
+          containerPort: 9002
+        - name: grpc-health
+          containerPort: 9003
+        - name: metrics
+          containerPort: 9090
+        # Both probes use the gRPC health port (9003).
+        livenessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        readinessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10