Skip to content

add helm template #416

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Mar 19, 2025
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Patterns to ignore when building packages.
# Patterns support shell glob matching, relative path matching, and
# negation (prefix the pattern with !). Write exactly one pattern per line.
.DS_Store
# Common version-control directories and their ignore files
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup and temporary files
*.swp
*.bak
*.tmp
*.orig
*~
# IDE and editor project files
.project
.idea/
*.tmproj
.vscode/
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Chart metadata for the InferencePool Helm chart.
apiVersion: v2
# NOTE(review): Helm's chart conventions recommend lowercase names with dashes;
# renaming changes the chart's identity, so confirm with maintainers first.
name: InferencePool
description: A Helm chart for InferencePool

type: application

# Version of the chart itself.
version: 0.1.0

# Read by the chart's labels helper, which emits it as the
# app.kubernetes.io/version label.
appVersion: "0.2.0"
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Gateway API Inference Extension deployed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
{{/*
Common labels: the extension name and, when the chart declares an appVersion,
the quoted app version.
*/}}
{{- define "gateway-api-inference-extension.labels" -}}
{{- $eppName := include "gateway-api-inference-extension.name" . -}}
app.kubernetes.io/name: {{ $eppName }}
{{- with .Chart.AppVersion }}
app.kubernetes.io/version: {{ quote . }}
{{- end }}
{{- end }}

{{/*
Inference extension name: the pool name (or "default-pool" when unset),
lower-cased, trimmed and cut to 40 characters, followed by the "-epp" suffix.
*/}}
{{- define "gateway-api-inference-extension.name" -}}
{{- $poolName := default "default-pool" .Values.inferencePool.name -}}
{{- printf "%s-epp" ($poolName | lower | trim | trunc 40) -}}
{{- end -}}

{{/*
Selector labels shared by the Deployment (matchLabels and pod template) and
the Service selector.
*/}}
{{- define "gateway-api-inference-extension.selectorLabels" -}}
{{- $eppName := include "gateway-api-inference-extension.name" . -}}
app: {{ $eppName }}
{{- end -}}
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# InferencePool for the configured pod selector. extensionRef points at the
# extension resources rendered from the same name helper.
apiVersion: inference.networking.x-k8s.io/v1alpha2
kind: InferencePool
metadata:
  # Quoted so a name or namespace that looks like a number or boolean
  # (for example "123") still renders as a YAML string.
  name: {{ .Values.inferencePool.name | quote }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
spec:
  targetPortNumber: {{ .Values.inferencePool.targetPortNumber }}
  selector:
    {{- range $key, $value := .Values.inferencePool.selector }}
    {{ $key }}: {{ quote $value }}
    {{- end }}
  extensionRef:
    name: {{ include "gateway-api-inference-extension.name" . }}
---
# Deployment running the inference extension (epp) container.
# The gRPC and metrics ports are read from the same values the Service uses,
# so the container listens on the ports the Service forwards to.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: {{ include "gateway-api-inference-extension.name" . }}
  namespace: {{ .Release.Namespace | quote }}
  labels:
    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
spec:
  # NOTE(review): "default 1" also replaces an explicit 0, so this value cannot
  # scale the Deployment to zero - confirm that is intended.
  replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
  selector:
    matchLabels:
      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
  template:
    metadata:
      labels:
        {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
    spec:
      serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
      containers:
      - name: epp
        image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
        imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
        # Container args must be strings, so every templated value is quoted.
        args:
        - -poolName
        - {{ .Values.inferencePool.name | quote }}
        - -poolNamespace
        - {{ .Release.Namespace | quote }}
        - -v
        - "3"
        - -grpcPort
        - {{ .Values.inferenceExtension.extProcPort | default 9002 | quote }}
        - -grpcHealthPort
        - "9003"
        - -metricsPort
        - {{ .Values.inferenceExtension.metricsPort | default 9090 | quote }}
        ports:
        - name: grpc
          containerPort: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
        - name: grpc-health
          containerPort: 9003
        - name: metrics
          containerPort: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
        # Both probes use the gRPC health port passed via -grpcHealthPort.
        livenessProbe:
          grpc:
            port: 9003
            service: inference-extension
          initialDelaySeconds: 5
          periodSeconds: 10
        readinessProbe:
          grpc:
            port: 9003
            service: inference-extension
          initialDelaySeconds: 5
          periodSeconds: 10
---
# ClusterIP Service exposing the extension's gRPC ext-proc and metrics ports.
apiVersion: v1
kind: Service
metadata:
  name: {{ include "gateway-api-inference-extension.name" . }}
  namespace: {{ .Release.Namespace }}
  labels:
    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
spec:
  selector:
    {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
  ports:
  # targetPort refers to the container port by name. Without it the Service
  # forwards to a container port equal to "port", which breaks as soon as the
  # Service port is overridden through values.
  - name: grpc-ext-proc
    protocol: TCP
    port: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
    targetPort: grpc
  - name: http-metrics
    protocol: TCP
    port: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
    targetPort: metrics
  type: ClusterIP
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# ClusterRole granting the extension read access to inference resources and
# pods, plus permission to create token and subject-access reviews.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: {{ include "gateway-api-inference-extension.name" . }}
  labels:
    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
rules:
- apiGroups:
  - inference.networking.x-k8s.io
  # Each resource is its own list entry; a single comma-joined string would be
  # treated as one resource name and match nothing.
  resources:
  - inferencemodels
  - inferencepools
  verbs:
  - get
  - watch
  - list
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
  - watch
  - list
- apiGroups:
  - authentication.k8s.io
  resources:
  - tokenreviews
  verbs:
  - create
- apiGroups:
  - authorization.k8s.io
  resources:
  - subjectaccessreviews
  verbs:
  - create
---
# Binds the extension's ClusterRole to its ServiceAccount in the release
# namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: {{ include "gateway-api-inference-extension.name" . }}
subjects:
- kind: ServiceAccount
  name: {{ include "gateway-api-inference-extension.name" . }}
  namespace: {{ .Release.Namespace }}
roleRef:
  # apiGroup is a required field of roleRef in the RBAC v1 schema.
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: {{ include "gateway-api-inference-extension.name" . }}
---
# ServiceAccount referenced by the Deployment's serviceAccountName and by the
# ClusterRoleBinding subject.
kind: ServiceAccount
apiVersion: v1
metadata:
  labels:
    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
  name: {{ include "gateway-api-inference-extension.name" . }}
  namespace: {{ .Release.Namespace }}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
# Settings for the inference extension (epp) Deployment and Service.
inferenceExtension:
  replicas: 1
  image:
    name: epp
    hub: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension
    tag: main
    pullPolicy: Always
  # Service port for the gRPC ext-proc endpoint.
  extProcPort: 9002
  # Service port for metrics. Declared here because the templates already read
  # it; 9090 matches the fallback they use when the value is unset.
  metricsPort: 9090

# Settings for the InferencePool resource.
inferencePool:
  name: pool-1
  targetPortNumber: 8000
  # Label selector rendered into the InferencePool's spec.selector.
  selector:
    app: vllm-llama2-7b
145 changes: 145 additions & 0 deletions config/manifests/generated.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
---
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please remove this for now? It doesn't match the config we have in patch_policy.yaml.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I could be wrong, but I don't think there's any way to remove this from Helm's generated output. We should probably have a follow-up issue that adds make commands to auto-generate this file and to verify, as part of a presubmit, that it matches the Helm chart.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah yes, I temporarily rendered the template into generated.yaml so that you could review the template more easily; I will remove this. And I agree with @robscott: we should add make commands later.

# Source: InferencePool/templates/rbac.yaml
# ServiceAccount referenced by the pool-1-epp Deployment and ClusterRoleBinding.
kind: ServiceAccount
apiVersion: v1
metadata:
  labels:
    app.kubernetes.io/version: "0.2.0"
    app.kubernetes.io/name: pool-1-epp
  name: pool-1-epp
  namespace: default
---
# Source: InferencePool/templates/rbac.yaml
# ClusterRole granting read access to inference resources and pods, plus
# permission to create token and subject-access reviews.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: pool-1-epp
  labels:
    app.kubernetes.io/name: pool-1-epp
    app.kubernetes.io/version: "0.2.0"
rules:
- apiGroups:
  - inference.networking.x-k8s.io
  # Each resource is its own list entry; a single comma-joined string would be
  # treated as one resource name and match nothing.
  resources:
  - inferencemodels
  - inferencepools
  verbs:
  - get
  - watch
  - list
- apiGroups:
  - ""
  resources:
  - pods
  verbs:
  - get
  - watch
  - list
- apiGroups:
  - authentication.k8s.io
  resources:
  - tokenreviews
  verbs:
  - create
- apiGroups:
  - authorization.k8s.io
  resources:
  - subjectaccessreviews
  verbs:
  - create
---
# Source: InferencePool/templates/rbac.yaml
# Binds the pool-1-epp ClusterRole to the pool-1-epp ServiceAccount in the
# default namespace.
kind: ClusterRoleBinding
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: pool-1-epp
subjects:
- kind: ServiceAccount
  name: pool-1-epp
  namespace: default
roleRef:
  # apiGroup is a required field of roleRef in the RBAC v1 schema.
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: pool-1-epp
---
# Source: InferencePool/templates/inferencepool.yaml
# ClusterIP Service selecting the pool-1-epp pods; exposes the gRPC ext-proc
# port and the metrics port.
kind: Service
apiVersion: v1
metadata:
  labels:
    app.kubernetes.io/version: "0.2.0"
    app.kubernetes.io/name: pool-1-epp
  name: pool-1-epp
  namespace: default
spec:
  type: ClusterIP
  selector:
    app: pool-1-epp
  ports:
  - name: grpc-ext-proc
    port: 9002
    protocol: TCP
  - name: http-metrics
    port: 9090
    protocol: TCP
---
# Source: InferencePool/templates/inferencepool.yaml
# Single-replica Deployment of the epp container for pool-1.
kind: Deployment
apiVersion: apps/v1
metadata:
  labels:
    app.kubernetes.io/version: "0.2.0"
    app.kubernetes.io/name: pool-1-epp
  name: pool-1-epp
  namespace: default
spec:
  replicas: 1
  selector:
    matchLabels:
      app: pool-1-epp
  template:
    metadata:
      labels:
        app: pool-1-epp
    spec:
      serviceAccountName: pool-1-epp
      containers:
      - name: epp
        imagePullPolicy: Always
        image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
        # Flag/value pairs; every entry is a string.
        args:
        - "-poolName"
        - "pool-1"
        - "-poolNamespace"
        - "default"
        - "-v"
        - "3"
        - "-grpcPort"
        - "9002"
        - "-grpcHealthPort"
        - "9003"
        - "-metricsPort"
        - "9090"
        ports:
        - containerPort: 9002
          name: grpc
        - containerPort: 9003
          name: grpc-health
        - containerPort: 9090
          name: metrics
        # Both probes query the gRPC health port (9003).
        livenessProbe:
          initialDelaySeconds: 5
          periodSeconds: 10
          grpc:
            service: inference-extension
            port: 9003
        readinessProbe:
          initialDelaySeconds: 5
          periodSeconds: 10
          grpc:
            service: inference-extension
            port: 9003
---
# Source: InferencePool/templates/inferencepool.yaml
# InferencePool "pool-1": selects pods labelled app=vllm-llama2-7b on port 8000
# and references the pool-1-epp extension.
kind: InferencePool
apiVersion: inference.networking.x-k8s.io/v1alpha2
metadata:
  labels:
    app.kubernetes.io/version: "0.2.0"
    app.kubernetes.io/name: pool-1-epp
  name: pool-1
  namespace: default
spec:
  extensionRef:
    name: pool-1-epp
  selector:
    app: vllm-llama2-7b
  targetPortNumber: 8000