-
Notifications
You must be signed in to change notification settings - Fork 76
add helm template #416
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
add helm template #416
Changes from 2 commits
4931640
2366460
dcd3bd5
154f670
2490c28
814bec3
6712198
a885ea9
bf51f9a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
# Patterns to ignore when building packages. | ||
# This supports shell glob matching, relative path matching, and | ||
# negation (prefixed with !). Only one pattern per line. | ||
.DS_Store | ||
# Common VCS dirs | ||
.git/ | ||
.gitignore | ||
.bzr/ | ||
.bzrignore | ||
.hg/ | ||
.hgignore | ||
.svn/ | ||
# Common backup files | ||
*.swp | ||
*.bak | ||
*.tmp | ||
*.orig | ||
*~ | ||
# Various IDEs | ||
.project | ||
.idea/ | ||
*.tmproj | ||
.vscode/ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
apiVersion: v2 | ||
name: gateway-api-inference-extension | ||
ahg-g marked this conversation as resolved.
Show resolved
Hide resolved
|
||
description: A Helm chart for gateway-api-inference-extension | ||
ahg-g marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
type: application | ||
|
||
version: 0.1.0 | ||
|
||
appVersion: "0.1.0" |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
@@ -0,0 +1 @@ | ||||||
Gateway api inference extension deployed. | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
{{/* | ||
Common labels | ||
*/}} | ||
{{- define "gateway-api-inference-extension.labels" -}} | ||
app.kubernetes.io/name: {{ .Values.inferenceExtension.name }} | ||
{{- if .Chart.AppVersion }} | ||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} | ||
{{- end }} | ||
{{- end }} | ||
|
||
{{/* | ||
Selector labels | ||
*/}} | ||
{{- define "gateway-api-inference-extension.selectorLabels" -}} | ||
app: {{ .Values.inferenceExtension.name }} | ||
{{- end -}} | ||
ahg-g marked this conversation as resolved.
Show resolved
Hide resolved
|
Original file line number | Diff line number | Diff line change | ||||||
---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,75 @@ | ||||||||
apiVersion: apps/v1 | ||||||||
ahg-g marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||||
kind: Deployment | ||||||||
metadata: | ||||||||
name: inference-gateway-ext-proc | ||||||||
namespace: {{ .Release.Namespace }} | ||||||||
labels: | ||||||||
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }} | ||||||||
spec: | ||||||||
replicas: {{ .Values.inferenceExtension.replicas | default 1 }} | ||||||||
selector: | ||||||||
matchLabels: | ||||||||
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }} | ||||||||
template: | ||||||||
metadata: | ||||||||
labels: | ||||||||
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }} | ||||||||
spec: | ||||||||
serviceAccountName: inference-gateway-ext-proc | ||||||||
containers: | ||||||||
- name: inference-gateway-ext-proc | ||||||||
image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }} | ||||||||
imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }} | ||||||||
args: | ||||||||
- -poolName | ||||||||
- {{ .Values.inferencePool.name }} | ||||||||
- -poolNamespace | ||||||||
- {{ .Values.inferencePool.namespace }} | ||||||||
- -v | ||||||||
- "3" | ||||||||
- -grpcPort | ||||||||
- "9002" | ||||||||
- -grpcHealthPort | ||||||||
- "9003" | ||||||||
- -metricsPort | ||||||||
- "9090" | ||||||||
ports: | ||||||||
- name: grpc | ||||||||
containerPort: 9002 | ||||||||
- name: grpc-health | ||||||||
containerPort: 9003 | ||||||||
- name: metrics | ||||||||
containerPort: 9090 | ||||||||
livenessProbe: | ||||||||
grpc: | ||||||||
port: 9003 | ||||||||
service: inference-extension | ||||||||
initialDelaySeconds: 5 | ||||||||
periodSeconds: 10 | ||||||||
readinessProbe: | ||||||||
grpc: | ||||||||
port: 9003 | ||||||||
service: inference-extension | ||||||||
initialDelaySeconds: 5 | ||||||||
periodSeconds: 10 | ||||||||
--- | ||||||||
apiVersion: v1 | ||||||||
kind: Service | ||||||||
metadata: | ||||||||
name: {{ .Values.inferenceExtension.name }} | ||||||||
namespace: {{ .Release.Namespace }} | ||||||||
labels: | ||||||||
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }} | ||||||||
spec: | ||||||||
selector: | ||||||||
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }} | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Both of these feel like they should be included in values.yaml There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is generated in
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'd missed that, thanks! While I don't think we need the InferencePool labels to be configurable, I think it's important to make the selector configurable. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do you mean we should also create an InferencePool in the Helm chart? (I have not done that yet.) |
||||||||
ports: | ||||||||
- name: grpc | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||
protocol: TCP | ||||||||
port: {{ .Values.inferenceExtension.grpcPort | default 9002 }} | ||||||||
targetPort: {{ .Values.inferenceExtension.grpcPort | default 9002 }} | ||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nit: I think I'd call this
Suggested change
|
||||||||
- name: http-metrics | ||||||||
protocol: TCP | ||||||||
port: {{ .Values.inferenceExtension.metricsPort | default 9090 }} | ||||||||
targetPort: {{ .Values.inferenceExtension.metricsPort | default 9090 }} | ||||||||
type: ClusterIP |
Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,51 @@ | ||||||||||||||||||||||||||||
kind: ClusterRole | ||||||||||||||||||||||||||||
apiVersion: rbac.authorization.k8s.io/v1 | ||||||||||||||||||||||||||||
metadata: | ||||||||||||||||||||||||||||
name: {{ .Values.inferenceExtension.name }} | ||||||||||||||||||||||||||||
labels: | ||||||||||||||||||||||||||||
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }} | ||||||||||||||||||||||||||||
rules: | ||||||||||||||||||||||||||||
- apiGroups: ["inference.networking.x-k8s.io"] | ||||||||||||||||||||||||||||
resources: ["inferencemodels"] | ||||||||||||||||||||||||||||
verbs: ["get", "watch", "list"] | ||||||||||||||||||||||||||||
- apiGroups: [""] | ||||||||||||||||||||||||||||
resources: ["pods"] | ||||||||||||||||||||||||||||
verbs: ["get", "watch", "list"] | ||||||||||||||||||||||||||||
- apiGroups: ["inference.networking.x-k8s.io"] | ||||||||||||||||||||||||||||
resources: ["inferencepools"] | ||||||||||||||||||||||||||||
verbs: ["get", "watch", "list"] | ||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||||||||||||||||||||||||
- apiGroups: ["discovery.k8s.io"] | ||||||||||||||||||||||||||||
resources: ["endpointslices"] | ||||||||||||||||||||||||||||
verbs: ["get", "watch", "list"] | ||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @kfswain I don't think we need this any more, right? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Yes, we should remove this. |
||||||||||||||||||||||||||||
- apiGroups: | ||||||||||||||||||||||||||||
- authentication.k8s.io | ||||||||||||||||||||||||||||
resources: | ||||||||||||||||||||||||||||
- tokenreviews | ||||||||||||||||||||||||||||
verbs: | ||||||||||||||||||||||||||||
- create | ||||||||||||||||||||||||||||
- apiGroups: | ||||||||||||||||||||||||||||
- authorization.k8s.io | ||||||||||||||||||||||||||||
resources: | ||||||||||||||||||||||||||||
- subjectaccessreviews | ||||||||||||||||||||||||||||
verbs: | ||||||||||||||||||||||||||||
- create | ||||||||||||||||||||||||||||
--- | ||||||||||||||||||||||||||||
kind: ClusterRoleBinding | ||||||||||||||||||||||||||||
apiVersion: rbac.authorization.k8s.io/v1 | ||||||||||||||||||||||||||||
metadata: | ||||||||||||||||||||||||||||
name: {{ .Values.inferenceExtension.name }} | ||||||||||||||||||||||||||||
subjects: | ||||||||||||||||||||||||||||
- kind: ServiceAccount | ||||||||||||||||||||||||||||
name: {{ .Values.inferenceExtension.name }} | ||||||||||||||||||||||||||||
namespace: {{ .Release.Namespace }} | ||||||||||||||||||||||||||||
roleRef: | ||||||||||||||||||||||||||||
kind: ClusterRole | ||||||||||||||||||||||||||||
name: {{ .Values.inferenceExtension.name }} | ||||||||||||||||||||||||||||
--- | ||||||||||||||||||||||||||||
apiVersion: v1 | ||||||||||||||||||||||||||||
kind: ServiceAccount | ||||||||||||||||||||||||||||
metadata: | ||||||||||||||||||||||||||||
name: {{ .Values.inferenceExtension.name }} | ||||||||||||||||||||||||||||
namespace: {{ .Release.Namespace }} | ||||||||||||||||||||||||||||
labels: | ||||||||||||||||||||||||||||
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }} | ||||||||||||||||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Recommend adding a trailing newline at the end of this file. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
inferenceExtension: | ||
replicas: 1 | ||
image: | ||
name: epp | ||
hub: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension | ||
tag: main | ||
pullPolicy: Always | ||
|
||
name: inference-gateway-ext-proc | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This should probably have the name of the InferencePool in it by default. So if the inference pool is called |
||
|
||
inferencePool: | ||
namespace: default | ||
name: vllm-llama2-7b-pool | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,137 @@ | ||
--- | ||
# Source: gateway-api-inference-extension/templates/rbac.yaml | ||
apiVersion: v1 | ||
kind: ServiceAccount | ||
metadata: | ||
name: inference-gateway-ext-proc | ||
namespace: default | ||
labels: | ||
app.kubernetes.io/name: inference-gateway-ext-proc | ||
app.kubernetes.io/version: "0.1.0" | ||
--- | ||
# Source: gateway-api-inference-extension/templates/rbac.yaml | ||
kind: ClusterRole | ||
apiVersion: rbac.authorization.k8s.io/v1 | ||
metadata: | ||
name: inference-gateway-ext-proc | ||
labels: | ||
app.kubernetes.io/name: inference-gateway-ext-proc | ||
app.kubernetes.io/version: "0.1.0" | ||
rules: | ||
- apiGroups: ["inference.networking.x-k8s.io"] | ||
resources: ["inferencemodels"] | ||
verbs: ["get", "watch", "list"] | ||
- apiGroups: [""] | ||
resources: ["pods"] | ||
verbs: ["get", "watch", "list"] | ||
- apiGroups: ["inference.networking.x-k8s.io"] | ||
resources: ["inferencepools"] | ||
verbs: ["get", "watch", "list"] | ||
- apiGroups: ["discovery.k8s.io"] | ||
resources: ["endpointslices"] | ||
verbs: ["get", "watch", "list"] | ||
- apiGroups: | ||
- authentication.k8s.io | ||
resources: | ||
- tokenreviews | ||
verbs: | ||
- create | ||
- apiGroups: | ||
- authorization.k8s.io | ||
resources: | ||
- subjectaccessreviews | ||
verbs: | ||
- create | ||
--- | ||
# Source: gateway-api-inference-extension/templates/rbac.yaml | ||
kind: ClusterRoleBinding | ||
apiVersion: rbac.authorization.k8s.io/v1 | ||
metadata: | ||
name: inference-gateway-ext-proc | ||
subjects: | ||
- kind: ServiceAccount | ||
name: inference-gateway-ext-proc | ||
namespace: default | ||
roleRef: | ||
kind: ClusterRole | ||
name: inference-gateway-ext-proc | ||
--- | ||
# Source: gateway-api-inference-extension/templates/ext_proc.yaml | ||
apiVersion: v1 | ||
kind: Service | ||
metadata: | ||
name: inference-gateway-ext-proc | ||
namespace: default | ||
labels: | ||
app.kubernetes.io/name: inference-gateway-ext-proc | ||
app.kubernetes.io/version: "0.1.0" | ||
spec: | ||
selector: | ||
app: inference-gateway-ext-proc | ||
ports: | ||
- name: grpc | ||
protocol: TCP | ||
port: 9002 | ||
targetPort: 9002 | ||
- name: http-metrics | ||
protocol: TCP | ||
port: 9090 | ||
targetPort: 9090 | ||
type: ClusterIP | ||
--- | ||
# Source: gateway-api-inference-extension/templates/ext_proc.yaml | ||
apiVersion: apps/v1 | ||
kind: Deployment | ||
metadata: | ||
name: inference-gateway-ext-proc | ||
namespace: default | ||
labels: | ||
app.kubernetes.io/name: inference-gateway-ext-proc | ||
app.kubernetes.io/version: "0.1.0" | ||
spec: | ||
replicas: 1 | ||
selector: | ||
matchLabels: | ||
app: inference-gateway-ext-proc | ||
template: | ||
metadata: | ||
labels: | ||
app: inference-gateway-ext-proc | ||
spec: | ||
serviceAccountName: inference-gateway-ext-proc | ||
containers: | ||
- name: inference-gateway-ext-proc | ||
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main | ||
imagePullPolicy: Always | ||
args: | ||
- -poolName | ||
- vllm-llama2-7b-pool | ||
- -poolNamespace | ||
- default | ||
- -v | ||
- "3" | ||
- -grpcPort | ||
- "9002" | ||
- -grpcHealthPort | ||
- "9003" | ||
- -metricsPort | ||
- "9090" | ||
ports: | ||
- name: grpc | ||
containerPort: 9002 | ||
- name: grpc-health | ||
containerPort: 9003 | ||
- name: metrics | ||
containerPort: 9090 | ||
livenessProbe: | ||
grpc: | ||
port: 9003 | ||
service: inference-extension | ||
initialDelaySeconds: 5 | ||
periodSeconds: 10 | ||
readinessProbe: | ||
grpc: | ||
port: 9003 | ||
service: inference-extension | ||
initialDelaySeconds: 5 | ||
periodSeconds: 10 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Nit: I'd expect this chart to live at
config/charts/inferencepool