Skip to content

Commit b63263d

Browse files
authored
Merge branch 'kubernetes-sigs:main' into userguide
2 parents ce19438 + fc3f414 commit b63263d

File tree

17 files changed

+272
-149
lines changed

17 files changed

+272
-149
lines changed

Makefile

+2-3
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ IMAGE_REGISTRY ?= $(STAGING_IMAGE_REGISTRY)/gateway-api-inference-extension
3333
IMAGE_NAME := epp
3434
IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(IMAGE_NAME)
3535
IMAGE_TAG ?= $(IMAGE_REPO):$(GIT_TAG)
36-
ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
36+
PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
3737
E2E_MANIFEST_PATH ?= config/manifests/vllm/gpu-deployment.yaml
3838

3939
SYNCER_IMAGE_NAME := lora-syncer
@@ -92,7 +92,6 @@ generate: controller-gen code-generator manifests ## Generate code containing De
9292
$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."
9393
./hack/update-codegen.sh
9494

95-
PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
9695
# Use same code-generator version as k8s.io/api
9796
CODEGEN_VERSION := $(shell go list -m -f '{{.Version}}' k8s.io/api)
9897
CODEGEN = $(shell pwd)/bin/code-generator
@@ -130,7 +129,7 @@ test-integration: ## Run tests.
130129

131130
.PHONY: test-e2e
132131
test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster. When using default configuration, the tests need at least 3 available GPUs.
133-
MANIFEST_PATH=$(ROOT_DIR)/$(E2E_MANIFEST_PATH) go test ./test/e2e/epp/ -v -ginkgo.v
132+
MANIFEST_PATH=$(PROJECT_DIR)/$(E2E_MANIFEST_PATH) go test ./test/e2e/epp/ -v -ginkgo.v
134133

135134
.PHONY: lint
136135
lint: golangci-lint ## Run golangci-lint linter

api/v1alpha2/inferencemodel_types.go

+4
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ import (
2525
// +kubebuilder:object:root=true
2626
// +kubebuilder:subresource:status
2727
// +kubebuilder:storageversion
28+
// +kubebuilder:printcolumn:name="Model Name",type=string,JSONPath=`.spec.modelName`
29+
// +kubebuilder:printcolumn:name="Inference Pool",type=string,JSONPath=`.spec.poolRef.name`
30+
// +kubebuilder:printcolumn:name="Criticality",type=string,JSONPath=`.spec.criticality`
31+
// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
2832
// +genclient
2933
type InferenceModel struct {
3034
metav1.TypeMeta `json:",inline"`

config/charts/body-based-routing/README.md

+12-8
Original file line numberDiff line numberDiff line change
@@ -8,13 +8,20 @@ A chart to the body-based routing deployment and service.
88
To install a body-based router named `body-based-router`, you can run the following command:
99

1010
```txt
11-
$ helm install body-based-router ./config/charts/body-based-routing
11+
$ helm install body-based-router ./config/charts/body-based-routing \
12+
--set provider.name=[gke|istio] \
13+
--set inference-gateway.name=inference-gateway
1214
```
1315

16+
Note that the provider name is needed to ensure provider-specific manifests are also applied. If no provider is specified, then only
17+
the deployment and service are deployed.
18+
1419
To install via the latest published chart in staging (--version v0 indicates latest dev version), you can run the following command:
1520

1621
```txt
17-
$ helm install body-based-router oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/body-based-router --version v0
22+
$ helm install body-based-router oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/body-based-router \
23+
--version v0 \
24+
--set provider.name=[gke|istio]
1825
```
1926

2027
## Uninstall
@@ -37,12 +44,9 @@ The following table list the configurable parameters of the chart.
3744
| `bbr.image.hub` | Registry URL where the image is hosted. |
3845
| `bbr.image.tag` | Image tag. |
3946
| `bbr.image.pullPolicy` | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
47+
| `provider.name` | Name of the Inference Gateway implementation being used. Possible values: `istio`, `gke`. Defaults to `none`. |
48+
| `inference-gateway.name` | The name of the Gateway. Defaults to `inference-gateway`. |
4049

4150
## Notes
4251

43-
This chart will only deploy the body-based router deployment and service.
44-
Note that this should only be deployed once per Gateway.
45-
46-
Additional configuration is needed to configure a proxy extension that calls
47-
out to the service in the request path. For example, vwith Envoy Gateway, this
48-
would require configuring EnvoyExtensionPolicy.
52+
This chart should only be deployed once per Gateway.
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
{{- if eq .Values.provider.name "gke" }}
2+
---
3+
kind: GCPRoutingExtension
4+
apiVersion: networking.gke.io/v1
5+
metadata:
6+
name: {{ .Values.bbr.name }}
7+
namespace: {{ .Release.Namespace }}
8+
spec:
9+
targetRefs:
10+
- group: "gateway.networking.k8s.io"
11+
kind: Gateway
12+
# The values key "inference-gateway" contains a hyphen, so it must be looked up with `index`; dotted access does not parse.
name: {{ index .Values "inference-gateway" "name" }}
13+
extensionChains:
14+
- name: chain1
15+
extensions:
16+
- name: ext1
17+
authority: "myext.com"
18+
timeout: 1s
19+
supportedEvents:
20+
- RequestHeaders
21+
- RequestBody
22+
- RequestTrailers
23+
requestBodySendMode: "FullDuplexStreamed"
24+
backendRef:
25+
group: ""
26+
kind: Service
27+
name: {{ .Values.bbr.name }}
28+
port: 9004
29+
---
30+
apiVersion: networking.gke.io/v1
31+
kind: HealthCheckPolicy
32+
metadata:
33+
name: bbr-healthcheck
34+
namespace: {{ .Release.Namespace }}
35+
spec:
36+
default:
37+
logConfig:
38+
enabled: true
39+
config:
40+
type: "GRPC"
41+
grpcHealthCheck:
42+
portSpecification: "USE_FIXED_PORT"
43+
port: 9005
44+
targetRef:
45+
group: ""
46+
kind: Service
47+
name: {{ .Values.bbr.name }}
48+
namespace: {{ .Release.Namespace }}
49+
{{- end }}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
{{- if eq .Values.provider.name "istio" }}
2+
---
3+
apiVersion: networking.istio.io/v1alpha3
4+
kind: EnvoyFilter
5+
metadata:
6+
name: {{ .Values.bbr.name }}
7+
namespace: {{ .Release.Namespace }}
8+
spec:
9+
configPatches:
10+
- applyTo: HTTP_FILTER
11+
match:
12+
# context omitted so that this applies to both sidecars and gateways
13+
listener:
14+
filterChain:
15+
filter:
16+
name: "envoy.filters.network.http_connection_manager"
17+
patch:
18+
operation: INSERT_FIRST
19+
value:
20+
name: envoy.filters.http.ext_proc
21+
typed_config:
22+
"@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
23+
failure_mode_allow: false
24+
allow_mode_override: true
25+
processing_mode:
26+
request_header_mode: "SEND"
27+
response_header_mode: "SKIP"
28+
request_body_mode: "BUFFERED"
29+
response_body_mode: "NONE"
30+
request_trailer_mode: "SKIP"
31+
response_trailer_mode: "SKIP"
32+
grpc_service:
33+
envoy_grpc:
34+
# Address the BBR service in the namespace the chart is installed into, not a hard-coded "default".
cluster_name: outbound|9004||{{ .Values.bbr.name }}.{{ .Release.Namespace }}.svc.cluster.local
35+
---
36+
apiVersion: networking.istio.io/v1
37+
kind: DestinationRule
38+
metadata:
39+
name: {{ .Values.bbr.name }}
40+
namespace: {{ .Release.Namespace }}
41+
spec:
42+
# Must name the same service FQDN as the ext_proc cluster_name in the EnvoyFilter.
host: {{ .Values.bbr.name }}.{{ .Release.Namespace }}.svc.cluster.local
43+
trafficPolicy:
44+
tls:
45+
mode: SIMPLE
46+
insecureSkipVerify: true
47+
{{- end }}

config/charts/body-based-routing/values.yaml

+6
Original file line numberDiff line numberDiff line change
@@ -7,3 +7,9 @@ bbr:
77
tag: main
88
pullPolicy: Always
99
extProcPort: 9002
10+
11+
provider:
12+
name: none
13+
14+
inference-gateway:
15+
name: inference-gateway

config/charts/inferencepool/README.md

+4-4
Original file line numberDiff line numberDiff line change
@@ -10,18 +10,18 @@ To install an InferencePool named `vllm-llama2-7b` that selects from endpoints
1010
```txt
1111
$ helm install vllm-llama2-7b ./config/charts/inferencepool \
1212
--set inferencePool.name=vllm-llama2-7b \
13-
--set inferencePool.selector.app=vllm-llama2-7b \
13+
--set inferencePool.modelServers.matchLabels.app=vllm-llama2-7b \
1414
--set inferencePool.targetPortNumber=8000
1515
```
1616

17-
where `inferencePool.targetPortNumber` is the pod that vllm backends served on and `inferencePool.selector` is the selector to match the vllm backends.
17+
where `inferencePool.targetPortNumber` is the port that the vllm backends serve on and `inferencePool.modelServers.matchLabels` is the selector to match the vllm backends.
1818

1919
To install via the latest published chart in staging (--version v0 indicates latest dev version), you can run the following command:
2020

2121
```txt
2222
$ helm install vllm-llama2-7b \
2323
--set inferencePool.name=vllm-llama2-7b \
24-
--set inferencePool.selector.app=vllm-llama2-7b \
24+
--set inferencePool.modelServers.matchLabels.app=vllm-llama2-7b \
2525
--set inferencePool.targetPortNumber=8000 \
2626
oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
2727
```
@@ -42,7 +42,7 @@ The following table list the configurable parameters of the chart.
4242
|---------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
4343
| `inferencePool.name` | Name for the InferencePool, and inference extension will be named as `${inferencePool.name}-epp`. |
4444
| `inferencePool.targetPortNumber` | Target port number for the vllm backends, will be used to scrape metrics by the inference extension. |
45-
| `inferencePool.selector` | Label selector to match vllm backends managed by the inference pool. |
45+
| `inferencePool.modelServers.matchLabels` | Label selector to match vllm backends managed by the inference pool. |
4646
| `inferenceExtension.replicas` | Number of replicas for the inference extension service. Defaults to `1`. |
4747
| `inferenceExtension.image.name` | Name of the container image used for the inference extension. |
4848
| `inferenceExtension.image.hub` | Registry URL where the inference extension image is hosted. |
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
{{/*
2+
common validations
3+
*/}}
4+
{{- define "gateway-api-inference-extension.validations.inferencepool.common" -}}
5+
{{- if not $.Values.inferencePool.name }}
6+
{{- fail "missing .Values.inferencePool.name" }}
7+
{{- end }}
8+
9+
10+
{{- if or (empty $.Values.inferencePool.modelServers) (not $.Values.inferencePool.modelServers.matchLabels) }}
11+
{{- fail ".Values.inferencePool.modelServers.matchLabels is required" }}
12+
{{- end }}
13+
{{- end -}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
apiVersion: apps/v1
2+
kind: Deployment
3+
metadata:
4+
name: {{ include "gateway-api-inference-extension.name" . }}
5+
namespace: {{ .Release.Namespace }}
6+
labels:
7+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
8+
spec:
9+
replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
10+
selector:
11+
matchLabels:
12+
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
13+
template:
14+
metadata:
15+
labels:
16+
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
17+
spec:
18+
serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
19+
containers:
20+
- name: epp
21+
image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
22+
imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
23+
args:
24+
- -poolName
25+
- {{ .Values.inferencePool.name }}
26+
- -poolNamespace
27+
- {{ .Release.Namespace }}
28+
- -v
29+
- "3"
30+
- -grpcPort
31+
- "9002"
32+
- -grpcHealthPort
33+
- "9003"
34+
- -metricsPort
35+
- "9090"
36+
env:
37+
- name: USE_STREAMING
38+
value: "true"
39+
ports:
40+
- name: grpc
41+
containerPort: 9002
42+
- name: grpc-health
43+
containerPort: 9003
44+
- name: metrics
45+
containerPort: 9090
46+
livenessProbe:
47+
grpc:
48+
port: 9003
49+
service: inference-extension
50+
initialDelaySeconds: 5
51+
periodSeconds: 10
52+
readinessProbe:
53+
grpc:
54+
port: 9003
55+
service: inference-extension
56+
initialDelaySeconds: 5
57+
periodSeconds: 10
58+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
apiVersion: v1
2+
kind: Service
3+
metadata:
4+
name: {{ include "gateway-api-inference-extension.name" . }}
5+
namespace: {{ .Release.Namespace }}
6+
labels:
7+
{{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
8+
spec:
9+
selector:
10+
{{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
11+
ports:
12+
- name: grpc-ext-proc
13+
protocol: TCP
14+
port: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
15+
- name: http-metrics
16+
protocol: TCP
17+
port: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
18+
type: ClusterIP

0 commit comments

Comments
 (0)