kubernetes-sigs
diff --git a/‎Makefile
+2-3 b/‎Makefile
+2-3
diff --git a/‎api/v1alpha2/inferencemodel_types.go
+4 b/‎api/v1alpha2/inferencemodel_types.go
+4
diff --git a/‎config/charts/body-based-routing/README.md
+12-8 b/‎config/charts/body-based-routing/README.md
+12-8
diff --git a/‎config/charts/body-based-routing/templates/gke.yaml
+49 b/‎config/charts/body-based-routing/templates/gke.yaml
+49
diff --git a/‎config/charts/body-based-routing/templates/istio.yaml
+47 b/‎config/charts/body-based-routing/templates/istio.yaml
+47
diff --git a/‎config/charts/body-based-routing/values.yaml
+6 b/‎config/charts/body-based-routing/values.yaml
+6
diff --git a/‎config/charts/inferencepool/README.md
+4-4 b/‎config/charts/inferencepool/README.md
+4-4
diff --git a/‎config/charts/inferencepool/templates/_validations.tpl
+13 b/‎config/charts/inferencepool/templates/_validations.tpl
+13
diff --git a/‎config/charts/inferencepool/templates/epp-deployment.yaml
+58 b/‎config/charts/inferencepool/templates/epp-deployment.yaml
+58
diff --git a/‎config/charts/inferencepool/templates/epp-service.yaml
+18 b/‎config/charts/inferencepool/templates/epp-service.yaml
+18
@@ -33,7 +33,7 @@ IMAGE_REGISTRY ?= $(STAGING_IMAGE_REGISTRY)/gateway-api-inference-extension
 IMAGE_NAME := epp
 IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(IMAGE_NAME)
 IMAGE_TAG ?= $(IMAGE_REPO):$(GIT_TAG)
-ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
+PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
 E2E_MANIFEST_PATH ?= config/manifests/vllm/gpu-deployment.yaml
 
 SYNCER_IMAGE_NAME := lora-syncer
@@ -92,7 +92,6 @@ generate: controller-gen code-generator manifests ## Generate code containing De
 	$(CONTROLLER_GEN) object:headerFile="hack/boilerplate.go.txt" paths="./..."
 	./hack/update-codegen.sh
 
-PROJECT_DIR := $(shell dirname $(abspath $(lastword $(MAKEFILE_LIST))))
 # Use same code-generator version as k8s.io/api
 CODEGEN_VERSION := $(shell go list -m -f '{{.Version}}' k8s.io/api)
 CODEGEN = $(shell pwd)/bin/code-generator
@@ -130,7 +129,7 @@ test-integration: ## Run tests.
 
 .PHONY: test-e2e
 test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster. When using default configuration, the tests need at least 3 available GPUs.
-	MANIFEST_PATH=$(ROOT_DIR)/$(E2E_MANIFEST_PATH) go test ./test/e2e/epp/ -v -ginkgo.v
+	MANIFEST_PATH=$(PROJECT_DIR)/$(E2E_MANIFEST_PATH) go test ./test/e2e/epp/ -v -ginkgo.v
 
 .PHONY: lint
 lint: golangci-lint ## Run golangci-lint linter
 
@@ -25,6 +25,10 @@ import (
 // +kubebuilder:object:root=true
 // +kubebuilder:subresource:status
 // +kubebuilder:storageversion
+// +kubebuilder:printcolumn:name="Model Name",type=string,JSONPath=`.spec.modelName`
+// +kubebuilder:printcolumn:name="Inference Pool",type=string,JSONPath=`.spec.poolRef.name`
+// +kubebuilder:printcolumn:name="Criticality",type=string,JSONPath=`.spec.criticality`
+// +kubebuilder:printcolumn:name="Age",type=date,JSONPath=`.metadata.creationTimestamp`
 // +genclient
 type InferenceModel struct {
 	metav1.TypeMeta   `json:",inline"`
 
@@ -8,13 +8,20 @@ A chart to the body-based routing deployment and service.
 To install a body-based router named `body-based-router`, you can run the following command:
 
 ```txt
-$ helm install body-based-router ./config/charts/body-based-routing
+$ helm install body-based-router ./config/charts/body-based-routing \
+    --set provider.name=[gke|istio] \
+    --set inference-gateway.name=inference-gateway
 ```
 
+Note that the provider name is needed to ensure provider-specific manifests are also applied. If no provider is specified, then only
+the deployment and service are deployed.
+
 To install via the latest published chart in staging  (--version v0 indicates latest dev version), you can run the following command:
 
 ```txt
-$ helm install body-based-router oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/body-based-router --version v0
+$ helm install body-based-router oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/body-based-router \
+    --version v0 \
+    --set provider.name=[gke|istio]
 ```
 
 ## Uninstall
@@ -37,12 +44,9 @@ The following table list the configurable parameters of the chart.
 | `bbr.image.hub`              | Registry URL where the image is hosted.                                                                           | 
 | `bbr.image.tag`              | Image tag.                                                                                                        |
 | `bbr.image.pullPolicy`       | Image pull policy for the container. Possible values: `Always`, `IfNotPresent`, or `Never`. Defaults to `Always`. |
+| `provider.name`              | Name of the Inference Gateway implementation being used. Possible values: `istio`, `gke`. Defaults to `none`.     |
+| `inference-gateway.name`     | The name of the Gateway. Defaults to `inference-gateway`.                                                      |                        
 
 ## Notes
 
-This chart will only deploy the body-based router deployment and service.
-Note that this should only be deployed once per Gateway.
-
-Additional configuration is needed to configure a proxy extension that calls
-out to the service in the request path. For example, vwith Envoy Gateway, this
-would require configuring EnvoyExtensionPolicy.
+This chart should only be deployed once per Gateway.
@@ -0,0 +1,49 @@
+{{- if eq .Values.provider.name "gke" }}
+---
+kind: GCPRoutingExtension # GKE only: attaches the body-based router Service to the Gateway as an extension
+apiVersion: networking.gke.io/v1
+metadata:
+  name: {{ .Values.bbr.name }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  targetRefs:
+  - group: "gateway.networking.k8s.io"
+    kind: Gateway
+    name: {{ index .Values "inference-gateway" "name" }} # `index` is required: a hyphenated key cannot be read with dot syntax
+  extensionChains:
+  - name: chain1
+    extensions:
+    - name: ext1
+      authority: "myext.com"
+      timeout: 1s
+      supportedEvents:
+      - RequestHeaders
+      - RequestBody
+      - RequestTrailers
+      requestBodySendMode: "FullDuplexStreamed"
+      backendRef:
+        group: ""
+        kind: Service
+        name: {{ .Values.bbr.name }}
+        port: 9004 # NOTE(review): hard-coded; confirm it matches the port exposed by the bbr Service
+---
+apiVersion: networking.gke.io/v1
+kind: HealthCheckPolicy # GKE only: gRPC health check for the body-based router Service
+metadata:
+  name: bbr-healthcheck
+  namespace: {{ .Release.Namespace }}
+spec:
+  default:
+    logConfig:
+      enabled: true
+    config:
+      type: "GRPC"
+      grpcHealthCheck:
+        portSpecification: "USE_FIXED_PORT"
+        port: 9005 # NOTE(review): hard-coded; confirm it matches the bbr health port
+  targetRef:
+    group: ""
+    kind: Service
+    name: {{ .Values.bbr.name }}
+    namespace: {{ .Release.Namespace }}
+{{- end }}
@@ -0,0 +1,47 @@
+{{- if eq .Values.provider.name "istio" }}
+---
+apiVersion: networking.istio.io/v1alpha3
+kind: EnvoyFilter # Istio only: inserts an ext_proc HTTP filter that calls the body-based router
+metadata:
+  name: {{ .Values.bbr.name }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  configPatches:
+  - applyTo: HTTP_FILTER
+    match:
+      # context omitted so that this applies to both sidecars and gateways
+      listener:
+        filterChain:
+          filter:
+            name: "envoy.filters.network.http_connection_manager"
+    patch:
+      operation: INSERT_FIRST
+      value:
+        name: envoy.filters.http.ext_proc
+        typed_config:
+          "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
+          failure_mode_allow: false
+          allow_mode_override: true
+          processing_mode:
+            request_header_mode: "SEND"
+            response_header_mode: "SKIP"
+            request_body_mode: "BUFFERED"
+            response_body_mode: "NONE"
+            request_trailer_mode: "SKIP"
+            response_trailer_mode: "SKIP"
+          grpc_service:
+            envoy_grpc:
+              cluster_name: outbound|9004||{{ .Values.bbr.name }}.{{ .Release.Namespace }}.svc.cluster.local # namespace follows the release instead of assuming "default"
+---
+apiVersion: networking.istio.io/v1
+kind: DestinationRule # Istio only: TLS settings for traffic sent to the body-based router host
+metadata:
+  name: {{ .Values.bbr.name }}
+  namespace: {{ .Release.Namespace }}
+spec:
+  host: {{ .Values.bbr.name }}.{{ .Release.Namespace }}.svc.cluster.local
+  trafficPolicy:
+    tls:
+      mode: SIMPLE
+      insecureSkipVerify: true # NOTE(review): disables server certificate verification; confirm this is intended
+{{- end }}
@@ -7,3 +7,9 @@ bbr:
     tag: main
     pullPolicy: Always
   extProcPort: 9002
+
+provider:
+  name: none  # "gke" or "istio" renders the matching provider template; any other value renders neither
+
+inference-gateway:
+  name: inference-gateway  # name of the Gateway referenced by the GKE routing extension's targetRefs
@@ -10,18 +10,18 @@ To install an InferencePool named `vllm-llama2-7b`  that selects from endpoints
 ```txt
 $ helm install vllm-llama2-7b ./config/charts/inferencepool \
   --set inferencePool.name=vllm-llama2-7b \
-  --set inferencePool.selector.app=vllm-llama2-7b \
+  --set inferencePool.modelServers.matchLabels.app=vllm-llama2-7b \
   --set inferencePool.targetPortNumber=8000
 ```
 
-where `inferencePool.targetPortNumber` is the pod that vllm backends served on and `inferencePool.selector` is the selector to match the vllm backends.
+where `inferencePool.targetPortNumber` is the port that the vllm backends serve on and `inferencePool.modelServers.matchLabels` is the selector to match the vllm backends.
 
 To install via the latest published chart in staging  (--version v0 indicates latest dev version), you can run the following command:
 
 ```txt
 $ helm install vllm-llama2-7b \
   --set inferencePool.name=vllm-llama2-7b \
-  --set inferencePool.selector.app=vllm-llama2-7b \
+  --set inferencePool.modelServers.matchLabels.app=vllm-llama2-7b \
   --set inferencePool.targetPortNumber=8000 \
   oci://us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/charts/inferencepool --version v0
 ```
@@ -42,7 +42,7 @@ The following table list the configurable parameters of the chart.
 |---------------------------------------------|-------------------------------------------------------------------------------------------------------------------|
 | `inferencePool.name`                        | Name for the InferencePool, and inference extension will be named as `${inferencePool.name}-epp`.                |
 | `inferencePool.targetPortNumber`            | Target port number for the vllm backends, will be used to scrape metrics by the inference extension.             |
-| `inferencePool.selector`                     | Label selector to match vllm backends managed by the inference pool.                                             |
+| `inferencePool.modelServers.matchLabels`    | Label selector to match vllm backends managed by the inference pool.                                             |
 | `inferenceExtension.replicas`               | Number of replicas for the inference extension service. Defaults to `1`.                                           |
 | `inferenceExtension.image.name`             | Name of the container image used for the inference extension.                                                    |
 | `inferenceExtension.image.hub`              | Registry URL where the inference extension image is hosted.                                                     |
 
@@ -0,0 +1,13 @@
+{{/*
+common validations; modelServers is defaulted to an empty dict so a missing key fails with the message below rather than a nil-pointer error
+*/}}
+{{- define "gateway-api-inference-extension.validations.inferencepool.common" -}}
+{{- if not $.Values.inferencePool.name }}
+{{- fail "missing .Values.inferencePool.name" }}
+{{- end }}
+
+{{- $modelServers := $.Values.inferencePool.modelServers | default dict }}
+{{- if not $modelServers.matchLabels }}
+{{- fail ".Values.inferencePool.modelServers.matchLabels is required" }}
+{{- end }}
+{{- end -}}
@@ -0,0 +1,58 @@
+apiVersion: apps/v1
+kind: Deployment # runs the "epp" container, pointed at the pool via -poolName / -poolNamespace
+metadata:
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  namespace: {{ .Release.Namespace }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  replicas: {{ .Values.inferenceExtension.replicas | default 1 }}
+  selector:
+    matchLabels:
+      {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 6 }}
+  template:
+    metadata:
+      labels:
+        {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 8 }}
+    spec:
+      serviceAccountName: {{ include "gateway-api-inference-extension.name" . }}
+      containers:
+      - name: epp
+        image: {{ .Values.inferenceExtension.image.hub }}/{{ .Values.inferenceExtension.image.name }}:{{ .Values.inferenceExtension.image.tag }}
+        imagePullPolicy: {{ .Values.inferenceExtension.image.pullPolicy | default "Always" }}
+        args:
+        - -poolName
+        - {{ .Values.inferencePool.name }}
+        - -poolNamespace
+        - {{ .Release.Namespace }}
+        - -v
+        - "3"
+        - -grpcPort
+        - {{ .Values.inferenceExtension.extProcPort | default 9002 | quote }} # same value as the Service port, which sets no targetPort
+        - -grpcHealthPort
+        - "9003"
+        - -metricsPort
+        - {{ .Values.inferenceExtension.metricsPort | default 9090 | quote }} # same value as the Service metrics port
+        env:
+        - name: USE_STREAMING
+          value: "true"
+        ports:
+        - name: grpc
+          containerPort: {{ .Values.inferenceExtension.extProcPort | default 9002 }}
+        - name: grpc-health
+          containerPort: 9003
+        - name: metrics
+          containerPort: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
+        livenessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+        readinessProbe:
+          grpc:
+            port: 9003
+            service: inference-extension
+          initialDelaySeconds: 5
+          periodSeconds: 10
+
@@ -0,0 +1,18 @@
+apiVersion: v1
+kind: Service # selects pods via the chart's selectorLabels helper
+metadata:
+  namespace: {{ .Release.Namespace }}
+  name: {{ include "gateway-api-inference-extension.name" . }}
+  labels:
+    {{- include "gateway-api-inference-extension.labels" . | nindent 4 }}
+spec:
+  type: ClusterIP
+  selector:
+    {{- include "gateway-api-inference-extension.selectorLabels" . | nindent 4 }}
+  ports:
+    - port: {{ .Values.inferenceExtension.extProcPort | default 9002 }} # no targetPort set, so traffic goes to the same port number on the pod
+      name: grpc-ext-proc
+      protocol: TCP
+    - port: {{ .Values.inferenceExtension.metricsPort | default 9090 }}
+      name: http-metrics
+      protocol: TCP