Skip to content

Commit 58ef159

Browse files
committed
feat: add inference-gateway deployment
Signed-off-by: Shane Utt <[email protected]>
1 parent 8c4eb46 commit 58ef159

File tree

7 files changed

+169
-0
lines changed

7 files changed

+169
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
---
# Endpoint Picker (EPP) settings. The endpoint-picker Deployment mounts this
# ConfigMap at /etc/endpoint-picker and reads config.yaml from there.
apiVersion: v1
kind: ConfigMap
metadata:
  name: endpoint-picker-config
data:
  # Literal block scalar: the text below is passed to the router verbatim, so
  # no YAML comments are placed inside it.
  config.yaml: |
    pod_selector:
      ai-aware-router-pod: true
    routing_filters:
    routing_scorers:
      - name: session-affinity
        weight: 60
      - name: route-by-active-lora
        weight: 50
    routing_header: x-ai-aware-router-routing
    session_id_header: x-ai-aware-router-session-id
    listening_port: 9080
    inference_port: 8000
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
---
# Runs the Endpoint Picker (EPP) container as a single replica, configured
# from the endpoint-picker-config ConfigMap.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: endpoint-picker
spec:
  replicas: 1
  selector:
    matchLabels:
      app: endpoint-picker
  template:
    metadata:
      labels:
        app: endpoint-picker
    spec:
      serviceAccountName: endpoint-picker
      volumes:
        - name: endpoint-picker-config
          configMap:
            name: endpoint-picker-config
      containers:
        - name: endpoint-picker
          # The tag is overridden by the `images` entry in the Kustomization.
          image: inference-router/router-ext-proc:latest
          args:
            - '--config-file'
            - '/etc/endpoint-picker/config.yaml'
          ports:
            # Same port number as listening_port in the mounted config.
            - name: grpc
              containerPort: 9080
              protocol: TCP
          volumeMounts:
            - name: endpoint-picker-config
              mountPath: /etc/endpoint-picker
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
---
# Inserts an ext_proc HTTP filter at the head of the HTTP filter chain so that
# requests and responses are sent to the Endpoint Picker (EPP) over gRPC.
#
# REPLACE_NAMESPACE must be substituted with the namespace of the
# endpoint-picker Service before this manifest is applied.
apiVersion: networking.istio.io/v1alpha3
kind: EnvoyFilter
metadata:
  name: endpoint-picker
spec:
  configPatches:
    - applyTo: HTTP_FILTER
      match:
        # Limit the patch to gateway proxies. This resource has no
        # workloadSelector, so without a context the filter would also be
        # inserted into every sidecar listener in the namespace.
        context: GATEWAY
        listener:
          filterChain:
            filter:
              name: "envoy.filters.network.http_connection_manager"
      patch:
        operation: INSERT_FIRST
        value:
          name: envoy.filters.http.ext_proc
          typed_config:
            "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
            # Fail closed: do not let traffic through if the EPP call fails.
            failure_mode_allow: false
            allow_mode_override: true
            processing_mode:
              request_header_mode: "SEND"
              response_header_mode: "SEND"
              request_body_mode: "BUFFERED"
              response_body_mode: "BUFFERED"
              request_trailer_mode: "SEND"
              response_trailer_mode: "SKIP"
            grpc_service:
              envoy_grpc:
                # Quoted because the Istio cluster name contains `|` characters.
                cluster_name: 'outbound|9080||endpoint-picker.REPLACE_NAMESPACE.svc.cluster.local'
              timeout: 5s
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
---
# Gateway API Gateway of class `istio` with a single plain-HTTP listener.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: inference-gateway
  annotations:
    # Requests a ClusterIP Service for the gateway.
    networking.istio.io/service-type: ClusterIP
  labels:
    istio.io/rev: istio-control-plane
spec:
  gatewayClassName: istio
  listeners:
    - name: default
      protocol: HTTP
      port: 80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# ------------------------------------------------------------------------------
2+
# Inference Gateway
3+
#
4+
# This deploys a Gateway and the Endpoint Picker (EPP), and attaches the EPP to
5+
# the Gateway with an EnvoyFilter.
6+
#
7+
# Add an HTTPRoute to route traffic to VLLM, or a VLLM simulator.
8+
#
9+
# **WARNING**: The EnvoyFilter contains a variable that needs to be replaced
10+
# with the namespace to match the EPP's Service. For now use sed to replace it,
11+
# e.g.:
12+
#
13+
# $ kubectl kustomize deploy/components/inference-gateway \
14+
# | sed 's/REPLACE_NAMESPACE/mynamespace/g' \
15+
# | kubectl -n mynamespace apply -f -
16+
# ------------------------------------------------------------------------------
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Pin the Endpoint Picker image; the Deployment manifest references `:latest`.
images:
  - name: inference-router/router-ext-proc
    newTag: '0.0.1'

# Manifests that make up the inference-gateway component.
resources:
  - configmaps.yaml
  - deployments.yaml
  - services.yaml
  - rbac.yaml
  - gateways.yaml
  - envoy-filters.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
---
# Identity used by the endpoint-picker Deployment (serviceAccountName).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: endpoint-picker
5+
---
# Read-only (get/list/watch) access to core-group Pods.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: endpoint-picker
rules:
  - apiGroups: ['']
    resources: ['pods']
    verbs: ['get', 'list', 'watch']
19+
---
# Binds the endpoint-picker Role to the endpoint-picker ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: endpoint-picker-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: endpoint-picker
subjects:
  - kind: ServiceAccount
    name: endpoint-picker
31+
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
---
# ClusterIP Service exposing the Endpoint Picker's gRPC port to the mesh.
apiVersion: v1
kind: Service
metadata:
  name: endpoint-picker
spec:
  type: ClusterIP
  selector:
    app: endpoint-picker
  ports:
    - name: grpc
      port: 9080
      targetPort: 9080
      protocol: TCP

0 commit comments

Comments
 (0)