Skip to content

Commit dad8db2

Browse files
authored
Merge pull request kubernetes-sigs#4 from shaneutt/shaneutt/initial-dev-deployments
First iteration of development deployments & environments
2 parents d8303a0 + c679724 commit dad8db2

30 files changed

+569
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
---
# Configuration for the Endpoint Picker (EPP). The `config.yaml` key holds a
# complete YAML document as a literal block; the endpoint-picker Deployment
# mounts this ConfigMap as a volume so the key appears as a file.
apiVersion: v1
kind: ConfigMap
metadata:
  name: endpoint-picker-config
data:
  # The selector value is quoted because Kubernetes label values are strings
  # (the simulator pods carry the label as "true"); an unquoted `true` would be
  # parsed as a boolean by the config loader.
  # NOTE(review): `routing_filters` has no value and therefore parses as null;
  # confirm the router treats that as "no filters".
  config.yaml: |
    pod_selector:
      ai-aware-router-pod: "true"
    routing_filters:
    routing_scorers:
      - name: session-affinity
        weight: 60
      - name: route-by-active-lora
        weight: 50
    routing_header: x-ai-aware-router-routing
    session_id_header: x-ai-aware-router-session-id
    listening_port: 9080
    inference_port: 8000
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
---
# Single-replica Deployment of the Endpoint Picker (EPP) external processor.
# The container reads its settings from a file projected out of the
# `endpoint-picker-config` ConfigMap and exposes port 9080 under the name
# `grpc`.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: endpoint-picker
spec:
  replicas: 1
  selector:
    matchLabels:
      app: endpoint-picker
  template:
    metadata:
      labels:
        app: endpoint-picker
    spec:
      serviceAccountName: endpoint-picker
      volumes:
        # Projects every key of the ConfigMap as a file.
        - name: endpoint-picker-config
          configMap:
            name: endpoint-picker-config
      containers:
        - name: endpoint-picker
          image: inference-router/router-ext-proc:latest
          args:
            - '--config-file'
            - '/etc/endpoint-picker/config.yaml'
          ports:
            - name: grpc
              protocol: TCP
              containerPort: 9080
          volumeMounts:
            # The directory that contains the file passed to --config-file.
            - name: endpoint-picker-config
              mountPath: /etc/endpoint-picker
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
---
# Inserts Envoy's ext_proc HTTP filter at the front of the HTTP filter chain
# so that requests are handed to the Endpoint Picker (EPP) over gRPC.
#
# REPLACE_NAMESPACE in `cluster_name` must be substituted with the namespace
# of the EPP Service before this manifest is applied.
apiVersion: networking.istio.io/v1alpha3
kind: EnvoyFilter
metadata:
  name: endpoint-picker
spec:
  # Without a selector an EnvoyFilter applies to every Istio proxy in its
  # namespace. Because `failure_mode_allow` is false, any proxy carrying this
  # filter rejects traffic when the EPP is unreachable, so restrict it to the
  # pods Istio deploys for the `inference-gateway` Gateway.
  # NOTE(review): the label value must track the Gateway name; update it if an
  # overlay renames the Gateway (e.g. via namePrefix).
  workloadSelector:
    labels:
      gateway.networking.k8s.io/gateway-name: inference-gateway
  configPatches:
    - applyTo: HTTP_FILTER
      match:
        # Only patch gateway listeners, never sidecar inbound/outbound ones.
        context: GATEWAY
        listener:
          filterChain:
            filter:
              name: "envoy.filters.network.http_connection_manager"
      patch:
        operation: INSERT_FIRST
        value:
          name: envoy.filters.http.ext_proc
          typed_config:
            "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
            # Fail closed: an error talking to the processor fails the request.
            failure_mode_allow: false
            allow_mode_override: true
            processing_mode:
              request_header_mode: "SEND"
              response_header_mode: "SEND"
              request_body_mode: "BUFFERED"
              response_body_mode: "BUFFERED"
              request_trailer_mode: "SEND"
              response_trailer_mode: "SKIP"
            grpc_service:
              envoy_grpc:
                cluster_name: outbound|9080||endpoint-picker.REPLACE_NAMESPACE.svc.cluster.local
              timeout: 5s
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
---
# Gateway API Gateway using the `istio` GatewayClass, with a single HTTP
# listener on port 80.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: inference-gateway
  annotations:
    # Requests a ClusterIP Service for the gateway.
    networking.istio.io/service-type: ClusterIP
  labels:
    # NOTE(review): presumably has to match the revision name of the Istio
    # control plane deployed separately - confirm.
    istio.io/rev: istio-control-plane
spec:
  gatewayClassName: istio
  listeners:
    - name: default
      protocol: HTTP
      port: 80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
---
# ------------------------------------------------------------------------------
# Inference Gateway
#
# This deploys a Gateway and the Endpoint Picker (EPP), and attaches the EPP to
# the Gateway with an EnvoyFilter.
#
# Add an HTTPRoute to route traffic to VLLM, or a VLLM simulator.
#
# **WARNING**: The EnvoyFilter contains a placeholder (REPLACE_NAMESPACE) that
# needs to be replaced with the namespace of the EPP's Service. For now use sed
# to replace it, e.g.:
#
#   $ kubectl kustomize deploy/components/inference-gateway \
#       | sed 's/REPLACE_NAMESPACE/mynamespace/g' \
#       | kubectl -n mynamespace apply -f -
#
# (Only the portable `g` flag is used: the case-insensitive `I` flag is a GNU
# sed extension that BSD/macOS sed rejects, and the placeholder is always
# written in upper case.)
# ------------------------------------------------------------------------------
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - configmaps.yaml
  - deployments.yaml
  - services.yaml
  - rbac.yaml
  - gateways.yaml
  - envoy-filters.yaml

# Pins the EPP image tag, overriding the tag written in deployments.yaml.
images:
  - name: inference-router/router-ext-proc
    newTag: "0.0.1"
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
---
# Identity the Endpoint Picker pods run as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: endpoint-picker
---
# Namespaced, read-only access (get/list/watch) to Pods in the core API group.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: endpoint-picker
rules:
  - apiGroups: [""]
    resources: ["pods"]
    verbs: ["get", "list", "watch"]
---
# Grants the Role above to the endpoint-picker ServiceAccount.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: endpoint-picker-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: endpoint-picker
subjects:
  - kind: ServiceAccount
    name: endpoint-picker
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
---
# ClusterIP Service in front of the Endpoint Picker pods (selected by
# `app: endpoint-picker`), forwarding port 9080 to container port 9080.
apiVersion: v1
kind: Service
metadata:
  name: endpoint-picker
spec:
  type: ClusterIP
  selector:
    app: endpoint-picker
  ports:
    - name: grpc
      port: 9080
      targetPort: 9080
      protocol: TCP
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
---
# Sail Operator `Istio` resource describing the control plane to install.
apiVersion: sailoperator.io/v1
kind: Istio
metadata:
  name: control-plane
spec:
  version: v1.25-latest
  values:
    pilot:
      # Resource requests for the pilot component.
      resources:
        requests:
          memory: 1024Mi
          cpu: 100m
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
---
# ------------------------------------------------------------------------------
# Istio Control Plane
#
# Deploys an Istio control plane for the entire cluster, which enables the
# creation of Gateways.
# ------------------------------------------------------------------------------
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1

# Every resource name below is prefixed with `istio-`, and namespaced
# resources are placed in `istio-system`.
namePrefix: istio-
namespace: istio-system

resources:
  - namespaces.yaml
  - control-plane.yaml
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
---
# NOTE(review): the bare name `system` presumably relies on a kustomize
# namePrefix (`istio-`) to become `istio-system` - confirm against the
# kustomization that includes this file.
kind: Namespace
apiVersion: v1
metadata:
  name: system
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# NOTE(review): presumably an ignore-file entry that keeps locally downloaded
# Helm charts (e.g. from `kustomize --enable-helm`) out of version control - confirm.
charts/
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
---
# ------------------------------------------------------------------------------
# Istio Sail Operator
#
# Deploys the Istio Sail Operator from its Helm chart so that Istio control
# planes, and ultimately Gateways, can be created. This also deploys all the
# Istio and Gateway API CRDs.
#
# This is required on Kubernetes clusters, and on OpenShift cluster versions
# below 4.19 (OpenShift 4.19+ includes all this by default).
#
# **Warning**: This needs to be deployed before, and separately from, the other
# components because it deploys CRDs. It can be deployed with:
#
#   $ kubectl kustomize --enable-helm deploy/components/sail-operator/ \
#       | kubectl apply --server-side --force-conflicts -f -
#
# ------------------------------------------------------------------------------
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1

namespace: sail-operator

resources:
  # Gateway API CRDs, pinned to the v1.2.1 ref.
  - https://github.com/kubernetes-sigs/gateway-api/config/crd?ref=v1.2.1
  - namespaces.yaml

helmCharts:
  - name: sail-operator
    repo: https://istio-ecosystem.github.io/sail-operator
    version: "1.25.1"
    namespace: sail-operator
    includeCRDs: true
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
---
# Namespace that hosts the Sail Operator.
kind: Namespace
apiVersion: v1
metadata:
  name: sail-operator
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
---
# Three single-replica vLLM simulator Deployments, one per port (30801-30803).
# Each pod carries the `ai-aware-router-pod: "true"` label and an
# `ai-aware-router-address` annotation.
# NOTE(review): the annotation points at 127.0.0.1, which is only reachable
# from inside the same pod or host - confirm this is intended outside local
# development.

# Simulator on port 30801: serves model1 with LoRAs lora1 and lora2.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-30801
  labels:
    app: vllm-30801
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vllm-30801
  template:
    metadata:
      annotations:
        ai-aware-router-address: '127.0.0.1:30801'
      labels:
        app: vllm-30801
        ai-aware-router-pod: 'true'
    spec:
      containers:
        - name: vllm
          image: vllm-sim/vllm-sim:latest
          args:
            - '--port=30801'
            - '--model=model1'
            - '--lora=lora1,lora2'
          ports:
            - containerPort: 30801
---
# Simulator on port 30802: serves model1 with LoRAs lora1 and lora2.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-30802
  labels:
    app: vllm-30802
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vllm-30802
  template:
    metadata:
      annotations:
        ai-aware-router-address: '127.0.0.1:30802'
      labels:
        app: vllm-30802
        ai-aware-router-pod: 'true'
    spec:
      containers:
        - name: vllm
          image: vllm-sim/vllm-sim:latest
          args:
            - '--port=30802'
            - '--model=model1'
            - '--lora=lora1,lora2'
          ports:
            - containerPort: 30802
---
# Simulator on port 30803: serves model2 with LoRA lora3.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-30803
  labels:
    app: vllm-30803
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vllm-30803
  template:
    metadata:
      annotations:
        ai-aware-router-address: '127.0.0.1:30803'
      labels:
        app: vllm-30803
        ai-aware-router-pod: 'true'
    spec:
      containers:
        - name: vllm
          image: vllm-sim/vllm-sim:latest
          args:
            - '--port=30803'
            - '--model=model2'
            - '--lora=lora3'
          ports:
            - containerPort: 30803
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
---
# ------------------------------------------------------------------------------
# VLLM Simulator
#
# Deploys a VLLM simulator that can stand in for real inference in small
# environments (e.g. Kubernetes In Docker (KIND) clusters) or in simple tests.
# ------------------------------------------------------------------------------
kind: Kustomization
apiVersion: kustomize.config.k8s.io/v1beta1

resources:
  - deployments.yaml
  - services.yaml

# Pins the simulator image tag, overriding the tag written in deployments.yaml.
images:
  - name: vllm-sim/vllm-sim
    newTag: "0.0.2"
+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
---
# One ClusterIP Service per vLLM simulator Deployment. Each Service selects
# its pods by the `app` label and forwards its port to the same container
# port.
apiVersion: v1
kind: Service
metadata:
  name: vllm-30801
spec:
  type: ClusterIP
  selector:
    app: vllm-30801
  ports:
    - port: 30801
      targetPort: 30801
      protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
  name: vllm-30802
spec:
  type: ClusterIP
  selector:
    app: vllm-30802
  ports:
    - port: 30802
      targetPort: 30802
      protocol: TCP
---
apiVersion: v1
kind: Service
metadata:
  name: vllm-30803
spec:
  type: ClusterIP
  selector:
    app: vllm-30803
  ports:
    - port: 30803
      targetPort: 30803
      protocol: TCP

0 commit comments

Comments
 (0)