File tree 30 files changed +569
-0
lines changed
30 files changed +569
-0
lines changed File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Original file line number Diff line number Diff line change
---
# Configuration file for the Endpoint Picker, mounted into its Deployment as
# config.yaml.
#
# - pod_selector: label the picker uses to select candidate pods. Label values
#   are strings, so "true" is quoted to match the pod label exactly.
# - routing_filters: no filters configured; written as an explicit empty list
#   rather than a bare key (which would parse as null).
# - routing_scorers: named scorers with their weights.
# - listening_port matches the containerPort/Service port (9080) of the picker.
#   NOTE(review): inference_port is presumably the port of the model servers;
#   confirm against the router implementation.
apiVersion: v1
kind: ConfigMap
metadata:
  name: endpoint-picker-config
data:
  config.yaml: |
    pod_selector:
      ai-aware-router-pod: "true"
    routing_filters: []
    routing_scorers:
      - name: session-affinity
        weight: 60
      - name: route-by-active-lora
        weight: 50
    routing_header: x-ai-aware-router-routing
    session_id_header: x-ai-aware-router-session-id
    listening_port: 9080
    inference_port: 8000
Original file line number Diff line number Diff line change
---
# Endpoint Picker (EPP) Deployment.
#
# Runs a single replica of the ext-proc router under the `endpoint-picker`
# ServiceAccount and mounts the `endpoint-picker-config` ConfigMap at
# /etc/endpoint-picker so the binary can read config.yaml from there.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: endpoint-picker
spec:
  replicas: 1
  selector:
    matchLabels:
      app: endpoint-picker
  template:
    metadata:
      labels:
        app: endpoint-picker
    spec:
      serviceAccountName: endpoint-picker
      containers:
        - name: endpoint-picker
          # The `latest` tag is a placeholder; the component's kustomization
          # pins the tag through its `images` entry for this image name.
          image: inference-router/router-ext-proc:latest
          # Arguments must not carry leading whitespace: each list item is
          # passed to the process verbatim.
          args:
            - "--config-file"
            - "/etc/endpoint-picker/config.yaml"
          ports:
            - name: grpc
              containerPort: 9080
              protocol: TCP
          volumeMounts:
            - name: endpoint-picker-config
              mountPath: /etc/endpoint-picker
      volumes:
        - name: endpoint-picker-config
          configMap:
            name: endpoint-picker-config
Original file line number Diff line number Diff line change
---
# EnvoyFilter attaching the Endpoint Picker to the gateway as an external
# processor (ext_proc).
#
# The ext_proc HTTP filter is inserted first in the HTTP connection manager's
# filter chain. With `failure_mode_allow: false`, a failure to reach the
# processor is not ignored.
#
# REPLACE_NAMESPACE in `cluster_name` must be substituted with the namespace of
# the endpoint-picker Service before applying.
apiVersion: networking.istio.io/v1alpha3
kind: EnvoyFilter
metadata:
  name: endpoint-picker
spec:
  configPatches:
    - applyTo: HTTP_FILTER
      match:
        listener:
          filterChain:
            filter:
              name: envoy.filters.network.http_connection_manager
      patch:
        operation: INSERT_FIRST
        value:
          name: envoy.filters.http.ext_proc
          typed_config:
            "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
            failure_mode_allow: false
            allow_mode_override: true
            processing_mode:
              request_header_mode: SEND
              response_header_mode: SEND
              request_body_mode: BUFFERED
              response_body_mode: BUFFERED
              request_trailer_mode: SEND
              response_trailer_mode: SKIP
            grpc_service:
              envoy_grpc:
                # Istio-generated cluster name for port 9080 of the
                # endpoint-picker Service.
                cluster_name: 'outbound|9080||endpoint-picker.REPLACE_NAMESPACE.svc.cluster.local'
              # NOTE(review): placed on grpc_service (GrpcService.timeout);
              # the original nesting was lost - confirm.
              timeout: 5s
Original file line number Diff line number Diff line change
---
# Gateway for inference traffic, implemented by the `istio` GatewayClass.
#
# The `istio.io/rev` label names the Istio revision that manages this Gateway,
# and the service-type annotation requests a ClusterIP Service for it.
# A single plain-HTTP listener is exposed on port 80.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: inference-gateway
  labels:
    istio.io/rev: istio-control-plane
  annotations:
    networking.istio.io/service-type: ClusterIP
spec:
  gatewayClassName: istio
  listeners:
    - name: default
      port: 80
      protocol: HTTP
Original file line number Diff line number Diff line change
# ------------------------------------------------------------------------------
# Inference Gateway
#
# Deploys a Gateway and the Endpoint Picker (EPP), and attaches the EPP to the
# Gateway with an EnvoyFilter.
#
# Add an HTTPRoute to route traffic to VLLM, or to a VLLM simulator.
#
# **WARNING**: The EnvoyFilter contains the placeholder REPLACE_NAMESPACE,
# which must be replaced with the namespace of the EPP's Service. For now, use
# sed to substitute it, e.g.:
#
#   $ kubectl kustomize deploy/components/inference-gateway \
#       | sed 's/REPLACE_NAMESPACE/mynamespace/g' \
#       | kubectl -n mynamespace apply -f -
#
# (The placeholder is upper-case, so the GNU-only case-insensitive `I` flag is
# not needed; plain `g` also works with BSD/macOS sed.)
# ------------------------------------------------------------------------------
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - configmaps.yaml
  - deployments.yaml
  - services.yaml
  - rbac.yaml
  - gateways.yaml
  - envoy-filters.yaml

# Pins the tag of the EPP image referenced by the Deployment.
images:
  - name: inference-router/router-ext-proc
    newTag: "0.0.1"
Original file line number Diff line number Diff line change
---
# Identity the Endpoint Picker pods run as (referenced by the Deployment's
# serviceAccountName).
apiVersion: v1
kind: ServiceAccount
metadata:
  name: endpoint-picker
---
# Read-only access to Pods in the Role's namespace.
# The empty string in apiGroups denotes the core API group; it must be exactly
# "" (no whitespace), as must the resource and verb names.
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: endpoint-picker
rules:
  - apiGroups:
      - ""
    resources:
      - pods
    verbs:
      - get
      - list
      - watch
---
# Grants the `endpoint-picker` Role to the `endpoint-picker` ServiceAccount.
# No subject namespace is given, so the subject is resolved in the namespace
# the RoleBinding is created in.
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: endpoint-picker-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: Role
  name: endpoint-picker
subjects:
  - kind: ServiceAccount
    name: endpoint-picker
Original file line number Diff line number Diff line change
---
# ClusterIP Service in front of the Endpoint Picker pods (selected by
# `app: endpoint-picker`), exposing the gRPC port 9080.
apiVersion: v1
kind: Service
metadata:
  name: endpoint-picker
spec:
  type: ClusterIP
  selector:
    app: endpoint-picker
  ports:
    - name: grpc
      port: 9080
      targetPort: 9080
      protocol: TCP
Original file line number Diff line number Diff line change
---
# Istio control plane, declared as a Sail Operator `Istio` resource.
# Tracks the latest 1.25 patch release and sets resource requests for the
# pilot component.
apiVersion: sailoperator.io/v1
kind: Istio
metadata:
  name: control-plane
spec:
  version: v1.25-latest
  values:
    pilot:
      resources:
        requests:
          cpu: 100m
          memory: 1024Mi
Original file line number Diff line number Diff line change
# ------------------------------------------------------------------------------
# Istio Control Plane
#
# Deploys a cluster-wide Istio control plane, which is what makes it possible
# to create Gateways.
#
# Every resource is placed in the `istio-system` namespace and has its name
# prefixed with `istio-`.
# ------------------------------------------------------------------------------
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: istio-system
namePrefix: istio-

resources:
  - namespaces.yaml
  - control-plane.yaml
Original file line number Diff line number Diff line change
---
# Namespace named `system`.
# NOTE(review): presumably rendered as `istio-system` once a kustomization
# applies an `istio-` namePrefix - confirm which kustomization includes it.
apiVersion: v1
kind: Namespace
metadata:
  name: system
Original file line number Diff line number Diff line change
1
+ charts/
Original file line number Diff line number Diff line change
# ------------------------------------------------------------------------------
# Istio Sail Operator
#
# Installs the Istio Sail Operator from its Helm chart so that Istio control
# planes, and ultimately Gateways, can be created. The Istio and Gateway API
# CRDs are installed along with it.
#
# Required on plain Kubernetes clusters and on OpenShift versions below 4.19
# (OpenShift 4.19+ ships all of this by default).
#
# **Warning**: Because it installs CRDs, this component must be deployed
# before, and separately from, the other components:
#
#   $ kubectl kustomize --enable-helm deploy/components/sail-operator/ \
#       | kubectl apply --server-side --force-conflicts -f -
#
# ------------------------------------------------------------------------------
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

namespace: sail-operator

resources:
  # Gateway API CRDs, pinned to a release tag.
  - https://github.com/kubernetes-sigs/gateway-api/config/crd?ref=v1.2.1
  - namespaces.yaml

helmCharts:
  - name: sail-operator
    repo: https://istio-ecosystem.github.io/sail-operator
    version: "1.25.1"
    namespace: sail-operator
    includeCRDs: true
Original file line number Diff line number Diff line change
---
# Namespace the Sail Operator is installed into.
apiVersion: v1
kind: Namespace
metadata:
  name: sail-operator
Original file line number Diff line number Diff line change
---
# VLLM simulator instance listening on port 30801, serving `model1` with the
# LoRA adapters `lora1` and `lora2`.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-30801
  labels:
    app: vllm-30801
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vllm-30801
  template:
    metadata:
      labels:
        app: vllm-30801
        # Must be exactly "true" (no whitespace) to match the router's
        # pod selector.
        ai-aware-router-pod: "true"
      annotations:
        # NOTE(review): a loopback address is only reachable from inside this
        # pod; confirm this is what the router expects on a real cluster.
        ai-aware-router-address: "127.0.0.1:30801"
    spec:
      containers:
        - name: vllm
          image: vllm-sim/vllm-sim:latest
          args:
            - "--port=30801"
            - "--model=model1"
            - "--lora=lora1,lora2"
          ports:
            - containerPort: 30801
---
# VLLM simulator instance listening on port 30802, serving `model1` with the
# LoRA adapters `lora1` and `lora2`.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-30802
  labels:
    app: vllm-30802
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vllm-30802
  template:
    metadata:
      labels:
        app: vllm-30802
        # Must be exactly "true" (no whitespace) to match the router's
        # pod selector.
        ai-aware-router-pod: "true"
      annotations:
        # NOTE(review): a loopback address is only reachable from inside this
        # pod; confirm this is what the router expects on a real cluster.
        ai-aware-router-address: "127.0.0.1:30802"
    spec:
      containers:
        - name: vllm
          image: vllm-sim/vllm-sim:latest
          args:
            - "--port=30802"
            - "--model=model1"
            - "--lora=lora1,lora2"
          ports:
            - containerPort: 30802
---
# VLLM simulator instance listening on port 30803, serving `model2` with the
# LoRA adapter `lora3`.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: vllm-30803
  labels:
    app: vllm-30803
spec:
  replicas: 1
  selector:
    matchLabels:
      app: vllm-30803
  template:
    metadata:
      labels:
        app: vllm-30803
        # Must be exactly "true" (no whitespace) to match the router's
        # pod selector.
        ai-aware-router-pod: "true"
      annotations:
        # NOTE(review): a loopback address is only reachable from inside this
        # pod; confirm this is what the router expects on a real cluster.
        ai-aware-router-address: "127.0.0.1:30803"
    spec:
      containers:
        - name: vllm
          image: vllm-sim/vllm-sim:latest
          args:
            - "--port=30803"
            - "--model=model2"
            - "--lora=lora3"
          ports:
            - containerPort: 30803
Original file line number Diff line number Diff line change
# ------------------------------------------------------------------------------
# VLLM Simulator
#
# Deploys a VLLM simulator for simulating inference in small environments
# (e.g. Kubernetes In Docker (KIND) clusters) or in simple tests.
# ------------------------------------------------------------------------------
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - deployments.yaml
  - services.yaml

# Pins the tag of the simulator image referenced by the Deployments.
images:
  - name: vllm-sim/vllm-sim
    newTag: "0.0.2"
Original file line number Diff line number Diff line change
---
# ClusterIP Service for the pods labelled `app: vllm-30801`, on port 30801.
apiVersion: v1
kind: Service
metadata:
  name: vllm-30801
spec:
  type: ClusterIP
  selector:
    app: vllm-30801
  ports:
    - port: 30801
      targetPort: 30801
      protocol: TCP
---
# ClusterIP Service for the pods labelled `app: vllm-30802`, on port 30802.
apiVersion: v1
kind: Service
metadata:
  name: vllm-30802
spec:
  type: ClusterIP
  selector:
    app: vllm-30802
  ports:
    - port: 30802
      targetPort: 30802
      protocol: TCP
---
# ClusterIP Service for the pods labelled `app: vllm-30803`, on port 30803.
apiVersion: v1
kind: Service
metadata:
  name: vllm-30803
spec:
  type: ClusterIP
  selector:
    app: vllm-30803
  ports:
    - port: 30803
      targetPort: 30803
      protocol: TCP
You can’t perform that action at this time.
0 commit comments