# deploy/components/inference-gateway: 7 files changed (+169, -0)
---
# ConfigMap holding the endpoint picker's configuration file.
# The endpoint-picker Deployment mounts this ConfigMap and is started with
# --config-file pointing at the config.yaml key below.
#
# Notes on the embedded config:
#   - The pod_selector value is quoted because Kubernetes label values are
#     strings; an unquoted true would be parsed as a boolean.
#   - routing_filters is an explicit empty list rather than a bare key, which
#     YAML would otherwise read as null.
#   - listening_port matches the containerPort / Service port (9080).
apiVersion: v1
kind: ConfigMap
metadata:
  name: endpoint-picker-config
data:
  config.yaml: |
    pod_selector:
      ai-aware-router-pod: "true"
    routing_filters: []
    routing_scorers:
      - name: session-affinity
        weight: 60
      - name: route-by-active-lora
        weight: 50
    routing_header: x-ai-aware-router-routing
    session_id_header: x-ai-aware-router-session-id
    listening_port: 9080
    inference_port: 8000
---
# Deployment running a single replica of the endpoint picker (EPP).
# The container serves gRPC on port 9080 and reads its configuration from the
# endpoint-picker-config ConfigMap mounted at /etc/endpoint-picker.
apiVersion: apps/v1
kind: Deployment
metadata:
  name: endpoint-picker
spec:
  replicas: 1
  selector:
    matchLabels:
      app: endpoint-picker
  template:
    metadata:
      labels:
        app: endpoint-picker
    spec:
      serviceAccountName: endpoint-picker
      containers:
        - name: endpoint-picker
          # The tag is expected to be overridden by the Kustomization's
          # images entry for inference-router/router-ext-proc.
          image: inference-router/router-ext-proc:latest
          # Flag and value are separate argv entries; neither may carry
          # leading whitespace or the flag will not be recognised.
          args:
            - "--config-file"
            - "/etc/endpoint-picker/config.yaml"
          ports:
            - name: grpc
              containerPort: 9080
              protocol: TCP
          volumeMounts:
            - name: endpoint-picker-config
              mountPath: /etc/endpoint-picker
              readOnly: true
      volumes:
        - name: endpoint-picker-config
          configMap:
            name: endpoint-picker-config
---
# EnvoyFilter that inserts Envoy's ext_proc HTTP filter at the front of the
# HTTP connection manager's filter chain and points it at the endpoint picker
# gRPC service.
#
# REPLACE_NAMESPACE in cluster_name is a placeholder and must be substituted
# with the namespace of the endpoint-picker Service before applying.
apiVersion: networking.istio.io/v1alpha3
kind: EnvoyFilter
metadata:
  name: endpoint-picker
spec:
  configPatches:
    - applyTo: HTTP_FILTER
      match:
        listener:
          filterChain:
            filter:
              name: "envoy.filters.network.http_connection_manager"
      patch:
        operation: INSERT_FIRST
        value:
          name: envoy.filters.http.ext_proc
          typed_config:
            "@type": type.googleapis.com/envoy.extensions.filters.http.ext_proc.v3.ExternalProcessor
            # Requests are failed (not passed through) when the external
            # processor cannot be reached.
            failure_mode_allow: false
            allow_mode_override: true
            processing_mode:
              request_header_mode: "SEND"
              response_header_mode: "SEND"
              request_body_mode: "BUFFERED"
              response_body_mode: "BUFFERED"
              request_trailer_mode: "SEND"
              response_trailer_mode: "SKIP"
            grpc_service:
              envoy_grpc:
                # Quoted because the Istio cluster name contains '|'.
                cluster_name: 'outbound|9080||endpoint-picker.REPLACE_NAMESPACE.svc.cluster.local'
              timeout: 5s
---
# Gateway API Gateway using the "istio" GatewayClass.
# Exposes a single HTTP listener on port 80; the service-type annotation asks
# for a ClusterIP Service for the gateway.
apiVersion: gateway.networking.k8s.io/v1
kind: Gateway
metadata:
  name: inference-gateway
  labels:
    istio.io/rev: istio-control-plane
  annotations:
    networking.istio.io/service-type: ClusterIP
spec:
  gatewayClassName: istio
  listeners:
    - name: default
      port: 80
      protocol: HTTP
---
# ------------------------------------------------------------------------------
# Inference Gateway
#
# This deploys a Gateway and the Endpoint Picker (EPP), and attaches the EPP to
# the Gateway with an EnvoyFilter.
#
# Add an HTTPRoute to route traffic to VLLM, or a VLLM simulator.
#
# **WARNING**: The EnvoyFilter contains a variable that needs to be replaced
# with the namespace to match the EPP's Service. For now use sed to replace it,
# e.g.:
#
#   $ kubectl kustomize deploy/components/inference-gateway \
#       | sed 's/REPLACE_NAMESPACE/mynamespace/gI' \
#       | kubectl -n mynamespace apply -f -
# ------------------------------------------------------------------------------
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

resources:
  - configmaps.yaml
  - deployments.yaml
  - services.yaml
  - rbac.yaml
  - gateways.yaml
  - envoy-filters.yaml

# Pins the endpoint picker image tag; quoted so the version is always read as
# a string.
images:
  - name: inference-router/router-ext-proc
    newTag: "0.0.1"
---
# Identity and permissions for the endpoint picker.
# The ServiceAccount is bound to a namespaced Role that allows reading pods
# (get/list/watch) in the core API group.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: endpoint-picker
---
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: endpoint-picker
rules:
  - apiGroups:
      # The empty string denotes the core API group; it must not contain
      # whitespace.
      - ""
    resources:
      - "pods"
    verbs:
      - "get"
      - "list"
      - "watch"
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: endpoint-picker-binding
subjects:
  - kind: ServiceAccount
    name: endpoint-picker
roleRef:
  kind: Role
  name: endpoint-picker
  apiGroup: rbac.authorization.k8s.io
---
# ClusterIP Service fronting the endpoint picker pods (label
# app=endpoint-picker) on gRPC port 9080.
apiVersion: v1
kind: Service
metadata:
  name: endpoint-picker
spec:
  type: ClusterIP
  selector:
    app: endpoint-picker
  ports:
    - name: grpc
      protocol: TCP
      port: 9080
      targetPort: 9080
You can’t perform that action at this time.
0 commit comments