File tree 2 files changed +50
-0
lines changed
2 files changed +50
-0
lines changed Original file line number Diff line number Diff line change
1
---
# HTTPRoute attached to the `inference-gateway` Gateway. Any request whose
# path starts with /v1 is forwarded to the backends listed below
# (presumably the vLLM simulator Services; verify against the vllm-sim
# component).
apiVersion: gateway.networking.k8s.io/v1
kind: HTTPRoute
metadata:
  name: inference-route
spec:
  parentRefs:
    - name: inference-gateway
  rules:
    - matches:
        - path:
            type: PathPrefix
            value: /v1
      # No explicit `weight` is set, so every backendRef gets the default
      # weight of 1 and traffic is split evenly between the entries.
      # Each backend must therefore appear exactly once: listing the same
      # backend twice doubles its share of the traffic.
      # NOTE(review): vllm-30802 used to be listed twice here. If a third
      # simulator backend (e.g. vllm-30803) was intended instead, add it
      # below once its Service is confirmed to exist.
      backendRefs:
        - name: vllm-30801
          port: 30801
        - name: vllm-30802
          port: 30802
Original file line number Diff line number Diff line change
1
+ # ------------------------------------------------------------------------------
2
+ # Kubernetes In Docker (KIND) Environment
3
+ #
4
+ # This will deploy the full development stack on a KIND cluster:
5
+ #
6
+ # * Istio Control Plane
7
+ # * VLLM Simulator
8
+ # * Inference Gateway
9
+ #
10
+ # **Note**: The Sail Operator must be deployed first.
11
+ #
12
+ # This will expose the VLLM simulator via an HTTPRoute. You can access the
13
+ # Gateway with a port-forward:
14
+ #
15
+ # $ kubectl port-forward service/inference-gateway-istio 8080:80
16
+ #
17
+ # Requests can then be sent through the forwarded port:
18
+ #
19
+ # $ curl -v -w '\n' -X POST -H 'Content-Type: application/json' \
20
+ # -d '{"model":"model1","messages":[{"role":"user","content":"Hello!"}]}' \
21
+ # http://localhost:8080/v1/chat/completions
22
+ #
23
+ # ------------------------------------------------------------------------------
24
# Kustomize entry point for the KIND development environment.
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization

# Everything listed here is rendered into the final output: three shared
# components plus the routes that are local to this environment.
resources:
  # Shared component: Istio control plane.
  - ../../components/istio-control-plane/
  # Shared component: vLLM simulator.
  - ../../components/vllm-sim/
  # Shared component: inference gateway.
  - ../../components/inference-gateway/
  # Environment-local HTTPRoute definitions.
  - httproutes.yaml
You can’t perform that action at this time.
0 commit comments