Skip to content

Commit 16b95a2

Browse files
authored
Adds Initial e2e Tests and Tooling (#217)
* Adds initial e2e tests and tooling Signed-off-by: Daneyon Hansen <[email protected]> * Refactors e2e for manifest approach Signed-off-by: Daneyon Hansen <[email protected]> * Adds e2e test readme Signed-off-by: Daneyon Hansen <[email protected]> * Uses a separate model server secret for e2e Signed-off-by: Daneyon Hansen <[email protected]> --------- Signed-off-by: Daneyon Hansen <[email protected]>
1 parent 4448a4b commit 16b95a2

16 files changed

+1141
-220
lines changed

Makefile

+2-3
Original file line numberDiff line numberDiff line change
@@ -105,9 +105,8 @@ vet: ## Run go vet against code.
105105
test: manifests generate fmt vet envtest ## Run tests.
106106
KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out
107107

108-
# Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors.
109-
.PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up.
110-
test-e2e:
108+
.PHONY: test-e2e
109+
test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster with at least 3 available GPUs.
111110
go test ./test/e2e/ -v -ginkgo.v
112111

113112
.PHONY: lint

README.md

+4
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,10 @@ This project is currently in development.
1212

1313
Follow this [README](./pkg/README.md) to get the inference-extension up and running on your cluster!
1414

15+
## End-to-End Tests
16+
17+
Follow this [README](./test/e2e/README.md) to learn more about running the inference-extension end-to-end test suite on your cluster.
18+
1519
## Website
1620

1721
Detailed documentation is available on our website: https://gateway-api-inference-extension.sigs.k8s.io/

go.mod

+7-3
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,13 @@ require (
2020
google.golang.org/grpc v1.70.0
2121
google.golang.org/protobuf v1.36.4
2222
k8s.io/api v0.32.1
23+
k8s.io/apiextensions-apiserver v0.32.1
2324
k8s.io/apimachinery v0.32.1
2425
k8s.io/client-go v0.32.1
2526
k8s.io/code-generator v0.32.1
2627
k8s.io/component-base v0.32.1
2728
k8s.io/klog/v2 v2.130.1
29+
k8s.io/utils v0.0.0-20241210054802-24370beab758
2830
sigs.k8s.io/controller-runtime v0.20.1
2931
sigs.k8s.io/structured-merge-diff/v4 v4.5.0
3032
)
@@ -73,6 +75,7 @@ require (
7375
github.com/google/gofuzz v1.2.0 // indirect
7476
github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad // indirect
7577
github.com/google/uuid v1.6.0 // indirect
78+
github.com/gorilla/websocket v1.5.0 // indirect
7679
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 // indirect
7780
github.com/huandu/xstrings v1.3.3 // indirect
7881
github.com/imdario/mergo v0.3.11 // indirect
@@ -87,9 +90,11 @@ require (
8790
github.com/mattn/go-isatty v0.0.20 // indirect
8891
github.com/mitchellh/copystructure v1.2.0 // indirect
8992
github.com/mitchellh/reflectwalk v1.0.2 // indirect
93+
github.com/moby/spdystream v0.5.0 // indirect
9094
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
9195
github.com/modern-go/reflect2 v1.0.2 // indirect
9296
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
97+
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f // indirect
9398
github.com/pkg/errors v0.9.1 // indirect
9499
github.com/planetscale/vtprotobuf v0.6.1-0.20240319094008-0393e58bdf10 // indirect
95100
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
@@ -99,6 +104,7 @@ require (
99104
github.com/spf13/cobra v1.8.1 // indirect
100105
github.com/spf13/pflag v1.0.5 // indirect
101106
github.com/stoewer/go-strcase v1.3.0 // indirect
107+
github.com/stretchr/objx v0.5.2 // indirect
102108
github.com/x448/float16 v0.8.4 // indirect
103109
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.53.0 // indirect
104110
go.opentelemetry.io/otel v1.32.0 // indirect
@@ -128,11 +134,9 @@ require (
128134
gopkg.in/inf.v0 v0.9.1 // indirect
129135
gopkg.in/yaml.v2 v2.4.0 // indirect
130136
gopkg.in/yaml.v3 v3.0.1 // indirect
131-
k8s.io/apiextensions-apiserver v0.32.0 // indirect
132-
k8s.io/apiserver v0.32.0 // indirect
137+
k8s.io/apiserver v0.32.1 // indirect
133138
k8s.io/gengo/v2 v2.0.0-20240911193312-2b36238f13e9 // indirect
134139
k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f // indirect
135-
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 // indirect
136140
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 // indirect
137141
sigs.k8s.io/controller-tools v0.14.0 // indirect
138142
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect

go.sum

+16-6
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751 h1:JYp7IbQjafo
1919
github.com/alecthomas/template v0.0.0-20190718012654-fb15b899a751/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
2020
github.com/antlr4-go/antlr/v4 v4.13.0 h1:lxCg3LAv+EUK6t1i0y1V6/SLeUi0eKEKdhQAlS8TVTI=
2121
github.com/antlr4-go/antlr/v4 v4.13.0/go.mod h1:pfChB/xh/Unjila75QW7+VU4TSnWnnk9UTnmpPaOR2g=
22+
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio=
23+
github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs=
2224
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a h1:idn718Q4B6AGu/h5Sxe66HYVdqdGu2l9Iebqhi/AEoA=
2325
github.com/asaskevich/govalidator v0.0.0-20190424111038-f61b66f89f4a/go.mod h1:lB+ZfQJz7igIIfQNfa7Ml4HSf2uFQQRzpGGRXenZAgY=
2426
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
@@ -116,6 +118,8 @@ github.com/google/pprof v0.0.0-20241210010833-40e02aabc2ad/go.mod h1:vavhavw2zAx
116118
github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
117119
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
118120
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
121+
github.com/gorilla/websocket v1.5.0 h1:PPwGk2jz7EePpoHN/+ClbZu8SPxiqlu12wZP/3sWmnc=
122+
github.com/gorilla/websocket v1.5.0/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
119123
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0 h1:bkypFPDjIYGfCYD5mRBvpqxfYX1YCS1PXdKYWi8FsN0=
120124
github.com/grpc-ecosystem/grpc-gateway/v2 v2.20.0/go.mod h1:P+Lt/0by1T8bfcF3z737NnSbmxQAppXMRziHUxPOC8k=
121125
github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4=
@@ -160,13 +164,17 @@ github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HK
160164
github.com/mitchellh/reflectwalk v1.0.0/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
161165
github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ=
162166
github.com/mitchellh/reflectwalk v1.0.2/go.mod h1:mSTlrgnPZtwu0c4WaC2kGObEpuNDbx0jmZXqmk4esnw=
167+
github.com/moby/spdystream v0.5.0 h1:7r0J1Si3QO/kjRitvSLVVFUjxMEb/YLj6S9FF62JBCU=
168+
github.com/moby/spdystream v0.5.0/go.mod h1:xBAYlnt/ay+11ShkdFKNAG7LsyK/tmNBVvVOwrfMgdI=
163169
github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
164170
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg=
165171
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q=
166172
github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M=
167173
github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk=
168174
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
169175
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
176+
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f h1:y5//uYreIhSUg3J1GEMiLbxo1LJaP8RfCpH6pymGZus=
177+
github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f/go.mod h1:ZdcZmHo+o7JKHSa8/e818NopupXU1YMK5fe1lsApnBw=
170178
github.com/nxadm/tail v1.4.8 h1:nPr65rt6Y5JFSKQO7qToXr7pePgD6Gwiw05lkbyAQTE=
171179
github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU=
172180
github.com/onsi/ginkgo v1.16.5 h1:8xi0RTUf59SOSfEtZMvwTvXYMzG4gV23XVHOZiXNtnE=
@@ -207,6 +215,8 @@ github.com/stoewer/go-strcase v1.3.0/go.mod h1:fAH5hQ5pehh+j3nZfvwdk2RgEgQjAoM8w
207215
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
208216
github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
209217
github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
218+
github.com/stretchr/objx v0.5.2 h1:xuMeJ0Sdp5ZMRXx/aWO6RZxdr3beISkG5/G/aIRr3pY=
219+
github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA=
210220
github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs=
211221
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
212222
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
@@ -341,12 +351,12 @@ gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
341351
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
342352
k8s.io/api v0.32.1 h1:f562zw9cy+GvXzXf0CKlVQ7yHJVYzLfL6JAS4kOAaOc=
343353
k8s.io/api v0.32.1/go.mod h1:/Yi/BqkuueW1BgpoePYBRdDYfjPF5sgTr5+YqDZra5k=
344-
k8s.io/apiextensions-apiserver v0.32.0 h1:S0Xlqt51qzzqjKPxfgX1xh4HBZE+p8KKBq+k2SWNOE0=
345-
k8s.io/apiextensions-apiserver v0.32.0/go.mod h1:86hblMvN5yxMvZrZFX2OhIHAuFIMJIZ19bTvzkP+Fmw=
354+
k8s.io/apiextensions-apiserver v0.32.1 h1:hjkALhRUeCariC8DiVmb5jj0VjIc1N0DREP32+6UXZw=
355+
k8s.io/apiextensions-apiserver v0.32.1/go.mod h1:sxWIGuGiYov7Io1fAS2X06NjMIk5CbRHc2StSmbaQto=
346356
k8s.io/apimachinery v0.32.1 h1:683ENpaCBjma4CYqsmZyhEzrGz6cjn1MY/X2jB2hkZs=
347357
k8s.io/apimachinery v0.32.1/go.mod h1:GpHVgxoKlTxClKcteaeuF1Ul/lDVb74KpZcxcmLDElE=
348-
k8s.io/apiserver v0.32.0 h1:VJ89ZvQZ8p1sLeiWdRJpRD6oLozNZD2+qVSLi+ft5Qs=
349-
k8s.io/apiserver v0.32.0/go.mod h1:HFh+dM1/BE/Hm4bS4nTXHVfN6Z6tFIZPi649n83b4Ag=
358+
k8s.io/apiserver v0.32.1 h1:oo0OozRos66WFq87Zc5tclUX2r0mymoVHRq8JmR7Aak=
359+
k8s.io/apiserver v0.32.1/go.mod h1:UcB9tWjBY7aryeI5zAgzVJB/6k7E97bkr1RgqDz0jPw=
350360
k8s.io/client-go v0.32.1 h1:otM0AxdhdBIaQh7l1Q0jQpmo7WOFIk5FFa4bg6YMdUU=
351361
k8s.io/client-go v0.32.1/go.mod h1:aTTKZY7MdxUaJ/KiUs8D+GssR9zJZi77ZqtzcGXIiDg=
352362
k8s.io/code-generator v0.32.1 h1:4lw1kFNDuFYXquTkB7Sl5EwPMUP2yyW9hh6BnFfRZFY=
@@ -359,8 +369,8 @@ k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk=
359369
k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE=
360370
k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f h1:GA7//TjRY9yWGy1poLzYYJJ4JRdzg3+O6e8I+e+8T5Y=
361371
k8s.io/kube-openapi v0.0.0-20241105132330-32ad38e42d3f/go.mod h1:R/HEjbvWI0qdfb8viZUeVZm0X6IZnxAydC7YU42CMw4=
362-
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738 h1:M3sRQVHv7vB20Xc2ybTt7ODCeFj6JSWYFzOFnYeS6Ro=
363-
k8s.io/utils v0.0.0-20241104100929-3ea5e8cea738/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
372+
k8s.io/utils v0.0.0-20241210054802-24370beab758 h1:sdbE21q2nlQtFh65saZY+rRM6x6aJJI8IUa1AmH/qa0=
373+
k8s.io/utils v0.0.0-20241210054802-24370beab758/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0=
364374
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0 h1:CPT0ExVicCzcpeN4baWEV2ko2Z/AsiZgEdwgcfwLgMo=
365375
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.31.0/go.mod h1:Ve9uj1L+deCXFrPOk1LpFXqTg7LCFzFso6PA48q/XZw=
366376
sigs.k8s.io/controller-runtime v0.20.1 h1:JbGMAG/X94NeM3xvjenVUaBjy6Ui4Ogd/J5ZtjZnHaE=

pkg/README.md

+19-12
Original file line numberDiff line numberDiff line change
@@ -4,31 +4,34 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
44

55
### Requirements
66
- Envoy Gateway [v1.2.1](https://gateway.envoyproxy.io/docs/install/install-yaml/#install-with-yaml) or higher
7-
- A cluster that has built-in support for `ServiceType=LoadBalancer`. (This can be validated by ensuring your Envoy Gateway is up and running)
8-
- For example, with Kind, you can follow these steps: https://kind.sigs.k8s.io/docs/user/loadbalancer
7+
- A cluster with:
8+
- Support for Services of type `LoadBalancer`. (This can be validated by ensuring your Envoy Gateway is up and running). For example, with Kind,
9+
you can follow [these steps](https://kind.sigs.k8s.io/docs/user/loadbalancer).
10+
- 3 GPUs to run the sample model server. Adjust the number of replicas in `./manifests/vllm/deployment.yaml` as needed.
911

1012
### Steps
1113

12-
1. **Deploy Sample vLLM Application**
14+
1. **Deploy Sample Model Server**
1315

14-
Create a Hugging Face secret to download the model [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). Ensure that the token grants access to this model.
16+
Create a Hugging Face secret to download the model [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). Ensure that the token grants access to this model.
1517
Deploy a sample vLLM deployment with the proper protocol to work with the LLM Instance Gateway.
1618
```bash
1719
kubectl create secret generic hf-token --from-literal=token=$HF_TOKEN # Your Hugging Face Token with access to Llama2
18-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/vllm/vllm-lora-deployment.yaml
20+
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/vllm/deployment.yaml
1921
```
2022

21-
1. **Install the CRDs into the cluster:**
23+
1. **Install the Inference Extension CRDs:**
2224

2325
```sh
2426
kubectl apply -k https://github.com/kubernetes-sigs/gateway-api-inference-extension/config/crd
2527
```
2628

27-
1. **Deploy InferenceModel and InferencePool**
29+
1. **Deploy InferenceModel**
2830

29-
Deploy a sample InferenceModel and InferencePool configuration based on the vLLM deployments mentioned above.
31+
Deploy the sample InferenceModel, which is configured to load balance traffic between the `tweet-summary-0` and `tweet-summary-1`
32+
[LoRA adapters](https://docs.vllm.ai/en/latest/features/lora.html) of the sample model server.
3033
```bash
31-
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/inferencepool-with-model.yaml
34+
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/inferencemodel.yaml
3235
```
3336

3437
1. **Update Envoy Gateway Config to enable Patch Policy**
@@ -46,11 +49,15 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
4649
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/gateway/gateway.yaml
4750
```
4851
> **_NOTE:_** This file couples together the gateway infra and the HTTPRoute infra for a convenient, quick startup. Creating additional/different InferencePools on the same gateway will require an additional set of: `Backend`, `HTTPRoute`, the resources included in the `./manifests/gateway/ext-proc.yaml` file, and an additional `./manifests/gateway/patch_policy.yaml` file. ***Should you choose to experiment, familiarity with xDS and Envoy is very useful.***
49-
50-
5152
53+
Confirm that the Gateway was assigned an IP address and reports a `Programmed=True` status:
54+
```bash
55+
$ kubectl get gateway inference-gateway
56+
NAME CLASS ADDRESS PROGRAMMED AGE
57+
inference-gateway inference-gateway <MY_ADDRESS> True 22s
58+
```
5259

53-
1. **Deploy Ext-Proc**
60+
1. **Deploy the Inference Extension and InferencePool**
5461

5562
```bash
5663
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/ext_proc.yaml

pkg/manifests/ext_proc.yaml

+10
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,16 @@ roleRef:
4040
kind: ClusterRole
4141
name: pod-read
4242
---
43+
apiVersion: inference.networking.x-k8s.io/v1alpha1
44+
kind: InferencePool
45+
metadata:
46+
labels:
47+
name: vllm-llama2-7b-pool
48+
spec:
49+
targetPortNumber: 8000
50+
selector:
51+
app: vllm-llama2-7b-pool
52+
---
4353
apiVersion: apps/v1
4454
kind: Deployment
4555
metadata:

pkg/manifests/inferencepool-with-model.yaml renamed to pkg/manifests/inferencemodel.yaml

-10
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,4 @@
11
apiVersion: inference.networking.x-k8s.io/v1alpha1
2-
kind: InferencePool
3-
metadata:
4-
labels:
5-
name: vllm-llama2-7b-pool
6-
spec:
7-
targetPortNumber: 8000
8-
selector:
9-
app: vllm-llama2-7b-pool
10-
---
11-
apiVersion: inference.networking.x-k8s.io/v1alpha1
122
kind: InferenceModel
133
metadata:
144
labels:

pkg/manifests/vllm/vllm-lora-deployment.yaml renamed to pkg/manifests/vllm/deployment.yaml

-3
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,11 @@ spec:
1010
port: 8000
1111
targetPort: 8000
1212
type: ClusterIP
13-
1413
---
15-
1614
apiVersion: apps/v1
1715
kind: Deployment
1816
metadata:
1917
name: vllm-llama2-7b-pool
20-
namespace: default
2118
spec:
2219
replicas: 3
2320
selector:

test/e2e/README.md

+38
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,38 @@
1+
# End-to-End Tests
2+
3+
This document provides instructions on how to run the end-to-end tests.
4+
5+
## Overview
6+
7+
The end-to-end tests are designed to validate end-to-end Gateway API Inference Extension functionality. These tests are executed against a Kubernetes cluster and use the Ginkgo testing framework to ensure the extension behaves as expected.
8+
9+
## Prerequisites
10+
11+
- [Go](https://golang.org/doc/install) installed on your machine.
12+
- [Make](https://www.gnu.org/software/make/manual/make.html) installed to run the end-to-end test target.
13+
- A Hugging Face Hub token with access to the [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) model.
14+
15+
## Running the End-to-End Tests
16+
17+
Follow these steps to run the end-to-end tests:
18+
19+
1. **Clone the Repository**: Clone the `gateway-api-inference-extension` repository:
20+
21+
```sh
22+
git clone https://github.com/kubernetes-sigs/gateway-api-inference-extension.git && cd gateway-api-inference-extension
23+
```
24+
25+
1. **Export Your Hugging Face Hub Token**: The token is required to run the test model server:
26+
27+
```sh
28+
export HF_TOKEN=<MY_HF_TOKEN>
29+
```
30+
31+
1. **Run the Tests**: Run the `test-e2e` target:
32+
33+
```sh
34+
make test-e2e
35+
```
36+
37+
The test suite prints details for each step. Note that the `vllm-llama2-7b-pool` model server deployment
38+
may take several minutes to report an `Available=True` status due to the time required for bootstrapping.

0 commit comments

Comments
 (0)