Skip to content

Commit 2319c1f

Browse files
committed
Uses a separate model server secret for e2e
Signed-off-by: Daneyon Hansen <[email protected]>
1 parent 4818300 commit 2319c1f

File tree

4 files changed

+21
-17
lines changed

4 files changed

+21
-17
lines changed

pkg/README.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,10 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
1313

1414
1. **Deploy Sample Model Server**
1515

16-
Create a Hugging Face secret to download the [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) model. Ensure that the token grants access to this model.
17-
18-
Replace `$HF_TOKEN` in `./manifests/vllm/deployment.yaml` with your Hugging Face secret and then deploy the model server.
16+
Create a Hugging Face secret to download the model [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). Ensure that the token grants access to this model.
17+
Deploy a sample vLLM deployment with the proper protocol to work with the LLM Instance Gateway.
1918
```bash
19+
kubectl create secret generic hf-token --from-literal=token=$HF_TOKEN # Your Hugging Face Token with access to Llama2
2020
kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/vllm/deployment.yaml
2121
```
2222

pkg/manifests/vllm/deployment.yaml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,4 @@
11
apiVersion: v1
2-
kind: Secret
3-
metadata:
4-
name: hf-token
5-
labels:
6-
app: vllm
7-
stringData:
8-
token: $HF_TOKEN
9-
---
10-
apiVersion: v1
112
kind: Service
123
metadata:
134
name: vllm-llama2-7b-pool

test/e2e/e2e_suite_test.go

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,8 @@ const (
6868
clientManifest = "../testdata/client.yaml"
6969
// modelServerManifest is the manifest for the model server test resources.
7070
modelServerManifest = "../../pkg/manifests/vllm/deployment.yaml"
71+
// modelServerSecretManifest is the manifest for the model server secret resource.
72+
modelServerSecretManifest = "../testdata/model-secret.yaml"
7173
// inferPoolManifest is the manifest for the inference pool CRD.
7274
inferPoolManifest = "../../config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml"
7375
// inferModelManifest is the manifest for the inference model CRD.
@@ -112,7 +114,7 @@ func setupInfra() {
112114
createClient(cli, clientManifest)
113115
createEnvoy(cli, envoyManifest)
114116
// Run this step last, as it requires additional time for the model server to become ready.
115-
createModelServer(cli, modelServerManifest)
117+
createModelServer(cli, modelServerSecretManifest, modelServerManifest)
116118
}
117119

118120
var _ = ginkgo.AfterSuite(func() {
@@ -208,27 +210,30 @@ func createClient(k8sClient client.Client, filePath string) {
208210
testutils.PodReady(ctx, k8sClient, pod, readyTimeout, interval)
209211
}
210212

211-
// createModelServer creates the model server resources used for testing from the given filePath.
212-
func createModelServer(k8sClient client.Client, filePath string) {
213+
// createModelServer creates the model server secret from secretPath and the remaining model server resources from deployPath.
214+
func createModelServer(k8sClient client.Client, secretPath, deployPath string) {
213215
ginkgo.By("Ensuring the HF_TOKEN environment variable is set")
214216
token := os.Getenv("HF_TOKEN")
215217
gomega.Expect(token).NotTo(gomega.BeEmpty(), "HF_TOKEN is not set")
216218

217-
inManifests := readYaml(filePath)
219+
inManifests := readYaml(secretPath)
218220
ginkgo.By("Replacing placeholder secret data with HF_TOKEN environment variable")
219221
outManifests := []string{}
220222
for _, m := range inManifests {
221223
outManifests = append(outManifests, strings.Replace(m, "$HF_TOKEN", token, 1))
222224
}
223225

224-
ginkgo.By("Creating model server resources from manifest: " + filePath)
226+
ginkgo.By("Creating model server secret resource from manifest: " + secretPath)
225227
createObjsFromYaml(k8sClient, outManifests)
226228

227229
// Wait for the secret to exist before proceeding with the test.
228230
testutils.EventuallyExists(ctx, func() error {
229231
return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "hf-token"}, &corev1.Secret{})
230232
}, existsTimeout, interval)
231233

234+
ginkgo.By("Creating model server resources from manifest: " + deployPath)
235+
applyYAMLFile(k8sClient, deployPath)
236+
232237
// Wait for the deployment to exist.
233238
deploy := &appsv1.Deployment{}
234239
testutils.EventuallyExists(ctx, func() error {

test/testdata/model-secret.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
apiVersion: v1
2+
kind: Secret
3+
metadata:
4+
name: hf-token
5+
labels:
6+
app: vllm
7+
stringData:
8+
token: $HF_TOKEN

0 commit comments

Comments
 (0)