Uses a separate model server secret for e2e

danehans · danehans · commit 2319c1f86f01 · 2025-01-28T19:27:40.000Z
Signed-off-by: Daneyon Hansen <daneyon.hansen@solo.io>
diff --git a/pkg/README.md b/pkg/README.md
@@ -13,10 +13,10 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
 
 1. **Deploy Sample Model Server**
 
-   Create a Hugging Face secret to download the [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) model. Ensure that the token grants access to this model.
-
-   Replace `$HF_TOKEN` in `./manifests/vllm/deployment.yaml` with your Hugging Face secret and then deploy the model server.
+   Create a Hugging Face secret to download the model [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf). Ensure that the token grants access to this model.
+   Deploy a sample vLLM deployment with the proper protocol to work with the LLM Instance Gateway.
    ```bash
+   kubectl create secret generic hf-token --from-literal=token=$HF_TOKEN # Your Hugging Face Token with access to Llama2
    kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/pkg/manifests/vllm/deployment.yaml
    ```
 
diff --git a/pkg/manifests/vllm/deployment.yaml b/pkg/manifests/vllm/deployment.yaml
@@ -1,13 +1,4 @@
 apiVersion: v1
-kind: Secret
-metadata:
-  name: hf-token
-  labels:
-    app: vllm
-stringData:
-  token: $HF_TOKEN
----
-apiVersion: v1
 kind: Service
 metadata:
   name: vllm-llama2-7b-pool
diff --git a/test/e2e/e2e_suite_test.go b/test/e2e/e2e_suite_test.go
@@ -68,6 +68,8 @@ const (
 	clientManifest = "../testdata/client.yaml"
 	// modelServerManifest is the manifest for the model server test resources.
 	modelServerManifest = "../../pkg/manifests/vllm/deployment.yaml"
+	// modelServerSecretManifest is the manifest for the model server secret resource.
+	modelServerSecretManifest = "../testdata/model-secret.yaml"
 	// inferPoolManifest is the manifest for the inference pool CRD.
 	inferPoolManifest = "../../config/crd/bases/inference.networking.x-k8s.io_inferencepools.yaml"
 	// inferModelManifest is the manifest for the inference model CRD.
@@ -112,7 +114,7 @@ func setupInfra() {
 	createClient(cli, clientManifest)
 	createEnvoy(cli, envoyManifest)
 	// Run this step last, as it requires additional time for the model server to become ready.
-	createModelServer(cli, modelServerManifest)
+	createModelServer(cli, modelServerSecretManifest, modelServerManifest)
 }
 
 var _ = ginkgo.AfterSuite(func() {
@@ -208,27 +210,30 @@ func createClient(k8sClient client.Client, filePath string) {
 	testutils.PodReady(ctx, k8sClient, pod, readyTimeout, interval)
 }
 
-// createModelServer creates the model server resources used for testing from the given filePath.
-func createModelServer(k8sClient client.Client, filePath string) {
+// createModelServer creates the model server resources used for testing from the given filePaths.
+func createModelServer(k8sClient client.Client, secretPath, deployPath string) {
 	ginkgo.By("Ensuring the HF_TOKEN environment variable is set")
 	token := os.Getenv("HF_TOKEN")
 	gomega.Expect(token).NotTo(gomega.BeEmpty(), "HF_TOKEN is not set")
 
-	inManifests := readYaml(filePath)
+	inManifests := readYaml(secretPath)
 	ginkgo.By("Replacing placeholder secret data with HF_TOKEN environment variable")
 	outManifests := []string{}
 	for _, m := range inManifests {
 		outManifests = append(outManifests, strings.Replace(m, "$HF_TOKEN", token, 1))
 	}
 
-	ginkgo.By("Creating model server resources from manifest: " + filePath)
+	ginkgo.By("Creating model server secret resource from manifest: " + deployPath)
 	createObjsFromYaml(k8sClient, outManifests)
 
 	// Wait for the secret to exist before proceeding with test.
 	testutils.EventuallyExists(ctx, func() error {
 		return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "hf-token"}, &corev1.Secret{})
 	}, existsTimeout, interval)
 
+	ginkgo.By("Creating model server resources from manifest: " + deployPath)
+	applyYAMLFile(k8sClient, deployPath)
+
 	// Wait for the deployment to exist.
 	deploy := &appsv1.Deployment{}
 	testutils.EventuallyExists(ctx, func() error {
diff --git a/test/testdata/model-secret.yaml b/test/testdata/model-secret.yaml
@@ -0,0 +1,8 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: hf-token
+  labels:
+    app: vllm
+stringData:
+  token: $HF_TOKEN