diff --git a/Makefile b/Makefile index cf053749a..2d87c8ab9 100644 --- a/Makefile +++ b/Makefile @@ -32,6 +32,8 @@ IMAGE_REGISTRY ?= us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-infe IMAGE_NAME := epp IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(IMAGE_NAME) IMAGE_TAG ?= $(IMAGE_REPO):$(GIT_TAG) +ROOT_DIR:=$(shell dirname $(realpath $(firstword $(MAKEFILE_LIST)))) +E2E_MANIFEST_PATH ?= config/manifests/vllm/gpu-deployment.yaml SYNCER_IMAGE_NAME := lora-syncer SYNCER_IMAGE_REPO ?= $(IMAGE_REGISTRY)/$(SYNCER_IMAGE_NAME) @@ -126,8 +128,8 @@ test-integration: manifests generate fmt vet envtest ## Run tests. KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test ./test/integration/epp/... -race -coverprofile cover.out .PHONY: test-e2e -test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster with at least 3 available GPUs. - go test ./test/e2e/epp -v -ginkgo.v +test-e2e: ## Run end-to-end tests against an existing Kubernetes cluster. When using default configuration, the tests need at least 3 available GPUs. + MANIFEST_PATH=$(ROOT_DIR)/$(E2E_MANIFEST_PATH) go test ./test/e2e/epp/ -v -ginkgo.v .PHONY: lint lint: golangci-lint ## Run golangci-lint linter diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go index 01da152f3..bc7dc87ae 100644 --- a/test/e2e/epp/e2e_suite_test.go +++ b/test/e2e/epp/e2e_suite_test.go @@ -68,8 +68,6 @@ const ( inferExtName = "inference-gateway-ext-proc" // clientManifest is the manifest for the client test resources. clientManifest = "../../testdata/client.yaml" - // modelServerManifest is the manifest for the model server test resources. - modelServerManifest = "../../../config/manifests/vllm/gpu-deployment.yaml" // modelServerSecretManifest is the manifest for the model server secret resource. modelServerSecretManifest = "../../testdata/model-secret.yaml" // inferPoolManifest is the manifest for the inference pool CRD. @@ -80,6 +78,8 @@ const ( inferExtManifest = "../../../config/manifests/ext_proc.yaml" // envoyManifest is the manifest for the envoy proxy test resources. envoyManifest = "../../testdata/envoy.yaml" + // modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource. + modelServerManifestFilepathEnvVar = "MANIFEST_PATH" ) var ( @@ -107,6 +107,7 @@ var _ = ginkgo.BeforeSuite(func() { }) func setupInfra() { + modelServerManifest := readModelServerManifestPath() crds := map[string]string{ "inferencepools.inference.networking.x-k8s.io": inferPoolManifest, "inferencemodels.inference.networking.x-k8s.io": inferModelManifest, @@ -145,6 +146,7 @@ func setupSuite() { kubeCli, err = kubernetes.NewForConfig(cfg) gomega.Expect(err).NotTo(gomega.HaveOccurred()) + gomega.Expect(kubeCli).NotTo(gomega.BeNil()) } func cleanupResources() { @@ -181,6 +183,14 @@ func namespaceExists(k8sClient client.Client, ns string) { }, existsTimeout, interval) } +// readModelServerManifestPath reads from env var the absolute filepath to model server deployment for testing. +func readModelServerManifestPath() string { + ginkgo.By(fmt.Sprintf("Ensuring %s environment variable is set", modelServerManifestFilepathEnvVar)) + modelServerManifestFilepath := os.Getenv(modelServerManifestFilepathEnvVar) + gomega.Expect(modelServerManifestFilepath).NotTo(gomega.BeEmpty(), modelServerManifestFilepathEnvVar+" is not set") + return modelServerManifestFilepath +} + // createCRDs creates the Inference Extension CRDs used for testing. func createCRDs(k8sClient client.Client, crds map[string]string) { for name, path := range crds { @@ -215,6 +225,29 @@ func createClient(k8sClient client.Client, filePath string) { // createModelServer creates the model server resources used for testing from the given filePaths. func createModelServer(k8sClient client.Client, secretPath, deployPath string) { + ginkgo.By("Ensuring the model server manifest points to an existing file") + modelServerManifestArray := readYaml(deployPath) + gomega.Expect(modelServerManifestArray).NotTo(gomega.BeEmpty()) + modelServerManifestYaml := modelServerManifestArray[0] + if strings.Contains(modelServerManifestYaml, "hf-token") { + createHfSecret(k8sClient, secretPath) + } + + ginkgo.By("Creating model server resources from manifest: " + deployPath) + createObjsFromYaml(k8sClient, modelServerManifestArray) + + // Wait for the deployment to exist. + deploy := &appsv1.Deployment{} + testutils.EventuallyExists(ctx, func() error { + return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: modelServerName}, deploy) + }, existsTimeout, interval) + + // Wait for the deployment to be available. + testutils.DeploymentAvailable(ctx, k8sClient, deploy, modelReadyTimeout, interval) +} + +// createHfSecret read HF_TOKEN from env var and creates a secret that contains the access token. +func createHfSecret(k8sClient client.Client, secretPath string) { ginkgo.By("Ensuring the HF_TOKEN environment variable is set") token := os.Getenv("HF_TOKEN") gomega.Expect(token).NotTo(gomega.BeEmpty(), "HF_TOKEN is not set") @@ -226,25 +259,13 @@ func createModelServer(k8sClient client.Client, secretPath, deployPath string) { outManifests = append(outManifests, strings.Replace(m, "$HF_TOKEN", token, 1)) } - ginkgo.By("Creating model server secret resource from manifest: " + deployPath) + ginkgo.By("Creating model server secret resource") createObjsFromYaml(k8sClient, outManifests) // Wait for the secret to exist before proceeding with test. testutils.EventuallyExists(ctx, func() error { return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: "hf-token"}, &corev1.Secret{}) }, existsTimeout, interval) - - ginkgo.By("Creating model server resources from manifest: " + deployPath) - applyYAMLFile(k8sClient, deployPath) - - // Wait for the deployment to exist. - deploy := &appsv1.Deployment{} - testutils.EventuallyExists(ctx, func() error { - return k8sClient.Get(ctx, types.NamespacedName{Namespace: nsName, Name: modelServerName}, deploy) - }, existsTimeout, interval) - - // Wait for the deployment to be available. - testutils.DeploymentAvailable(ctx, k8sClient, deploy, modelReadyTimeout, interval) } // createEnvoy creates the envoy proxy resources used for testing from the given filePath. diff --git a/test/e2e/epp/e2e_test.go b/test/e2e/epp/e2e_test.go index f5cfaf24e..09c8835ac 100644 --- a/test/e2e/epp/e2e_test.go +++ b/test/e2e/epp/e2e_test.go @@ -49,11 +49,7 @@ var _ = ginkgo.Describe("InferencePool", func() { ginkgo.By("Ensuring the InferenceModel resource exists in the namespace") gomega.Eventually(func() error { - err := cli.Get(ctx, types.NamespacedName{Namespace: infModel.Namespace, Name: infModel.Name}, infModel) - if err != nil { - return err - } - return nil + return cli.Get(ctx, types.NamespacedName{Namespace: infModel.Namespace, Name: infModel.Name}, infModel) }, existsTimeout, interval).Should(gomega.Succeed()) ginkgo.By("Verifying connectivity through the inference extension")