diff --git a/config/manifests/ext_proc.yaml b/config/manifests/ext_proc.yaml
index d70467ee0..ae1896849 100644
--- a/config/manifests/ext_proc.yaml
+++ b/config/manifests/ext_proc.yaml
@@ -44,11 +44,11 @@ apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferencePool
 metadata:
   labels:
-  name: my-pool
+  name: vllm-llama2-7b-pool
 spec:
   targetPortNumber: 8000
   selector:
-    app: my-pool
+    app: vllm-llama2-7b-pool
   extensionRef:
     name: inference-gateway-ext-proc
 ---
@@ -75,7 +75,7 @@ spec:
         imagePullPolicy: Always
         args:
         - -poolName
-        - "my-pool"
+        - "vllm-llama2-7b-pool"
         - -v
         - "4"
         - -grpcPort
diff --git a/config/manifests/inferencemodel.yaml b/config/manifests/inferencemodel.yaml
index 8374c5b3e..83868a993 100644
--- a/config/manifests/inferencemodel.yaml
+++ b/config/manifests/inferencemodel.yaml
@@ -6,7 +6,7 @@ spec:
   modelName: tweet-summary
   criticality: Critical
   poolRef:
-    name: my-pool
+    name: vllm-llama2-7b-pool
   targetModels:
   - name: tweet-summary-1
     weight: 100
@@ -20,7 +20,7 @@ spec:
   modelName: meta-llama/Llama-2-7b-hf
   criticality: Critical
   poolRef:
-    name: my-pool
+    name: vllm-llama2-7b-pool
 
 ---
 apiVersion: inference.networking.x-k8s.io/v1alpha2
@@ -31,4 +31,4 @@ spec:
   modelName: Qwen/Qwen2.5-1.5B-Instruct
   criticality: Critical
   poolRef:
-    name: my-pool
+    name: vllm-llama2-7b-pool
diff --git a/config/manifests/vllm/cpu-deployment.yaml b/config/manifests/vllm/cpu-deployment.yaml
index a0925c837..3bd32812b 100644
--- a/config/manifests/vllm/cpu-deployment.yaml
+++ b/config/manifests/vllm/cpu-deployment.yaml
@@ -1,16 +1,16 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: my-pool
+  name: vllm-llama2-7b-pool
 spec:
   replicas: 3
   selector:
     matchLabels:
-      app: my-pool
+      app: vllm-llama2-7b-pool
   template:
     metadata:
       labels:
-        app: my-pool
+        app: vllm-llama2-7b-pool
     spec:
       containers:
       - name: lora
diff --git a/config/manifests/vllm/gpu-deployment.yaml b/config/manifests/vllm/gpu-deployment.yaml
index d16a46a45..51689c9f2 100644
--- a/config/manifests/vllm/gpu-deployment.yaml
+++ b/config/manifests/vllm/gpu-deployment.yaml
@@ -1,16 +1,16 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: my-pool
+  name: vllm-llama2-7b-pool
 spec:
   replicas: 3
   selector:
     matchLabels:
-      app: my-pool
+      app: vllm-llama2-7b-pool
   template:
     metadata:
       labels:
-        app: my-pool
+        app: vllm-llama2-7b-pool
     spec:
       containers:
       - name: lora
diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go
index bc7dc87ae..a62b77961 100644
--- a/test/e2e/epp/e2e_suite_test.go
+++ b/test/e2e/epp/e2e_suite_test.go
@@ -57,7 +57,7 @@ const (
 	// TODO [danehans]: Must be "default" until https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/227 is fixed
 	nsName = "default"
 	// modelServerName is the name of the model server test resources.
-	modelServerName = "my-pool"
+	modelServerName = "vllm-llama2-7b-pool"
 	// modelName is the test model name.
 	modelName = "tweet-summary"
 	// envoyName is the name of the envoy proxy test resources.