diff --git a/config/manifests/gateway/httproute-with-timeout.yaml b/config/manifests/gateway/httproute-with-timeout.yaml index 060f18c50..18e90ced6 100644 --- a/config/manifests/gateway/httproute-with-timeout.yaml +++ b/config/manifests/gateway/httproute-with-timeout.yaml @@ -11,7 +11,7 @@ spec: - backendRefs: - group: inference.networking.x-k8s.io kind: InferencePool - name: vllm-llama2-7b + name: vllm-llama3-8b-instruct matches: - path: type: PathPrefix diff --git a/config/manifests/gateway/httproute.yaml b/config/manifests/gateway/httproute.yaml index 5bd8bfb6c..6ea90891c 100644 --- a/config/manifests/gateway/httproute.yaml +++ b/config/manifests/gateway/httproute.yaml @@ -11,7 +11,7 @@ spec: - backendRefs: - group: inference.networking.x-k8s.io kind: InferencePool - name: vllm-llama2-7b + name: vllm-llama3-8b-instruct matches: - path: type: PathPrefix diff --git a/config/manifests/inferencemodel.yaml b/config/manifests/inferencemodel.yaml index 5edb60011..75c9bb173 100644 --- a/config/manifests/inferencemodel.yaml +++ b/config/manifests/inferencemodel.yaml @@ -8,7 +8,7 @@ spec: poolRef: name: vllm-llama3-8b-instruct targetModels: - - name: food-review-1 + - name: food-review weight: 100 ---