From 11c9130f479564f99879562b9c976f82be852854 Mon Sep 17 00:00:00 2001 From: ahg-g Date: Mon, 17 Mar 2025 18:08:57 +0000 Subject: [PATCH 1/3] rename ext_proc.yaml to inferencepool.yaml --- .../{ext_proc.yaml => inferencepool.yaml} | 118 +++++++++--------- site-src/guides/index.md | 6 +- test/e2e/epp/e2e_suite_test.go | 4 +- test/testdata/envoy.yaml | 4 +- 4 files changed, 66 insertions(+), 66 deletions(-) rename config/manifests/{ext_proc.yaml => inferencepool.yaml} (88%) diff --git a/config/manifests/ext_proc.yaml b/config/manifests/inferencepool.yaml similarity index 88% rename from config/manifests/ext_proc.yaml rename to config/manifests/inferencepool.yaml index d70467ee0..0f0a8a86a 100644 --- a/config/manifests/ext_proc.yaml +++ b/config/manifests/inferencepool.yaml @@ -1,45 +1,3 @@ -kind: ClusterRole -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: pod-read -rules: -- apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencemodels"] - verbs: ["get", "watch", "list"] -- apiGroups: [""] - resources: ["pods"] - verbs: ["get", "watch", "list"] -- apiGroups: ["inference.networking.x-k8s.io"] - resources: ["inferencepools"] - verbs: ["get", "watch", "list"] -- apiGroups: ["discovery.k8s.io"] - resources: ["endpointslices"] - verbs: ["get", "watch", "list"] -- apiGroups: - - authentication.k8s.io - resources: - - tokenreviews - verbs: - - create -- apiGroups: - - authorization.k8s.io - resources: - - subjectaccessreviews - verbs: - - create ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: pod-read-binding -subjects: -- kind: ServiceAccount - name: default - namespace: default -roleRef: - kind: ClusterRole - name: pod-read ---- apiVersion: inference.networking.x-k8s.io/v1alpha2 kind: InferencePool metadata: @@ -50,27 +8,41 @@ spec: selector: app: my-pool extensionRef: - name: inference-gateway-ext-proc + name: my-pool-epp-ext-proc +--- +apiVersion: v1 +kind: Service +metadata: + name: my-pool-epp-ext-proc + namespace: default +spec: + selector: + app: my-pool-epp-ext-proc + ports: + - protocol: TCP + port: 9002 + targetPort: 9002 + type: ClusterIP --- apiVersion: apps/v1 kind: Deployment metadata: - name: inference-gateway-ext-proc + name: my-pool-epp-ext-proc namespace: default labels: - app: inference-gateway-ext-proc + app: my-pool-epp-ext-proc spec: replicas: 1 selector: matchLabels: - app: inference-gateway-ext-proc + app: my-pool-epp-ext-proc template: metadata: labels: - app: inference-gateway-ext-proc + app: my-pool-epp-ext-proc spec: containers: - - name: inference-gateway-ext-proc + - name: epp image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main imagePullPolicy: Always args: @@ -103,16 +75,44 @@ spec: initialDelaySeconds: 5 periodSeconds: 10 --- -apiVersion: v1 -kind: Service +kind: ClusterRole +apiVersion: rbac.authorization.k8s.io/v1 metadata: - name: inference-gateway-ext-proc + name: pod-read +rules: +- apiGroups: ["inference.networking.x-k8s.io"] + resources: ["inferencemodels"] + verbs: ["get", "watch", "list"] +- apiGroups: [""] + resources: ["pods"] + verbs: ["get", "watch", "list"] +- apiGroups: ["inference.networking.x-k8s.io"] + resources: ["inferencepools"] + verbs: ["get", "watch", "list"] +- apiGroups: ["discovery.k8s.io"] + resources: ["endpointslices"] + verbs: ["get", "watch", "list"] +- apiGroups: + - authentication.k8s.io + resources: + - tokenreviews + verbs: + - create +- apiGroups: + - authorization.k8s.io + resources: + - subjectaccessreviews + verbs: + - create +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: pod-read-binding +subjects: +- kind: ServiceAccount + name: default namespace: default -spec: - selector: - app: inference-gateway-ext-proc - ports: - - protocol: TCP - port: 9002 - targetPort: 9002 - type: ClusterIP +roleRef: + kind: ClusterRole + name: pod-read diff --git a/site-src/guides/index.md b/site-src/guides/index.md index 94f5c9c1c..d6ff84594 100644 --- a/site-src/guides/index.md +++ b/site-src/guides/index.md @@ -80,10 +80,10 @@ This quickstart guide is intended for engineers familiar with k8s and model serv NAME CLASS ADDRESS PROGRAMMED AGE inference-gateway inference-gateway True 22s ``` -### Deploy the Inference Extension and InferencePool +### Deploy the InferencePool and Extension ```bash - kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/ext_proc.yaml + kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml ``` ### Deploy Envoy Gateway Custom Policies @@ -134,4 +134,4 @@ This quickstart guide is intended for engineers familiar with k8s and model serv kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found kubectl delete secret hf-token --ignore-not-found - ``` \ No newline at end of file + ``` diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go index bc7dc87ae..435016287 100644 --- a/test/e2e/epp/e2e_suite_test.go +++ b/test/e2e/epp/e2e_suite_test.go @@ -65,7 +65,7 @@ const ( // envoyPort is the listener port number of the test envoy proxy. envoyPort = "8081" // inferExtName is the name of the inference extension test resources. - inferExtName = "inference-gateway-ext-proc" + inferExtName = "my-pool-epp-ext-proc" // clientManifest is the manifest for the client test resources. clientManifest = "../../testdata/client.yaml" // modelServerSecretManifest is the manifest for the model server secret resource. @@ -75,7 +75,7 @@ const ( // inferModelManifest is the manifest for the inference model CRD. inferModelManifest = "../../../config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml" // inferExtManifest is the manifest for the inference extension test resources. - inferExtManifest = "../../../config/manifests/ext_proc.yaml" + inferExtManifest = "../../../config/manifests/inferencepool.yaml" // envoyManifest is the manifest for the envoy proxy test resources. envoyManifest = "../../testdata/envoy.yaml" // modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource. diff --git a/test/testdata/envoy.yaml b/test/testdata/envoy.yaml index ffb8add78..dc0c0c552 100644 --- a/test/testdata/envoy.yaml +++ b/test/testdata/envoy.yaml @@ -100,7 +100,7 @@ data: grpc_service: envoy_grpc: cluster_name: ext_proc - authority: inference-gateway-ext-proc.default:9002 + authority: my-pool-epp-ext-proc.default:9002 timeout: 10s processing_mode: request_header_mode: SEND @@ -194,7 +194,7 @@ data: - endpoint: address: socket_address: - address: inference-gateway-ext-proc.default + address: my-pool-epp-ext-proc.default port_value: 9002 health_status: HEALTHY load_balancing_weight: 1 From 9927ff4b874a8aa15b7ae2f099211c994d8428de Mon Sep 17 00:00:00 2001 From: ahg-g Date: Mon, 17 Mar 2025 21:06:59 +0000 Subject: [PATCH 2/3] removed ext-proc suffix --- config/manifests/inferencepool.yaml | 14 +++++++------- test/e2e/epp/e2e_suite_test.go | 2 +- test/testdata/envoy.yaml | 4 ++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/config/manifests/inferencepool.yaml b/config/manifests/inferencepool.yaml index 0f0a8a86a..695d2bc2d 100644 --- a/config/manifests/inferencepool.yaml +++ b/config/manifests/inferencepool.yaml @@ -8,16 +8,16 @@ spec: selector: app: my-pool extensionRef: - name: my-pool-epp-ext-proc + name: my-pool-epp --- apiVersion: v1 kind: Service metadata: - name: my-pool-epp-ext-proc + name: my-pool-epp namespace: default spec: selector: - app: my-pool-epp-ext-proc + app: my-pool-epp ports: - protocol: TCP port: 9002 @@ -27,19 +27,19 @@ spec: apiVersion: apps/v1 kind: Deployment metadata: - name: my-pool-epp-ext-proc + name: my-pool-epp namespace: default labels: - app: my-pool-epp-ext-proc + app: my-pool-epp spec: replicas: 1 selector: matchLabels: - app: my-pool-epp-ext-proc + app: my-pool-epp template: metadata: labels: - app: my-pool-epp-ext-proc + app: my-pool-epp spec: containers: - name: epp diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go index 435016287..4ca6496d4 100644 --- a/test/e2e/epp/e2e_suite_test.go +++ b/test/e2e/epp/e2e_suite_test.go @@ -65,7 +65,7 @@ const ( // envoyPort is the listener port number of the test envoy proxy. envoyPort = "8081" // inferExtName is the name of the inference extension test resources. - inferExtName = "my-pool-epp-ext-proc" + inferExtName = "my-pool-epp" // clientManifest is the manifest for the client test resources. clientManifest = "../../testdata/client.yaml" // modelServerSecretManifest is the manifest for the model server secret resource. diff --git a/test/testdata/envoy.yaml b/test/testdata/envoy.yaml index dc0c0c552..c9ba8032e 100644 --- a/test/testdata/envoy.yaml +++ b/test/testdata/envoy.yaml @@ -100,7 +100,7 @@ data: grpc_service: envoy_grpc: cluster_name: ext_proc - authority: my-pool-epp-ext-proc.default:9002 + authority: my-pool-epp.default:9002 timeout: 10s processing_mode: request_header_mode: SEND @@ -194,7 +194,7 @@ data: - endpoint: address: socket_address: - address: my-pool-epp-ext-proc.default + address: my-pool-epp.default port_value: 9002 health_status: HEALTHY load_balancing_weight: 1 From fac33ccbb90a62ea4bd59b5777687bde59def249 Mon Sep 17 00:00:00 2001 From: ahg-g Date: Mon, 17 Mar 2025 21:19:53 +0000 Subject: [PATCH 3/3] rename my-pool to vllm-llama2-7b --- config/manifests/inferencepool.yaml | 20 ++++++++++---------- config/manifests/vllm/cpu-deployment.yaml | 6 +++--- config/manifests/vllm/gpu-deployment.yaml | 6 +++--- test/e2e/epp/e2e_suite_test.go | 4 ++-- test/testdata/envoy.yaml | 4 ++-- 5 files changed, 20 insertions(+), 20 deletions(-) diff --git a/config/manifests/inferencepool.yaml b/config/manifests/inferencepool.yaml index 695d2bc2d..640086393 100644 --- a/config/manifests/inferencepool.yaml +++ b/config/manifests/inferencepool.yaml @@ -2,22 +2,22 @@ apiVersion: inference.networking.x-k8s.io/v1alpha2 kind: InferencePool metadata: labels: - name: my-pool + name: vllm-llama2-7b spec: targetPortNumber: 8000 selector: - app: my-pool + app: vllm-llama2-7b extensionRef: - name: my-pool-epp + name: vllm-llama2-7b-epp --- apiVersion: v1 kind: Service metadata: - name: my-pool-epp + name: vllm-llama2-7b-epp namespace: default spec: selector: - app: my-pool-epp + app: vllm-llama2-7b-epp ports: - protocol: TCP port: 9002 @@ -27,19 +27,19 @@ spec: apiVersion: apps/v1 kind: Deployment metadata: - name: my-pool-epp + name: vllm-llama2-7b-epp namespace: default labels: - app: my-pool-epp + app: vllm-llama2-7b-epp spec: replicas: 1 selector: matchLabels: - app: my-pool-epp + app: vllm-llama2-7b-epp template: metadata: labels: - app: my-pool-epp + app: vllm-llama2-7b-epp spec: containers: - name: epp @@ -47,7 +47,7 @@ spec: imagePullPolicy: Always args: - -poolName - - "my-pool" + - "vllm-llama2-7b" - -v - "4" - -grpcPort diff --git a/config/manifests/vllm/cpu-deployment.yaml b/config/manifests/vllm/cpu-deployment.yaml index a0925c837..68dfd18d7 100644 --- a/config/manifests/vllm/cpu-deployment.yaml +++ b/config/manifests/vllm/cpu-deployment.yaml @@ -1,16 +1,16 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: my-pool + name: vllm-llama2-7b spec: replicas: 3 selector: matchLabels: - app: my-pool + app: vllm-llama2-7b template: metadata: labels: - app: my-pool + app: vllm-llama2-7b spec: containers: - name: lora diff --git a/config/manifests/vllm/gpu-deployment.yaml b/config/manifests/vllm/gpu-deployment.yaml index d16a46a45..cdc4d82cb 100644 --- a/config/manifests/vllm/gpu-deployment.yaml +++ b/config/manifests/vllm/gpu-deployment.yaml @@ -1,16 +1,16 @@ apiVersion: apps/v1 kind: Deployment metadata: - name: my-pool + name: vllm-llama2-7b spec: replicas: 3 selector: matchLabels: - app: my-pool + app: vllm-llama2-7b template: metadata: labels: - app: my-pool + app: vllm-llama2-7b spec: containers: - name: lora diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go index 4ca6496d4..92521bf78 100644 --- a/test/e2e/epp/e2e_suite_test.go +++ b/test/e2e/epp/e2e_suite_test.go @@ -57,7 +57,7 @@ const ( // TODO [danehans]: Must be "default" until https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/227 is fixed nsName = "default" // modelServerName is the name of the model server test resources. - modelServerName = "my-pool" + modelServerName = "vllm-llama2-7b" // modelName is the test model name. modelName = "tweet-summary" // envoyName is the name of the envoy proxy test resources. @@ -65,7 +65,7 @@ const ( // envoyPort is the listener port number of the test envoy proxy. envoyPort = "8081" // inferExtName is the name of the inference extension test resources. - inferExtName = "my-pool-epp" + inferExtName = "vllm-llama2-7b-epp" // clientManifest is the manifest for the client test resources. clientManifest = "../../testdata/client.yaml" // modelServerSecretManifest is the manifest for the model server secret resource. diff --git a/test/testdata/envoy.yaml b/test/testdata/envoy.yaml index c9ba8032e..2598428c6 100644 --- a/test/testdata/envoy.yaml +++ b/test/testdata/envoy.yaml @@ -100,7 +100,7 @@ data: grpc_service: envoy_grpc: cluster_name: ext_proc - authority: my-pool-epp.default:9002 + authority: vllm-llama2-7b-epp.default:9002 timeout: 10s processing_mode: request_header_mode: SEND @@ -194,7 +194,7 @@ data: - endpoint: address: socket_address: - address: my-pool-epp.default + address: vllm-llama2-7b-epp.default port_value: 9002 health_status: HEALTHY load_balancing_weight: 1