From 11c9130f479564f99879562b9c976f82be852854 Mon Sep 17 00:00:00 2001
From: ahg-g <ahg@google.com>
Date: Mon, 17 Mar 2025 18:08:57 +0000
Subject: [PATCH 1/3] rename ext_proc.yaml to inferencepool.yaml

---
 .../{ext_proc.yaml => inferencepool.yaml}     | 118 +++++++++---------
 site-src/guides/index.md                      |   6 +-
 test/e2e/epp/e2e_suite_test.go                |   4 +-
 test/testdata/envoy.yaml                      |   4 +-
 4 files changed, 66 insertions(+), 66 deletions(-)
 rename config/manifests/{ext_proc.yaml => inferencepool.yaml} (88%)

diff --git a/config/manifests/ext_proc.yaml b/config/manifests/inferencepool.yaml
similarity index 88%
rename from config/manifests/ext_proc.yaml
rename to config/manifests/inferencepool.yaml
index d70467ee0..0f0a8a86a 100644
--- a/config/manifests/ext_proc.yaml
+++ b/config/manifests/inferencepool.yaml
@@ -1,45 +1,3 @@
-kind: ClusterRole
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
-  name: pod-read
-rules:
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencemodels"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: [""]
-  resources: ["pods"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["inference.networking.x-k8s.io"]
-  resources: ["inferencepools"]
-  verbs: ["get", "watch", "list"]
-- apiGroups: ["discovery.k8s.io"]
-  resources: ["endpointslices"]
-  verbs: ["get", "watch", "list"]
-- apiGroups:
-  - authentication.k8s.io
-  resources:
-  - tokenreviews
-  verbs:
-  - create
-- apiGroups:
-  - authorization.k8s.io
-  resources:
-  - subjectaccessreviews
-  verbs:
-  - create
---- 
-kind: ClusterRoleBinding
-apiVersion: rbac.authorization.k8s.io/v1
-metadata:
-  name: pod-read-binding
-subjects:
-- kind: ServiceAccount
-  name: default
-  namespace: default
-roleRef:
-  kind: ClusterRole
-  name: pod-read
----
 apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferencePool
 metadata:
@@ -50,27 +8,41 @@ spec:
   selector:
     app: my-pool
   extensionRef:
-    name: inference-gateway-ext-proc
+    name: my-pool-epp-ext-proc
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: my-pool-epp-ext-proc
+  namespace: default
+spec:
+  selector:
+    app: my-pool-epp-ext-proc
+  ports:
+    - protocol: TCP
+      port: 9002
+      targetPort: 9002
+  type: ClusterIP
 ---
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: inference-gateway-ext-proc
+  name: my-pool-epp-ext-proc
   namespace: default
   labels:
-    app: inference-gateway-ext-proc
+    app: my-pool-epp-ext-proc
 spec:
   replicas: 1
   selector:
     matchLabels:
-      app: inference-gateway-ext-proc
+      app: my-pool-epp-ext-proc
   template:
     metadata:
       labels:
-        app: inference-gateway-ext-proc
+        app: my-pool-epp-ext-proc
     spec:
       containers:
-      - name: inference-gateway-ext-proc
+      - name: epp
         image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
         imagePullPolicy: Always
         args:
@@ -103,16 +75,44 @@ spec:
           initialDelaySeconds: 5
           periodSeconds: 10
 ---
-apiVersion: v1
-kind: Service
+kind: ClusterRole
+apiVersion: rbac.authorization.k8s.io/v1
 metadata:
-  name: inference-gateway-ext-proc
+  name: pod-read
+rules:
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencemodels"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: [""]
+  resources: ["pods"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: ["inference.networking.x-k8s.io"]
+  resources: ["inferencepools"]
+  verbs: ["get", "watch", "list"]
+- apiGroups: ["discovery.k8s.io"]
+  resources: ["endpointslices"]
+  verbs: ["get", "watch", "list"]
+- apiGroups:
+  - authentication.k8s.io
+  resources:
+  - tokenreviews
+  verbs:
+  - create
+- apiGroups:
+  - authorization.k8s.io
+  resources:
+  - subjectaccessreviews
+  verbs:
+  - create
+--- 
+kind: ClusterRoleBinding
+apiVersion: rbac.authorization.k8s.io/v1
+metadata:
+  name: pod-read-binding
+subjects:
+- kind: ServiceAccount
+  name: default
   namespace: default
-spec:
-  selector:
-    app: inference-gateway-ext-proc
-  ports:
-    - protocol: TCP
-      port: 9002
-      targetPort: 9002
-  type: ClusterIP
+roleRef:
+  kind: ClusterRole
+  name: pod-read
diff --git a/site-src/guides/index.md b/site-src/guides/index.md
index 94f5c9c1c..d6ff84594 100644
--- a/site-src/guides/index.md
+++ b/site-src/guides/index.md
@@ -80,10 +80,10 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    NAME                CLASS               ADDRESS         PROGRAMMED   AGE
    inference-gateway   inference-gateway   <MY_ADDRESS>    True         22s
    ```
-### Deploy the Inference Extension and InferencePool
+### Deploy the InferencePool and Extension
 
    ```bash
-   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/ext_proc.yaml
+   kubectl apply -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/inferencepool.yaml
    ```
 ### Deploy Envoy Gateway Custom Policies
 
@@ -134,4 +134,4 @@ This quickstart guide is intended for engineers familiar with k8s and model serv
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/cpu-deployment.yaml --ignore-not-found
    kubectl delete -f https://github.com/kubernetes-sigs/gateway-api-inference-extension/raw/main/config/manifests/vllm/gpu-deployment.yaml --ignore-not-found
    kubectl delete secret hf-token --ignore-not-found
-   ```
\ No newline at end of file
+   ```
diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go
index bc7dc87ae..435016287 100644
--- a/test/e2e/epp/e2e_suite_test.go
+++ b/test/e2e/epp/e2e_suite_test.go
@@ -65,7 +65,7 @@ const (
 	// envoyPort is the listener port number of the test envoy proxy.
 	envoyPort = "8081"
 	// inferExtName is the name of the inference extension test resources.
-	inferExtName = "inference-gateway-ext-proc"
+	inferExtName = "my-pool-epp-ext-proc"
 	// clientManifest is the manifest for the client test resources.
 	clientManifest = "../../testdata/client.yaml"
 	// modelServerSecretManifest is the manifest for the model server secret resource.
@@ -75,7 +75,7 @@ const (
 	// inferModelManifest is the manifest for the inference model CRD.
 	inferModelManifest = "../../../config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml"
 	// inferExtManifest is the manifest for the inference extension test resources.
-	inferExtManifest = "../../../config/manifests/ext_proc.yaml"
+	inferExtManifest = "../../../config/manifests/inferencepool.yaml"
 	// envoyManifest is the manifest for the envoy proxy test resources.
 	envoyManifest = "../../testdata/envoy.yaml"
 	// modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource.
diff --git a/test/testdata/envoy.yaml b/test/testdata/envoy.yaml
index ffb8add78..dc0c0c552 100644
--- a/test/testdata/envoy.yaml
+++ b/test/testdata/envoy.yaml
@@ -100,7 +100,7 @@ data:
                           grpc_service:
                             envoy_grpc:
                               cluster_name: ext_proc
-                              authority: inference-gateway-ext-proc.default:9002
+                              authority: my-pool-epp-ext-proc.default:9002
                             timeout: 10s
                           processing_mode:
                             request_header_mode: SEND
@@ -194,7 +194,7 @@ data:
                   - endpoint:
                       address:
                         socket_address:
-                          address: inference-gateway-ext-proc.default
+                          address: my-pool-epp-ext-proc.default
                           port_value: 9002
                     health_status: HEALTHY
                     load_balancing_weight: 1

From 9927ff4b874a8aa15b7ae2f099211c994d8428de Mon Sep 17 00:00:00 2001
From: ahg-g <ahg@google.com>
Date: Mon, 17 Mar 2025 21:06:59 +0000
Subject: [PATCH 2/3] remove -ext-proc suffix from EPP resource names

---
 config/manifests/inferencepool.yaml | 14 +++++++-------
 test/e2e/epp/e2e_suite_test.go      |  2 +-
 test/testdata/envoy.yaml            |  4 ++--
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/config/manifests/inferencepool.yaml b/config/manifests/inferencepool.yaml
index 0f0a8a86a..695d2bc2d 100644
--- a/config/manifests/inferencepool.yaml
+++ b/config/manifests/inferencepool.yaml
@@ -8,16 +8,16 @@ spec:
   selector:
     app: my-pool
   extensionRef:
-    name: my-pool-epp-ext-proc
+    name: my-pool-epp
 ---
 apiVersion: v1
 kind: Service
 metadata:
-  name: my-pool-epp-ext-proc
+  name: my-pool-epp
   namespace: default
 spec:
   selector:
-    app: my-pool-epp-ext-proc
+    app: my-pool-epp
   ports:
     - protocol: TCP
       port: 9002
@@ -27,19 +27,19 @@ spec:
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: my-pool-epp-ext-proc
+  name: my-pool-epp
   namespace: default
   labels:
-    app: my-pool-epp-ext-proc
+    app: my-pool-epp
 spec:
   replicas: 1
   selector:
     matchLabels:
-      app: my-pool-epp-ext-proc
+      app: my-pool-epp
   template:
     metadata:
       labels:
-        app: my-pool-epp-ext-proc
+        app: my-pool-epp
     spec:
       containers:
       - name: epp
diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go
index 435016287..4ca6496d4 100644
--- a/test/e2e/epp/e2e_suite_test.go
+++ b/test/e2e/epp/e2e_suite_test.go
@@ -65,7 +65,7 @@ const (
 	// envoyPort is the listener port number of the test envoy proxy.
 	envoyPort = "8081"
 	// inferExtName is the name of the inference extension test resources.
-	inferExtName = "my-pool-epp-ext-proc"
+	inferExtName = "my-pool-epp"
 	// clientManifest is the manifest for the client test resources.
 	clientManifest = "../../testdata/client.yaml"
 	// modelServerSecretManifest is the manifest for the model server secret resource.
diff --git a/test/testdata/envoy.yaml b/test/testdata/envoy.yaml
index dc0c0c552..c9ba8032e 100644
--- a/test/testdata/envoy.yaml
+++ b/test/testdata/envoy.yaml
@@ -100,7 +100,7 @@ data:
                           grpc_service:
                             envoy_grpc:
                               cluster_name: ext_proc
-                              authority: my-pool-epp-ext-proc.default:9002
+                              authority: my-pool-epp.default:9002
                             timeout: 10s
                           processing_mode:
                             request_header_mode: SEND
@@ -194,7 +194,7 @@ data:
                   - endpoint:
                       address:
                         socket_address:
-                          address: my-pool-epp-ext-proc.default
+                          address: my-pool-epp.default
                           port_value: 9002
                     health_status: HEALTHY
                     load_balancing_weight: 1

From fac33ccbb90a62ea4bd59b5777687bde59def249 Mon Sep 17 00:00:00 2001
From: ahg-g <ahg@google.com>
Date: Mon, 17 Mar 2025 21:19:53 +0000
Subject: [PATCH 3/3] rename my-pool to vllm-llama2-7b

---
 config/manifests/inferencepool.yaml       | 20 ++++++++++----------
 config/manifests/vllm/cpu-deployment.yaml |  6 +++---
 config/manifests/vllm/gpu-deployment.yaml |  6 +++---
 test/e2e/epp/e2e_suite_test.go            |  4 ++--
 test/testdata/envoy.yaml                  |  4 ++--
 5 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/config/manifests/inferencepool.yaml b/config/manifests/inferencepool.yaml
index 695d2bc2d..640086393 100644
--- a/config/manifests/inferencepool.yaml
+++ b/config/manifests/inferencepool.yaml
@@ -2,22 +2,22 @@ apiVersion: inference.networking.x-k8s.io/v1alpha2
 kind: InferencePool
 metadata:
   labels:
-  name: my-pool
+  name: vllm-llama2-7b
 spec:
   targetPortNumber: 8000
   selector:
-    app: my-pool
+    app: vllm-llama2-7b
   extensionRef:
-    name: my-pool-epp
+    name: vllm-llama2-7b-epp
 ---
 apiVersion: v1
 kind: Service
 metadata:
-  name: my-pool-epp
+  name: vllm-llama2-7b-epp
   namespace: default
 spec:
   selector:
-    app: my-pool-epp
+    app: vllm-llama2-7b-epp
   ports:
     - protocol: TCP
       port: 9002
@@ -27,19 +27,19 @@ spec:
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: my-pool-epp
+  name: vllm-llama2-7b-epp
   namespace: default
   labels:
-    app: my-pool-epp
+    app: vllm-llama2-7b-epp
 spec:
   replicas: 1
   selector:
     matchLabels:
-      app: my-pool-epp
+      app: vllm-llama2-7b-epp
   template:
     metadata:
       labels:
-        app: my-pool-epp
+        app: vllm-llama2-7b-epp
     spec:
       containers:
       - name: epp
@@ -47,7 +47,7 @@ spec:
         imagePullPolicy: Always
         args:
         - -poolName
-        - "my-pool"
+        - "vllm-llama2-7b"
         - -v
         - "4"
         - -grpcPort
diff --git a/config/manifests/vllm/cpu-deployment.yaml b/config/manifests/vllm/cpu-deployment.yaml
index a0925c837..68dfd18d7 100644
--- a/config/manifests/vllm/cpu-deployment.yaml
+++ b/config/manifests/vllm/cpu-deployment.yaml
@@ -1,16 +1,16 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: my-pool
+  name: vllm-llama2-7b
 spec:
   replicas: 3
   selector:
     matchLabels:
-      app: my-pool
+      app: vllm-llama2-7b
   template:
     metadata:
       labels:
-        app: my-pool
+        app: vllm-llama2-7b
     spec:
       containers:
         - name: lora
diff --git a/config/manifests/vllm/gpu-deployment.yaml b/config/manifests/vllm/gpu-deployment.yaml
index d16a46a45..cdc4d82cb 100644
--- a/config/manifests/vllm/gpu-deployment.yaml
+++ b/config/manifests/vllm/gpu-deployment.yaml
@@ -1,16 +1,16 @@
 apiVersion: apps/v1
 kind: Deployment
 metadata:
-  name: my-pool
+  name: vllm-llama2-7b
 spec:
   replicas: 3
   selector:
     matchLabels:
-      app: my-pool
+      app: vllm-llama2-7b
   template:
     metadata:
       labels:
-        app: my-pool
+        app: vllm-llama2-7b
     spec:
       containers:
         - name: lora
diff --git a/test/e2e/epp/e2e_suite_test.go b/test/e2e/epp/e2e_suite_test.go
index 4ca6496d4..92521bf78 100644
--- a/test/e2e/epp/e2e_suite_test.go
+++ b/test/e2e/epp/e2e_suite_test.go
@@ -57,7 +57,7 @@ const (
 	// TODO [danehans]: Must be "default" until https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/227 is fixed
 	nsName = "default"
 	// modelServerName is the name of the model server test resources.
-	modelServerName = "my-pool"
+	modelServerName = "vllm-llama2-7b"
 	// modelName is the test model name.
 	modelName = "tweet-summary"
 	// envoyName is the name of the envoy proxy test resources.
@@ -65,7 +65,7 @@ const (
 	// envoyPort is the listener port number of the test envoy proxy.
 	envoyPort = "8081"
 	// inferExtName is the name of the inference extension test resources.
-	inferExtName = "my-pool-epp"
+	inferExtName = "vllm-llama2-7b-epp"
 	// clientManifest is the manifest for the client test resources.
 	clientManifest = "../../testdata/client.yaml"
 	// modelServerSecretManifest is the manifest for the model server secret resource.
diff --git a/test/testdata/envoy.yaml b/test/testdata/envoy.yaml
index c9ba8032e..2598428c6 100644
--- a/test/testdata/envoy.yaml
+++ b/test/testdata/envoy.yaml
@@ -100,7 +100,7 @@ data:
                           grpc_service:
                             envoy_grpc:
                               cluster_name: ext_proc
-                              authority: my-pool-epp.default:9002
+                              authority: vllm-llama2-7b-epp.default:9002
                             timeout: 10s
                           processing_mode:
                             request_header_mode: SEND
@@ -194,7 +194,7 @@ data:
                   - endpoint:
                       address:
                         socket_address:
-                          address: my-pool-epp.default
+                          address: vllm-llama2-7b-epp.default
                           port_value: 9002
                     health_status: HEALTHY
                     load_balancing_weight: 1