Skip to content

Commit ae3df87

Browse files
authored
E2E test improvements (#661)
1 parent 4ed93bf commit ae3df87

File tree

6 files changed

+183
-10
lines changed

6 files changed

+183
-10
lines changed

config/manifests/inferencepool-resources.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,6 @@
1+
# Note: If you change this file, please make the same change to the copy used for e2e tests:
2+
#
3+
# https://github.com/kubernetes-sigs/gateway-api-inference-extension/blob/main/test/testdata/inferencepool-e2e.yaml
14
apiVersion: inference.networking.x-k8s.io/v1alpha2
25
kind: InferencePool
36
metadata:

config/manifests/vllm/cpu-deployment.yaml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -113,5 +113,8 @@ data:
113113
ensureExist:
114114
models:
115115
- base-model: Qwen/Qwen2.5-1.5B
116-
id: food-review-1
116+
id: food-review
117+
source: SriSanth2345/Qwen-1.5B-Tweet-Generations
118+
- base-model: Qwen/Qwen2.5-1.5B
119+
id: cad-fabricator
117120
source: SriSanth2345/Qwen-1.5B-Tweet-Generations

test/e2e/epp/README.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,13 @@ Follow these steps to run the end-to-end tests:
2828
export HF_TOKEN=<MY_HF_TOKEN>
2929
```
3030

31+
1. **(Optional) Set the test namespace**: By default, the e2e tests create resources in the `inf-ext-e2e` namespace.
32+
If you would like to change this namespace, set the following environment variable:
33+
34+
```sh
35+
export E2E_NS=<MY_NS>
36+
```
37+
3138
1. **Run the Tests**: Run the `test-e2e` target:
3239

3340
```sh

test/e2e/epp/e2e_suite_test.go

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
corev1 "k8s.io/api/core/v1"
3131
rbacv1 "k8s.io/api/rbac/v1"
3232
apiextv1 "k8s.io/apiextensions-apiserver/pkg/apis/apiextensions/v1"
33+
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3334
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
3435
"k8s.io/apimachinery/pkg/runtime"
3536
"k8s.io/apimachinery/pkg/runtime/serializer"
@@ -55,9 +56,8 @@ const (
5556
defaultInterval = time.Millisecond * 250
5657
// defaultCurlInterval is the default interval to run the test curl command.
5758
defaultCurlInterval = time.Second * 5
58-
// nsName is the name of the Namespace used for tests.
59-
// TODO [danehans]: Must be "default" until https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/227 is fixed
60-
nsName = "default"
59+
// defaultNsName is the default name of the Namespace used for tests. It can be overridden by setting the E2E_NS environment variable.
60+
defaultNsName = "inf-ext-e2e"
6161
// modelServerName is the name of the model server test resources.
6262
modelServerName = "vllm-llama3-8b-instruct"
6363
// modelName is the test model name.
@@ -77,7 +77,7 @@ const (
7777
// inferModelManifest is the manifest for the inference model CRD.
7878
inferModelManifest = "../../../config/crd/bases/inference.networking.x-k8s.io_inferencemodels.yaml"
7979
// inferExtManifest is the manifest for the inference extension test resources.
80-
inferExtManifest = "../../../config/manifests/inferencepool-resources.yaml"
80+
inferExtManifest = "../../testdata/inferencepool-e2e.yaml"
8181
// envoyManifest is the manifest for the envoy proxy test resources.
8282
envoyManifest = "../../testdata/envoy.yaml"
8383
// modelServerManifestFilepathEnvVar is the env var that holds absolute path to the manifest for the model server test resource.
@@ -91,6 +91,7 @@ var (
9191
kubeCli *kubernetes.Clientset
9292
scheme = runtime.NewScheme()
9393
cfg = config.GetConfigOrDie()
94+
nsName string
9495
)
9596

9697
func TestAPIs(t *testing.T) {
@@ -101,6 +102,11 @@ func TestAPIs(t *testing.T) {
101102
}
102103

103104
var _ = ginkgo.BeforeSuite(func() {
105+
nsName = os.Getenv("E2E_NS")
106+
if nsName == "" {
107+
nsName = defaultNsName
108+
}
109+
104110
ginkgo.By("Setting up the test suite")
105111
setupSuite()
106112

@@ -109,6 +115,8 @@ var _ = ginkgo.BeforeSuite(func() {
109115
})
110116

111117
func setupInfra() {
118+
createNamespace(cli, nsName)
119+
112120
modelServerManifestPath := readModelServerManifestPath()
113121
modelServerManifestArray := getYamlsFromModelServerManifest(modelServerManifestPath)
114122
if strings.Contains(modelServerManifestArray[0], "hf-token") {
@@ -118,6 +126,7 @@ func setupInfra() {
118126
"inferencepools.inference.networking.x-k8s.io": inferPoolManifest,
119127
"inferencemodels.inference.networking.x-k8s.io": inferModelManifest,
120128
}
129+
121130
createCRDs(cli, crds)
122131
createInferExt(cli, inferExtManifest)
123132
createClient(cli, clientManifest)
@@ -182,6 +191,17 @@ var (
182191
curlInterval = defaultCurlInterval
183192
)
184193

194+
func createNamespace(k8sClient client.Client, ns string) {
195+
ginkgo.By("Creating e2e namespace: " + ns)
196+
obj := &corev1.Namespace{
197+
ObjectMeta: v1.ObjectMeta{
198+
Name: ns,
199+
},
200+
}
201+
err := k8sClient.Create(ctx, obj)
202+
gomega.Expect(err).NotTo(gomega.HaveOccurred(), "Failed to create e2e test namespace")
203+
}
204+
185205
// namespaceExists ensures that a specified namespace exists and is ready for use.
186206
func namespaceExists(k8sClient client.Client, ns string) {
187207
ginkgo.By("Ensuring namespace exists: " + ns)
@@ -276,8 +296,15 @@ func createHfSecret(k8sClient client.Client, secretPath string) {
276296

277297
// createEnvoy creates the envoy proxy resources used for testing from the given filePath.
278298
func createEnvoy(k8sClient client.Client, filePath string) {
299+
inManifests := readYaml(filePath)
300+
ginkgo.By("Replacing placeholder namespace with E2E_NS environment variable")
301+
outManifests := []string{}
302+
for _, m := range inManifests {
303+
outManifests = append(outManifests, strings.ReplaceAll(m, "$E2E_NS", nsName))
304+
}
305+
279306
ginkgo.By("Creating envoy proxy resources from manifest: " + filePath)
280-
applyYAMLFile(k8sClient, filePath)
307+
createObjsFromYaml(k8sClient, outManifests)
281308

282309
// Wait for the configmap to exist before proceeding with test.
283310
cfgMap := &corev1.ConfigMap{}
@@ -302,8 +329,15 @@ func createEnvoy(k8sClient client.Client, filePath string) {
302329

303330
// createInferExt creates the inference extension resources used for testing from the given filePath.
304331
func createInferExt(k8sClient client.Client, filePath string) {
332+
inManifests := readYaml(filePath)
333+
ginkgo.By("Replacing placeholder namespace with E2E_NS environment variable")
334+
outManifests := []string{}
335+
for _, m := range inManifests {
336+
outManifests = append(outManifests, strings.ReplaceAll(m, "$E2E_NS", nsName))
337+
}
338+
305339
ginkgo.By("Creating inference extension resources from manifest: " + filePath)
306-
applyYAMLFile(k8sClient, filePath)
340+
createObjsFromYaml(k8sClient, outManifests)
307341

308342
// Wait for the clusterrole to exist.
309343
testutils.EventuallyExists(ctx, func() error {

test/testdata/envoy.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,7 @@ data:
100100
grpc_service:
101101
envoy_grpc:
102102
cluster_name: ext_proc
103-
authority: vllm-llama3-8b-instruct-epp.default:9002
103+
authority: vllm-llama3-8b-instruct-epp.$E2E_NS:9002
104104
timeout: 10s
105105
processing_mode:
106106
request_header_mode: SEND
@@ -195,7 +195,7 @@ data:
195195
- endpoint:
196196
address:
197197
socket_address:
198-
address: vllm-llama3-8b-instruct-epp.default
198+
address: vllm-llama3-8b-instruct-epp.$E2E_NS
199199
port_value: 9002
200200
health_status: HEALTHY
201201
load_balancing_weight: 1
@@ -225,7 +225,7 @@ spec:
225225
image: docker.io/envoyproxy/envoy:distroless-v1.33.2
226226
args:
227227
- "--service-cluster"
228-
- "default/inference-gateway"
228+
- "$E2E_NS/inference-gateway"
229229
- "--service-node"
230230
- "$(ENVOY_POD_NAME)"
231231
- "--log-level"

test/testdata/inferencepool-e2e.yaml

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
apiVersion: inference.networking.x-k8s.io/v1alpha2
2+
kind: InferencePool
3+
metadata:
4+
labels:
5+
name: vllm-llama3-8b-instruct
6+
spec:
7+
targetPortNumber: 8000
8+
selector:
9+
app: vllm-llama3-8b-instruct
10+
extensionRef:
11+
name: vllm-llama3-8b-instruct-epp
12+
namespace: $E2E_NS
13+
---
14+
apiVersion: v1
15+
kind: Service
16+
metadata:
17+
name: vllm-llama3-8b-instruct-epp
18+
namespace: $E2E_NS
19+
spec:
20+
selector:
21+
app: vllm-llama3-8b-instruct-epp
22+
ports:
23+
- protocol: TCP
24+
port: 9002
25+
targetPort: 9002
26+
appProtocol: http2
27+
type: ClusterIP
28+
---
29+
apiVersion: apps/v1
30+
kind: Deployment
31+
metadata:
32+
name: vllm-llama3-8b-instruct-epp
33+
namespace: $E2E_NS
34+
labels:
35+
app: vllm-llama3-8b-instruct-epp
36+
spec:
37+
replicas: 1
38+
selector:
39+
matchLabels:
40+
app: vllm-llama3-8b-instruct-epp
41+
template:
42+
metadata:
43+
labels:
44+
app: vllm-llama3-8b-instruct-epp
45+
spec:
46+
# Conservatively, this timeout should mirror the longest grace period of the pods within the pool
47+
terminationGracePeriodSeconds: 130
48+
containers:
49+
- name: epp
50+
image: us-central1-docker.pkg.dev/k8s-staging-images/gateway-api-inference-extension/epp:main
51+
imagePullPolicy: Always
52+
args:
53+
- -poolName
54+
- "vllm-llama3-8b-instruct"
55+
- -poolNamespace
56+
- "$E2E_NS"
57+
- -v
58+
- "4"
59+
- --zap-encoder
60+
- "json"
61+
- -grpcPort
62+
- "9002"
63+
- -grpcHealthPort
64+
- "9003"
65+
env:
66+
- name: USE_STREAMING
67+
value: "true"
68+
ports:
69+
- containerPort: 9002
70+
- containerPort: 9003
71+
- name: metrics
72+
containerPort: 9090
73+
livenessProbe:
74+
grpc:
75+
port: 9003
76+
service: inference-extension
77+
initialDelaySeconds: 5
78+
periodSeconds: 10
79+
readinessProbe:
80+
grpc:
81+
port: 9003
82+
service: inference-extension
83+
initialDelaySeconds: 5
84+
periodSeconds: 10
85+
---
86+
kind: ClusterRole
87+
apiVersion: rbac.authorization.k8s.io/v1
88+
metadata:
89+
name: pod-read
90+
rules:
91+
- apiGroups: ["inference.networking.x-k8s.io"]
92+
resources: ["inferencemodels"]
93+
verbs: ["get", "watch", "list"]
94+
- apiGroups: [""]
95+
resources: ["pods"]
96+
verbs: ["get", "watch", "list"]
97+
- apiGroups: ["inference.networking.x-k8s.io"]
98+
resources: ["inferencepools"]
99+
verbs: ["get", "watch", "list"]
100+
- apiGroups: ["discovery.k8s.io"]
101+
resources: ["endpointslices"]
102+
verbs: ["get", "watch", "list"]
103+
- apiGroups:
104+
- authentication.k8s.io
105+
resources:
106+
- tokenreviews
107+
verbs:
108+
- create
109+
- apiGroups:
110+
- authorization.k8s.io
111+
resources:
112+
- subjectaccessreviews
113+
verbs:
114+
- create
115+
---
116+
kind: ClusterRoleBinding
117+
apiVersion: rbac.authorization.k8s.io/v1
118+
metadata:
119+
name: pod-read-binding
120+
subjects:
121+
- kind: ServiceAccount
122+
name: default
123+
namespace: $E2E_NS
124+
roleRef:
125+
kind: ClusterRole
126+
name: pod-read

0 commit comments

Comments
 (0)