Skip to content

Commit 58f226a

Browse files
ahg-g and kfswain
authored and committed
Split the extension policy since it is envoy specific (kubernetes-sigs#524)
* Split the extension policy since it is Envoy-specific * Merge the extension and patch policies into one manifest
1 parent b8bcf0d commit 58f226a

File tree

2 files changed

+33
-34
lines changed

2 files changed

+33
-34
lines changed

config/manifests/gateway/patch_policy.yaml

+33-1
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,36 @@ spec:
8585
# op: replace
8686
# path: "/default_filter_chain/filters/0/typed_config/http_filters/0/typed_config/processing_mode/response_header_mode"
8787
# value: SEND
88-
88+
---
89+
apiVersion: gateway.envoyproxy.io/v1alpha1
90+
kind: EnvoyExtensionPolicy
91+
metadata:
92+
name: ext-proc-policy
93+
namespace: default
94+
spec:
95+
extProc:
96+
- backendRefs:
97+
- group: ""
98+
kind: Service
99+
name: vllm-llama2-7b-epp
100+
port: 9002
101+
processingMode:
102+
allowModeOverride: true
103+
request:
104+
body: Buffered
105+
response:
106+
# The timeouts are likely not needed here. We can experiment with removing/tuning them slowly.
107+
# The connection limits are more important and will cause the opaque: ext_proc_gRPC_error_14 error in Envoy GW if not configured correctly.
108+
messageTimeout: 1000s
109+
backendSettings:
110+
circuitBreaker:
111+
maxConnections: 40000
112+
maxPendingRequests: 40000
113+
maxParallelRequests: 40000
114+
timeout:
115+
tcp:
116+
connectTimeout: 24h
117+
targetRef:
118+
group: gateway.networking.k8s.io
119+
kind: HTTPRoute
120+
name: llm-route

config/manifests/inferencepool.yaml

-33
Original file line numberDiff line numberDiff line change
@@ -75,39 +75,6 @@ spec:
7575
initialDelaySeconds: 5
7676
periodSeconds: 10
7777
---
78-
apiVersion: gateway.envoyproxy.io/v1alpha1
79-
kind: EnvoyExtensionPolicy
80-
metadata:
81-
name: ext-proc-policy
82-
namespace: default
83-
spec:
84-
extProc:
85-
- backendRefs:
86-
- group: ""
87-
kind: Service
88-
name: vllm-llama2-7b-epp
89-
port: 9002
90-
processingMode:
91-
allowModeOverride: true
92-
request:
93-
body: Buffered
94-
response:
95-
# The timeouts are likely not needed here. We can experiment with removing/tuning them slowly.
96-
# The connection limits are more important and will cause the opaque: ext_proc_gRPC_error_14 error in Envoy GW if not configured correctly.
97-
messageTimeout: 1000s
98-
backendSettings:
99-
circuitBreaker:
100-
maxConnections: 40000
101-
maxPendingRequests: 40000
102-
maxParallelRequests: 40000
103-
timeout:
104-
tcp:
105-
connectTimeout: 24h
106-
targetRef:
107-
group: gateway.networking.k8s.io
108-
kind: HTTPRoute
109-
name: llm-route
110-
---
11178
kind: ClusterRole
11279
apiVersion: rbac.authorization.k8s.io/v1
11380
metadata:

0 commit comments

Comments
 (0)