File tree 2 files changed +33
-34
lines changed
2 files changed +33
-34
lines changed Original file line number Diff line number Diff line change 85
85
# op: replace
86
86
# path: "/default_filter_chain/filters/0/typed_config/http_filters/0/typed_config/processing_mode/response_header_mode"
87
87
# value: SEND
88
-
88
+ ---
89
+ apiVersion : gateway.envoyproxy.io/v1alpha1
90
+ kind : EnvoyExtensionPolicy
91
+ metadata :
92
+ name : ext-proc-policy
93
+ namespace : default
94
+ spec :
95
+ extProc :
96
+ - backendRefs :
97
+ - group : " "
98
+ kind : Service
99
+ name : vllm-llama2-7b-epp
100
+ port : 9002
101
+ processingMode :
102
+ allowModeOverride : true
103
+ request :
104
+ body : Buffered
105
+ response :
106
+ # The timeouts are likely not needed here; we can experiment with removing or tuning them gradually.
107
+ # The connection limits are more important: if they are not configured correctly, Envoy Gateway fails with the opaque error ext_proc_gRPC_error_14.
108
+ messageTimeout : 1000s
109
+ backendSettings :
110
+ circuitBreaker :
111
+ maxConnections : 40000
112
+ maxPendingRequests : 40000
113
+ maxParallelRequests : 40000
114
+ timeout :
115
+ tcp :
116
+ connectTimeout : 24h
117
+ targetRef :
118
+ group : gateway.networking.k8s.io
119
+ kind : HTTPRoute
120
+ name : llm-route
Original file line number Diff line number Diff line change 75
75
initialDelaySeconds : 5
76
76
periodSeconds : 10
77
77
---
78
- apiVersion : gateway.envoyproxy.io/v1alpha1
79
- kind : EnvoyExtensionPolicy
80
- metadata :
81
- name : ext-proc-policy
82
- namespace : default
83
- spec :
84
- extProc :
85
- - backendRefs :
86
- - group : " "
87
- kind : Service
88
- name : vllm-llama2-7b-epp
89
- port : 9002
90
- processingMode :
91
- allowModeOverride : true
92
- request :
93
- body : Buffered
94
- response :
95
- # The timeouts are likely not needed here; we can experiment with removing or tuning them gradually.
96
- # The connection limits are more important: if they are not configured correctly, Envoy Gateway fails with the opaque error ext_proc_gRPC_error_14.
97
- messageTimeout : 1000s
98
- backendSettings :
99
- circuitBreaker :
100
- maxConnections : 40000
101
- maxPendingRequests : 40000
102
- maxParallelRequests : 40000
103
- timeout :
104
- tcp :
105
- connectTimeout : 24h
106
- targetRef :
107
- group : gateway.networking.k8s.io
108
- kind : HTTPRoute
109
- name : llm-route
110
- ---
111
78
kind : ClusterRole
112
79
apiVersion : rbac.authorization.k8s.io/v1
113
80
metadata :
You can’t perform that action at this time.
0 commit comments