File tree 2 files changed +33
-32
lines changed 2 files changed +33
-32
lines changed Load Diff This file was deleted.
Original file line number Diff line number Diff line change 75
75
initialDelaySeconds : 5
76
76
periodSeconds : 10
77
77
---
78
# EnvoyExtensionPolicy that attaches an external processor (ext_proc) to the
# `llm-route` HTTPRoute. The processor backend is the `vllm-llama2-7b-epp`
# Service on port 9002.
apiVersion: gateway.envoyproxy.io/v1alpha1
kind: EnvoyExtensionPolicy
metadata:
  name: ext-proc-policy
  namespace: default
spec:
  extProc:
    - backendRefs:
        # The empty string selects the core API group, where Service lives.
        - group: ""
          kind: Service
          name: vllm-llama2-7b-epp
          port: 9002
      processingMode:
        allowModeOverride: true
        request:
          body: Buffered
        # NOTE(review): this key had no value in the original, which YAML
        # reads as null. If response processing is intended, replace with
        # `{}` or set `body` explicitly -- confirm against the processor.
        response: null
      # The timeouts are likely not needed here. We can experiment with
      # removing/tuning them slowly.
      # The connection limits are more important and will cause the opaque
      # ext_proc_gRPC_error_14 error in Envoy GW if not configured correctly.
      messageTimeout: 1000s
      backendSettings:
        circuitBreaker:
          maxConnections: 40000
          maxPendingRequests: 40000
          maxParallelRequests: 40000
        timeout:
          tcp:
            connectTimeout: 24h
  targetRef:
    group: gateway.networking.k8s.io
    kind: HTTPRoute
    name: llm-route
110
+ ---
78
111
kind : ClusterRole
79
112
apiVersion : rbac.authorization.k8s.io/v1
80
113
metadata :
You can’t perform that action at this time.
0 commit comments