diff --git a/pkg/README.md b/pkg/README.md
index b114ea76b..dc376a79b 100644
--- a/pkg/README.md
+++ b/pkg/README.md
@@ -30,7 +30,6 @@ The current manifests rely on Envoy Gateway [v1.2.1](https://gateway.envoyproxy.
    ```
    Additionally, if you would like to enable the admin interface, you can uncomment the admin lines and run this again.
 
-
 1. **Deploy Gateway**
 
    ```bash
@@ -41,6 +40,12 @@ The current manifests rely on Envoy Gateway [v1.2.1](https://gateway.envoyproxy.
 
    ```bash
    kubectl apply -f ./manifests/ext_proc.yaml
+   ```
+
+1. **Deploy Envoy Gateway Custom Policies**
+
+   ```bash
+   kubectl apply -f ./manifests/extension_policy.yaml
    kubectl apply -f ./manifests/patch_policy.yaml
    ```
 
diff --git a/pkg/manifests/ext_proc.yaml b/pkg/manifests/ext_proc.yaml
index a91410711..73d982cf6 100644
--- a/pkg/manifests/ext_proc.yaml
+++ b/pkg/manifests/ext_proc.yaml
@@ -89,35 +89,3 @@ spec:
       port: 9002
       targetPort: 9002
   type: ClusterIP
----
-apiVersion: gateway.envoyproxy.io/v1alpha1
-kind: EnvoyExtensionPolicy
-metadata:
-  name: ext-proc-policy
-  namespace: default
-spec:
-  extProc:
-    - backendRefs:
-      - group: ""
-        kind: Service
-        name: inference-gateway-ext-proc
-        port: 9002
-      processingMode:
-        request:
-          body: Buffered
-        response:
-      # The timeouts are likely not needed here. We can experiment with removing/tuning them slowly.
-      # The connection limits are more important and will cause the opaque: ext_proc_gRPC_error_14 error in Envoy GW if not configured correctly. 
-      messageTimeout: 1000s
-      backendSettings:
-        circuitBreaker:
-          maxConnections: 40000
-          maxPendingRequests: 40000
-          maxParallelRequests: 40000
-        timeout:
-          tcp:
-            connectTimeout: 24h
-  targetRef:
-    group: gateway.networking.k8s.io
-    kind: HTTPRoute
-    name: llm-route
diff --git a/pkg/manifests/extension_policy.yaml b/pkg/manifests/extension_policy.yaml
new file mode 100644
index 000000000..a8105d6d1
--- /dev/null
+++ b/pkg/manifests/extension_policy.yaml
@@ -0,0 +1,31 @@
+apiVersion: gateway.envoyproxy.io/v1alpha1
+kind: EnvoyExtensionPolicy
+metadata:
+  name: ext-proc-policy
+  namespace: default
+spec:
+  extProc:
+    - backendRefs:
+      - group: ""
+        kind: Service
+        name: inference-gateway-ext-proc
+        port: 9002
+      processingMode:
+        request:
+          body: Buffered
+        response:
+      # The timeouts are likely not needed here. We can experiment with removing/tuning them slowly.
+      # The connection limits are more important and will cause the opaque: ext_proc_gRPC_error_14 error in Envoy GW if not configured correctly. 
+      messageTimeout: 1000s
+      backendSettings:
+        circuitBreaker:
+          maxConnections: 40000
+          maxPendingRequests: 40000
+          maxParallelRequests: 40000
+        timeout:
+          tcp:
+            connectTimeout: 24h
+  targetRef:
+    group: gateway.networking.k8s.io
+    kind: HTTPRoute
+    name: llm-route