diff --git a/config/manifests/gateway/patch_policy.yaml b/config/manifests/gateway/patch_policy.yaml index d293bc825..01302deed 100644 --- a/config/manifests/gateway/patch_policy.yaml +++ b/config/manifests/gateway/patch_policy.yaml @@ -102,6 +102,7 @@ spec: allowModeOverride: true request: body: Buffered + attributes: ["request.path", "request.url_path"] response: # The timeouts are likely not needed here. We can experiment with removing/tuning them slowly. # The connection limits are more important and will cause the opaque: ext_proc_gRPC_error_14 error in Envoy GW if not configured correctly. diff --git a/pkg/epp/handlers/openai/routes.go b/pkg/epp/handlers/openai/routes.go new file mode 100644 index 000000000..530a9b0af --- /dev/null +++ b/pkg/epp/handlers/openai/routes.go @@ -0,0 +1,39 @@ +/* +Copyright 2025 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package handlers + +var ( + // PassthroughEndpoints are informational endpoints that do not have a model param, + // and do NOT run inference, so can be passed to any underlying model server at random. + PassthroughEndpoints map[string]bool = map[string]bool{ + // https://platform.openai.com/docs/api-reference/models/list + "/v1/models": true, + } + + // RoutableEndpoints DO have a model param and DO run inference, and thus need to + // be tracked and routed intelligently. 
+	RoutableEndpoints map[string]bool = map[string]bool{ // Keys start with "/" to match PassthroughEndpoints; NOTE(review): assumes lookups use the full request path — confirm.
+		// https://platform.openai.com/docs/api-reference/completions/create
+		"/v1/completions": true,
+		// https://platform.openai.com/docs/api-reference/chat/create
+		"/v1/chat/completions": true,
+		// https://platform.openai.com/docs/api-reference/embeddings/create
+		"/v1/embeddings": true,
+		// https://platform.openai.com/docs/api-reference/audio/createTranscription
+		"/v1/audio/transcriptions": true,
+	}
+)