Skip to content

Commit d604a20

Browse files
committed
Groundwork to support OpenAI API endpoints that vLLM supports
1 parent 7fbef9e commit d604a20

File tree

2 files changed

+40
-0
lines changed

2 files changed

+40
-0
lines changed

config/manifests/gateway/patch_policy.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ spec:
102102
allowModeOverride: true
103103
request:
104104
body: Buffered
105+
attributes: ["request.path", "request.url_path"]
105106
response:
106107
# The timeouts are likely not needed here. We can experiment with removing/tuning them slowly.
107108
# The connection limits are more important and will cause the opaque: ext_proc_gRPC_error_14 error in Envoy GW if not configured correctly.

pkg/epp/handlers/openai/routes.go

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package handlers
18+
19+
// Endpoint tables for the OpenAI-compatible API surface.
//
// Every key is an absolute URL path with a leading "/", which is the form an
// HTTP request path takes on the wire. Keeping both tables in the same form
// lets a caller look up a request path in either map without normalizing it
// first.
var (
	// PassthroughEndpoints are informational endpoints that do not have a model param,
	// and do NOT run inference, so can be passed to any underlying model server at random.
	PassthroughEndpoints = map[string]bool{
		// https://platform.openai.com/docs/api-reference/models/list
		"/v1/models": true,
	}

	// RoutableEndpoints DO have a model param and DO run inference, and thus need to
	// be tracked and routed intelligently.
	RoutableEndpoints = map[string]bool{
		// https://platform.openai.com/docs/api-reference/completions/create
		"/v1/completions": true,
		// https://platform.openai.com/docs/api-reference/chat/create
		"/v1/chat/completions": true,
		// https://platform.openai.com/docs/api-reference/embeddings/create
		"/v1/embeddings": true,
		// https://platform.openai.com/docs/api-reference/audio/createTranscription
		"/v1/audio/transcriptions": true,
	}
)

0 commit comments

Comments
 (0)