Skip to content

Commit 3f53f60

Browse files
committed
Addressing comments round 1
1 parent 16f3b1a commit 3f53f60

File tree

2 files changed

+10
-7
lines changed

2 files changed

+10
-7
lines changed

pkg/ext-proc/handlers/request.go

+9-6
Original file line numberDiff line numberDiff line change
@@ -71,23 +71,23 @@ func (s *Server) HandleRequestBody(reqCtx *RequestContext, req *extProcPb.Proces
7171
klog.V(logutil.VERBOSE).Infof("Updated body: %v", string(requestBody))
7272
}
7373

74-
targetEndpoint, err := s.scheduler.Schedule(llmReq)
74+
targetPod, err := s.scheduler.Schedule(llmReq)
7575
if err != nil {
7676
return nil, fmt.Errorf("failed to find target pod: %w", err)
7777
}
78-
klog.V(logutil.VERBOSE).Infof("Selected target model %v in target pod: %v\n", llmReq.ResolvedTargetModel, targetEndpoint)
78+
klog.V(logutil.VERBOSE).Infof("Selected target model %v in target pod: %v\n", llmReq.ResolvedTargetModel, targetPod)
7979

8080
reqCtx.Model = llmReq.Model
8181
reqCtx.ResolvedTargetModel = llmReq.ResolvedTargetModel
8282
reqCtx.RequestSize = len(v.RequestBody.Body)
83-
reqCtx.TargetPod = targetEndpoint
83+
reqCtx.TargetPod = targetPod
8484

85-
// Insert "target-pod" to instruct Envoy to route requests to the specified target pod.
85+
// Insert target endpoint to instruct Envoy to route requests to the specified target pod.
8686
headers := []*configPb.HeaderValueOption{
8787
{
8888
Header: &configPb.HeaderValue{
8989
Key: s.targetEndpointKey,
90-
RawValue: []byte(targetEndpoint.Address),
90+
RawValue: []byte(targetPod.Address),
9191
},
9292
},
9393
// We need to update the content length header if the body is mutated, see Envoy doc:
@@ -105,6 +105,9 @@ func (s *Server) HandleRequestBody(reqCtx *RequestContext, req *extProcPb.Proces
105105
}
106106

107107
resp := &extProcPb.ProcessingResponse{
108+
// The Endpoint Picker supports two approaches to communicating the target endpoint, as a request header
109+
// and as an unstructured ext-proc response metadata key/value pair. This enables different integration
110+
// options for gateway providers.
108111
Response: &extProcPb.ProcessingResponse_RequestBody{
109112
RequestBody: &extProcPb.BodyResponse{
110113
Response: &extProcPb.CommonResponse{
@@ -123,7 +126,7 @@ func (s *Server) HandleRequestBody(reqCtx *RequestContext, req *extProcPb.Proces
123126
Fields: map[string]*structpb.Value{
124127
s.targetEndpointKey: {
125128
Kind: &structpb.Value_StringValue{
126-
StringValue: targetEndpoint.Address,
129+
StringValue: targetPod.Address,
127130
},
128131
},
129132
},

pkg/manifests/vllm/deployment.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@ spec:
3939
- "8000"
4040
- "--enable-lora"
4141
- "--max-loras"
42-
- "2"
42+
- "4"
4343
- "--max-cpu-loras"
4444
- "12"
4545
- "--lora-modules"

0 commit comments

Comments
 (0)