Skip to content

Commit 3d19d6d

Browse files
nirrozenbaum authored and rlakhtakia committed
passing headers to scheduler plugins (kubernetes-sigs#775)
* passing headers to scheduler plugins

Signed-off-by: Nir Rozenbaum <[email protected]>

* addressed code review comments

Signed-off-by: Nir Rozenbaum <[email protected]>

---------

Signed-off-by: Nir Rozenbaum <[email protected]>
1 parent f66daf6 commit 3d19d6d

File tree

3 files changed

+15
-13
lines changed

3 files changed

+15
-13
lines changed

pkg/epp/handlers/request.go

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,14 +32,11 @@ import (
3232
)
3333

3434
// HandleRequestBody always returns the requestContext even in the error case, as the request context is used in error handling.
35-
func (s *StreamingServer) HandleRequestBody(
36-
ctx context.Context,
37-
reqCtx *RequestContext,
38-
) (*RequestContext, error) {
39-
var requestBodyBytes []byte
35+
func (s *StreamingServer) HandleRequestBody(ctx context.Context, reqCtx *RequestContext) (*RequestContext, error) {
4036
logger := log.FromContext(ctx)
41-
requestBodyMap := reqCtx.Request.Body
4237

38+
var requestBodyBytes []byte
39+
requestBodyMap := reqCtx.Request.Body
4340
// Resolve target models.
4441
model, ok := requestBodyMap["model"].(string)
4542
if !ok {
@@ -70,6 +67,7 @@ func (s *StreamingServer) HandleRequestBody(
7067
ResolvedTargetModel: modelName,
7168
Critical: modelObj.Spec.Criticality != nil && *modelObj.Spec.Criticality == v1alpha2.Critical,
7269
Prompt: prompt,
70+
Headers: reqCtx.Request.Headers,
7371
}
7472
logger.V(logutil.DEBUG).Info("LLM request assembled", "request", llmReq)
7573

pkg/epp/scheduling/config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ package scheduling
1818

1919
import "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/scheduling/plugins"
2020

21-
// SchedulerConfig creates a new SchedulerConfig object with the given plugins.
21+
// NewSchedulerConfig creates a new SchedulerConfig object with the given plugins.
2222
func NewSchedulerConfig(preSchedulePlugins []plugins.PreSchedule, filters []plugins.Filter, scorers map[plugins.Scorer]int,
2323
picker plugins.Picker, postSchedulePlugins []plugins.PostSchedule) *SchedulerConfig {
2424
return &SchedulerConfig{

pkg/epp/scheduling/types/types.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,17 +28,21 @@ import (
2828

2929
// LLMRequest is a structured representation of the fields we parse out of the LLMRequest body.
3030
type LLMRequest struct {
31+
// Model is the name of the model that the user specified in the request body.
3132
Model string
32-
// Target models is a map of target model name to weight.
33-
TargetModels map[string]int
34-
Prompt string
35-
// Resolved target model is the final target model after traffic split.
33+
// ResolvedTargetModel is the final target model after traffic split.
3634
ResolvedTargetModel string
37-
Critical bool
35+
// Critical is a boolean that specifies if a request is critical or not.
36+
Critical bool
37+
// Prompt is the prompt that was sent in the request body.
38+
Prompt string
39+
// Headers is a map of the request headers.
40+
Headers map[string]string
3841
}
3942

4043
func (r *LLMRequest) String() string {
41-
return fmt.Sprintf("Model: %s, TargetModels: %v, ResolvedTargetModel: %s, Critical: %t, PromptLength: %v", r.Model, r.TargetModels, r.ResolvedTargetModel, r.Critical, len(r.Prompt))
44+
return fmt.Sprintf("Model: %s, ResolvedTargetModel: %s, Critical: %t, PromptLength: %d, Headers: %v",
45+
r.Model, r.ResolvedTargetModel, r.Critical, len(r.Prompt), r.Headers)
4246
}
4347

4448
type Pod interface {

0 commit comments

Comments
 (0)