Skip to content

Commit df57165

Browse files
committed
Added the LLMResponse object and a RequestId
Signed-off-by: Shmuel Kallner <[email protected]>
1 parent fd1ddfa commit df57165

File tree

1 file changed

+24
-1
lines changed

1 file changed

+24
-1
lines changed

pkg/epp/scheduling/types/types.go

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,9 @@ import (
2828

2929
// LLMRequest is a structured representation of the fields we parse out of the LLMRequest body.
3030
type LLMRequest struct {
31+
// RequestId is the Envoy-generated ID for the request being processed.
32+
RequestId string
33+
3134
// Model is the name of the model that the user specified in the request body.
3235
Model string
3336
// ResolvedTargetModel is the final target model after traffic split.
@@ -45,6 +48,24 @@ func (r *LLMRequest) String() string {
4548
r.Model, r.ResolvedTargetModel, r.Critical, len(r.Prompt), r.Headers)
4649
}
4750

51+
// LLMResponse contains information from the received response, to be passed to plugins.
52+
type LLMResponse struct {
53+
// RequestId is the Envoy-generated ID for the request being processed.
54+
RequestId string
55+
56+
// Headers is a map of the response headers. It is nil during body processing.
57+
Headers map[string]string
58+
59+
// Body is the body of the response, or empty during header processing (a Go string cannot be nil).
60+
Body string
61+
62+
// IsStreaming indicates whether or not the response is being streamed by the model
63+
IsStreaming bool
64+
65+
// EndOfStream, when true, indicates that this invocation contains the last chunk of the response.
66+
EndOfStream bool
67+
}
68+
4869
type Pod interface {
4970
GetPod() *backend.Pod
5071
GetMetrics() *backendmetrics.Metrics
@@ -61,6 +82,7 @@ type SchedulingContext struct {
6182
context.Context
6283
Logger logr.Logger
6384
Req *LLMRequest
85+
Resp *LLMResponse
6486
PodsSnapshot []Pod
6587
}
6688

@@ -84,12 +106,13 @@ type PodMetrics struct {
84106
*backendmetrics.Metrics
85107
}
86108

87-
func NewSchedulingContext(ctx context.Context, req *LLMRequest, pods []Pod) *SchedulingContext {
109+
func NewSchedulingContext(ctx context.Context, req *LLMRequest, resp *LLMResponse, pods []Pod) *SchedulingContext {
88110
logger := log.FromContext(ctx).WithValues("request", req)
89111
return &SchedulingContext{
90112
Context: ctx,
91113
Logger: logger,
92114
Req: req,
115+
Resp: resp,
93116
PodsSnapshot: pods,
94117
}
95118
}

0 commit comments

Comments
 (0)