@@ -28,6 +28,9 @@ import (
28
28
29
29
// LLMRequest is a structured representation of the fields we parse out of the LLMRequest body.
30
30
type LLMRequest struct {
31
+ // RequestId is the Envoy generated Id for the request being processed
32
+ RequestId string
33
+
31
34
// Model is the name of the model that the user specified in the request body.
32
35
Model string
33
36
// ResolvedTargetModel is the final target model after traffic split.
@@ -45,6 +48,24 @@ func (r *LLMRequest) String() string {
45
48
r .Model , r .ResolvedTargetModel , r .Critical , len (r .Prompt ), r .Headers )
46
49
}
47
50
51
// LLMResponse contains information from the response received, to be passed
// to plugins.
type LLMResponse struct {
	// RequestId is the Envoy generated Id for the request being processed.
	RequestId string

	// Headers is a map of the response headers. It is nil during body
	// processing.
	Headers map[string]string

	// Body is the body of the response. A Go string cannot be nil, so when no
	// body is available (during header processing) this is presumably left as
	// the empty string; check len(Body) rather than comparing against nil.
	Body string

	// IsStreaming indicates whether or not the response is being streamed by
	// the model.
	IsStreaming bool

	// EndOfStream, when true, indicates that this invocation contains the last
	// chunk of the response.
	EndOfStream bool
}
68
+
48
69
type Pod interface {
49
70
GetPod () * backend.Pod
50
71
GetMetrics () * backendmetrics.Metrics
@@ -61,6 +82,7 @@ type SchedulingContext struct {
61
82
context.Context
62
83
Logger logr.Logger
63
84
Req * LLMRequest
85
+ Resp * LLMResponse
64
86
PodsSnapshot []Pod
65
87
}
66
88
@@ -84,12 +106,13 @@ type PodMetrics struct {
84
106
* backendmetrics.Metrics
85
107
}
86
108
87
- func NewSchedulingContext (ctx context.Context , req * LLMRequest , pods []Pod ) * SchedulingContext {
109
+ func NewSchedulingContext (ctx context.Context , req * LLMRequest , resp * LLMResponse , pods []Pod ) * SchedulingContext {
88
110
logger := log .FromContext (ctx ).WithValues ("request" , req )
89
111
return & SchedulingContext {
90
112
Context : ctx ,
91
113
Logger : logger ,
92
114
Req : req ,
115
+ Resp : resp ,
93
116
PodsSnapshot : pods ,
94
117
}
95
118
}
0 commit comments