@@ -2,13 +2,15 @@ package handlers
2
2
3
3
import (
4
4
"io"
5
+ "time"
5
6
6
7
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
7
8
envoyTypePb "github.com/envoyproxy/go-control-plane/envoy/type/v3"
8
9
"google.golang.org/grpc/codes"
9
10
"google.golang.org/grpc/status"
10
11
"inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1"
11
12
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
13
+ "inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics"
12
14
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling"
13
15
klog "k8s.io/klog/v2"
14
16
)
@@ -75,22 +77,30 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
75
77
var resp * extProcPb.ProcessingResponse
76
78
switch v := req .Request .(type ) {
77
79
case * extProcPb.ProcessingRequest_RequestHeaders :
80
+ reqCtx .RequestReceivedTimestamp = time .Now ()
78
81
resp = HandleRequestHeaders (reqCtx , req )
79
82
klog .V (3 ).Infof ("Request context after HandleRequestHeaders: %+v" , reqCtx )
80
83
case * extProcPb.ProcessingRequest_RequestBody :
81
84
resp , err = s .HandleRequestBody (reqCtx , req )
85
+ if err == nil {
86
+ metrics .RecordRequestCounter (reqCtx .Model , reqCtx .ResolvedTargetModel )
87
+ metrics .RecordRequestSizes (reqCtx .Model , reqCtx .ResolvedTargetModel , reqCtx .RequestSize )
88
+ }
82
89
klog .V (3 ).Infof ("Request context after HandleRequestBody: %+v" , reqCtx )
83
90
case * extProcPb.ProcessingRequest_ResponseHeaders :
84
91
resp , err = s .HandleResponseHeaders (reqCtx , req )
85
92
klog .V (3 ).Infof ("Request context after HandleResponseHeaders: %+v" , reqCtx )
86
93
case * extProcPb.ProcessingRequest_ResponseBody :
87
94
resp , err = s .HandleResponseBody (reqCtx , req )
95
+ reqCtx .ResponseCompleteTimestamp = time .Now ()
96
+ if err == nil {
97
+ metrics .RecordRequestLatencies (reqCtx .Model , reqCtx .ResolvedTargetModel , reqCtx .RequestReceivedTimestamp , reqCtx .ResponseCompleteTimestamp )
98
+ }
88
99
klog .V (3 ).Infof ("Request context after HandleResponseBody: %+v" , reqCtx )
89
100
default :
90
101
klog .Errorf ("Unknown Request type %+v" , v )
91
102
return status .Error (codes .Unknown , "unknown request type" )
92
103
}
93
-
94
104
if err != nil {
95
105
klog .Errorf ("failed to process request: %v" , err )
96
106
switch status .Code (err ) {
@@ -123,5 +133,9 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
123
133
// RequestContext holds the state for one request that Process hands to each
// Handle* call and reads back afterwards when recording metrics.
type RequestContext struct {
124
134
// TargetPod is not assigned in the visible part of Process.
// NOTE(review): presumably the backend pod chosen for this request — confirm.
TargetPod backend.Pod
125
135
// Model is passed as the first label argument to the metrics recorders.
// NOTE(review): presumably the model name from the incoming request — confirm.
Model string
136
// ResolvedTargetModel is passed alongside Model to the metrics recorders.
// NOTE(review): presumably the model the request was finally routed to — confirm.
+ ResolvedTargetModel string
137
// RequestReceivedTimestamp is set to time.Now() when the request-headers
// message is processed; it is the start point given to RecordRequestLatencies.
+ RequestReceivedTimestamp time.Time
138
// ResponseCompleteTimestamp is set to time.Now() right after
// HandleResponseBody returns; it is the end point given to RecordRequestLatencies.
+ ResponseCompleteTimestamp time.Time
139
// RequestSize is reported via metrics.RecordRequestSizes once HandleRequestBody
// succeeds. NOTE(review): unit is not visible here (presumably bytes) — confirm.
+ RequestSize int
126
140
// Response is of the package's Response type, which is declared outside this view.
Response Response
127
141
}
0 commit comments