Skip to content

Commit 82da3b3

Browse files
committed
Updated request metrics to be handled in server processing loop
Signed-off-by: Jie WU <[email protected]>
1 parent 649aedf commit 82da3b3

File tree

1 file changed

+15
-1
lines changed

1 file changed

+15
-1
lines changed

pkg/ext-proc/handlers/server.go

+15-1
Original file line number | Diff line number | Diff line change
@@ -2,13 +2,15 @@ package handlers
22

33
import (
44
"io"
5+
"time"
56

67
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
78
envoyTypePb "github.com/envoyproxy/go-control-plane/envoy/type/v3"
89
"google.golang.org/grpc/codes"
910
"google.golang.org/grpc/status"
1011
"inference.networking.x-k8s.io/gateway-api-inference-extension/api/v1alpha1"
1112
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/backend"
13+
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/metrics"
1214
"inference.networking.x-k8s.io/gateway-api-inference-extension/pkg/ext-proc/scheduling"
1315
klog "k8s.io/klog/v2"
1416
)
@@ -75,22 +77,30 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
7577
var resp *extProcPb.ProcessingResponse
7678
switch v := req.Request.(type) {
7779
case *extProcPb.ProcessingRequest_RequestHeaders:
80+
reqCtx.RequestReceivedTimestamp = time.Now()
7881
resp = HandleRequestHeaders(reqCtx, req)
7982
klog.V(3).Infof("Request context after HandleRequestHeaders: %+v", reqCtx)
8083
case *extProcPb.ProcessingRequest_RequestBody:
8184
resp, err = s.HandleRequestBody(reqCtx, req)
85+
if err == nil {
86+
metrics.RecordRequestCounter(reqCtx.Model, reqCtx.ResolvedTargetModel)
87+
metrics.RecordRequestSizes(reqCtx.Model, reqCtx.ResolvedTargetModel, reqCtx.RequestSize)
88+
}
8289
klog.V(3).Infof("Request context after HandleRequestBody: %+v", reqCtx)
8390
case *extProcPb.ProcessingRequest_ResponseHeaders:
8491
resp, err = s.HandleResponseHeaders(reqCtx, req)
8592
klog.V(3).Infof("Request context after HandleResponseHeaders: %+v", reqCtx)
8693
case *extProcPb.ProcessingRequest_ResponseBody:
8794
resp, err = s.HandleResponseBody(reqCtx, req)
95+
reqCtx.ResponseCompleteTimestamp = time.Now()
96+
if err == nil {
97+
metrics.RecordRequestLatencies(reqCtx.Model, reqCtx.ResolvedTargetModel, reqCtx.RequestReceivedTimestamp, reqCtx.ResponseCompleteTimestamp)
98+
}
8899
klog.V(3).Infof("Request context after HandleResponseBody: %+v", reqCtx)
89100
default:
90101
klog.Errorf("Unknown Request type %+v", v)
91102
return status.Error(codes.Unknown, "unknown request type")
92103
}
93-
94104
if err != nil {
95105
klog.Errorf("failed to process request: %v", err)
96106
switch status.Code(err) {
@@ -123,5 +133,9 @@ func (s *Server) Process(srv extProcPb.ExternalProcessor_ProcessServer) error {
123133
// RequestContext holds the per-request state that Process hands to each
// handler (HandleRequestHeaders, HandleRequestBody, HandleResponseHeaders,
// HandleResponseBody) and later reads back when recording request metrics.
type RequestContext struct {
124134
// TargetPod is presumably the backend pod chosen to serve this request;
// where it is assigned is not visible in this hunk — TODO confirm.
TargetPod backend.Pod
125135
// Model is passed as the first label argument to the metrics recorders
// (request counter, request sizes, request latencies).
Model string
136+
// ResolvedTargetModel is passed as the second label argument to the same
// metrics recorders, alongside Model.
ResolvedTargetModel string
137+
// RequestReceivedTimestamp is set to time.Now() by Process when the
// request-headers message is handled; it is the start point handed to
// metrics.RecordRequestLatencies.
RequestReceivedTimestamp time.Time
138+
// ResponseCompleteTimestamp is set to time.Now() by Process right after
// HandleResponseBody returns; it is the end point handed to
// metrics.RecordRequestLatencies.
ResponseCompleteTimestamp time.Time
139+
// RequestSize is reported through metrics.RecordRequestSizes after a
// successful HandleRequestBody. Presumably a byte count populated by
// HandleRequestBody — TODO confirm unit and where it is set.
RequestSize int
126140
// Response carries response-side state; its definition is outside this view.
Response Response
127141
}

0 commit comments

Comments
 (0)