Skip to content

Commit fdef0ea

Browse files
JeffLuo authored and kfswain committed
Add metrics validation in integration test (kubernetes-sigs#413)
Start by adding request total metrics, more validation will be added in follow up. kubernetes-sigs#326
1 parent 228b5cf commit fdef0ea

File tree

1 file changed

+77
-5
lines changed

1 file changed

+77
-5
lines changed

test/integration/hermetic_test.go

Lines changed: 77 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,20 @@ import (
2424
"errors"
2525
"fmt"
2626
"io"
27+
"net"
28+
"net/http"
2729
"os"
2830
"path/filepath"
31+
"strconv"
32+
"strings"
2933
"testing"
3034
"time"
3135

3236
configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
3337
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
3438
envoyTypePb "github.com/envoyproxy/go-control-plane/envoy/type/v3"
3539
"github.com/google/go-cmp/cmp"
40+
"github.com/prometheus/client_golang/prometheus/promhttp"
3641
"github.com/stretchr/testify/assert"
3742
"google.golang.org/grpc"
3843
"google.golang.org/grpc/credentials/insecure"
@@ -43,12 +48,16 @@ import (
4348
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
4449
k8syaml "k8s.io/apimachinery/pkg/util/yaml"
4550
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
51+
"k8s.io/component-base/metrics/legacyregistry"
52+
metricsutils "k8s.io/component-base/metrics/testutil"
4653
ctrl "sigs.k8s.io/controller-runtime"
4754
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
4855
"sigs.k8s.io/controller-runtime/pkg/envtest"
56+
"sigs.k8s.io/controller-runtime/pkg/manager"
4957
"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
5058
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
5159
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
60+
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
5261
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
5362
extprocutils "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/test"
5463
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
@@ -57,7 +66,8 @@ import (
5766
)
5867

5968
const (
60-
port = runserver.DefaultGrpcPort
69+
port = runserver.DefaultGrpcPort
70+
metricsPort = 8888
6171
)
6272

6373
var (
@@ -76,6 +86,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
7686
wantHeaders []*configPb.HeaderValueOption
7787
wantMetadata *structpb.Struct
7888
wantBody []byte
89+
wantMetrics string
7990
wantErr bool
8091
immediateResponse *extProcPb.ImmediateResponse
8192
}{
@@ -113,7 +124,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
113124
},
114125
wantMetadata: makeMetadata("address-1:8000"),
115126
wantBody: []byte("{\"max_tokens\":100,\"model\":\"my-model-12345\",\"prompt\":\"test1\",\"temperature\":0}"),
116-
wantErr: false,
127+
wantMetrics: `
128+
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
129+
# TYPE inference_model_request_total counter
130+
inference_model_request_total{model_name="my-model",target_model_name="my-model-12345"} 1
131+
`,
132+
wantErr: false,
117133
},
118134
{
119135
name: "select active lora, low queue",
@@ -161,7 +177,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
161177
},
162178
wantMetadata: makeMetadata("address-1:8000"),
163179
wantBody: []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg2\",\"prompt\":\"test2\",\"temperature\":0}"),
164-
wantErr: false,
180+
wantMetrics: `
181+
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
182+
# TYPE inference_model_request_total counter
183+
inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 1
184+
`,
185+
wantErr: false,
165186
},
166187
{
167188
name: "select no lora despite active model, avoid excessive queue size",
@@ -210,7 +231,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
210231
},
211232
wantMetadata: makeMetadata("address-2:8000"),
212233
wantBody: []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg2\",\"prompt\":\"test3\",\"temperature\":0}"),
213-
wantErr: false,
234+
wantMetrics: `
235+
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
236+
# TYPE inference_model_request_total counter
237+
inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 1
238+
`,
239+
wantErr: false,
214240
},
215241
{
216242
name: "noncritical and all models past threshold, shed request",
@@ -253,6 +279,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
253279
Code: envoyTypePb.StatusCode_TooManyRequests,
254280
},
255281
},
282+
wantMetrics: "",
256283
},
257284
{
258285
name: "noncritical, but one server has capacity, do not shed",
@@ -301,7 +328,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
301328
},
302329
wantMetadata: makeMetadata("address-0:8000"),
303330
wantBody: []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg3\",\"prompt\":\"test5\",\"temperature\":0}"),
304-
wantErr: false,
331+
wantMetrics: `
332+
# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
333+
# TYPE inference_model_request_total counter
334+
inference_model_request_total{model_name="sql-lora-sheddable",target_model_name="sql-lora-1fdg3"} 1
335+
`,
336+
wantErr: false,
305337
},
306338
}
307339

@@ -345,6 +377,14 @@ func TestKubeInferenceModelRequest(t *testing.T) {
345377
if diff := cmp.Diff(want, res, protocmp.Transform()); diff != "" {
346378
t.Errorf("Unexpected response, (-want +got): %v", diff)
347379
}
380+
381+
if test.wantMetrics != "" {
382+
if err := metricsutils.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(test.wantMetrics), "inference_model_request_total"); err != nil {
383+
t.Error(err)
384+
}
385+
}
386+
387+
legacyregistry.Reset()
348388
})
349389
}
350390
}
@@ -424,6 +464,10 @@ func BeforeSuit(t *testing.T) func() {
424464
logutil.Fatal(logger, err, "Failed to create controller manager")
425465
}
426466

467+
if err := registerMetricsHandler(mgr, metricsPort); err != nil {
468+
logutil.Fatal(logger, err, "Failed to register metrics handler")
469+
}
470+
427471
serverRunner = runserver.NewDefaultExtProcServerRunner()
428472
// Adjust from defaults
429473
serverRunner.PoolName = "vllm-llama2-7b-pool"
@@ -544,3 +588,31 @@ func makeMetadata(endpoint string) *structpb.Struct {
544588
},
545589
}
546590
}
591+
592+
// registerMetricsHandler is a simplified version of metrics endpoint handler
593+
// without Authentication for integration tests.
594+
func registerMetricsHandler(mgr manager.Manager, port int) error {
595+
metrics.Register()
596+
597+
// Init HTTP server.
598+
h := promhttp.HandlerFor(
599+
legacyregistry.DefaultGatherer,
600+
promhttp.HandlerOpts{},
601+
)
602+
603+
mux := http.NewServeMux()
604+
mux.Handle("/metrics", h)
605+
606+
srv := &http.Server{
607+
Addr: net.JoinHostPort("", strconv.Itoa(port)),
608+
Handler: mux,
609+
}
610+
611+
if err := mgr.Add(&manager.Server{
612+
Name: "metrics",
613+
Server: srv,
614+
}); err != nil {
615+
return err
616+
}
617+
return nil
618+
}

0 commit comments

Comments
 (0)