Commit 47382f1

Add metrics validation in integration test
Start by adding request total metrics; more validation will be added in a follow-up. kubernetes-sigs#326
1 parent 2ad70e3 commit 47382f1
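
For context, the "[ALPHA]" prefix in the HELP text asserted below comes from the stability level used by the k8s.io/component-base metrics wrappers. The following is a rough sketch of how such a counter could be defined and registered; the real definition lives in pkg/epp/metrics and may differ:

package metrics

import (
	compbasemetrics "k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/legacyregistry"
)

// Hypothetical sketch of the counter under test; the actual definition
// in pkg/epp/metrics may differ in naming and options.
var requestCounter = compbasemetrics.NewCounterVec(
	&compbasemetrics.CounterOpts{
		Subsystem:      "inference_model",
		Name:           "request_total",
		Help:           "Counter of inference model requests broken out for each model and target model.",
		StabilityLevel: compbasemetrics.ALPHA, // surfaces as "[ALPHA]" in the HELP text
	},
	[]string{"model_name", "target_model_name"},
)

// Register registers the counter with the shared legacy registry.
func Register() {
	legacyregistry.MustRegister(requestCounter)
}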

File tree

1 file changed (+82, -5)


test/integration/hermetic_test.go

Lines changed: 82 additions & 5 deletions
@@ -24,15 +24,20 @@ import (
 	"errors"
 	"fmt"
 	"io"
+	"net"
+	"net/http"
 	"os"
 	"path/filepath"
+	"strconv"
+	"strings"
 	"testing"
 	"time"
 
 	configPb "github.com/envoyproxy/go-control-plane/envoy/config/core/v3"
 	extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
 	envoyTypePb "github.com/envoyproxy/go-control-plane/envoy/type/v3"
 	"github.com/google/go-cmp/cmp"
+	"github.com/prometheus/client_golang/prometheus/promhttp"
 	"github.com/stretchr/testify/assert"
 	"google.golang.org/grpc"
 	"google.golang.org/grpc/credentials/insecure"
@@ -43,12 +48,16 @@ import (
 	utilruntime "k8s.io/apimachinery/pkg/util/runtime"
 	k8syaml "k8s.io/apimachinery/pkg/util/yaml"
 	clientgoscheme "k8s.io/client-go/kubernetes/scheme"
+	"k8s.io/component-base/metrics/legacyregistry"
+	metricsutils "k8s.io/component-base/metrics/testutil"
 	ctrl "sigs.k8s.io/controller-runtime"
 	k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
 	"sigs.k8s.io/controller-runtime/pkg/envtest"
+	"sigs.k8s.io/controller-runtime/pkg/manager"
 	"sigs.k8s.io/gateway-api-inference-extension/api/v1alpha2"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend"
 	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
+	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
 	runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/server"
 	extprocutils "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/test"
 	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
@@ -57,7 +66,8 @@ import (
 )
 
 const (
-	port = runserver.DefaultGrpcPort
+	port        = runserver.DefaultGrpcPort
+	metricsPort = 8888
 )
 
 var (
@@ -76,6 +86,7 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 		wantHeaders       []*configPb.HeaderValueOption
 		wantMetadata      *structpb.Struct
 		wantBody          []byte
+		wantMetrics       string
 		wantErr           bool
 		immediateResponse *extProcPb.ImmediateResponse
 	}{
@@ -113,7 +124,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 			},
 			wantMetadata: makeMetadata("address-1:8000"),
 			wantBody: []byte("{\"max_tokens\":100,\"model\":\"my-model-12345\",\"prompt\":\"test1\",\"temperature\":0}"),
-			wantErr: false,
+			wantMetrics: `
+# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
+# TYPE inference_model_request_total counter
+inference_model_request_total{model_name="my-model",target_model_name="my-model-12345"} 1
+`,
+			wantErr: false,
 		},
 		{
 			name: "select active lora, low queue",
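
Note that the expected counts accumulate across the table-driven cases: the subtests run in order against the same process-wide registry, so each later case asserts the running total (sql-lora reaches 2 by the third case below). A minimal self-contained illustration of that accumulation, using demo metric names and client_golang only:

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/testutil"
)

func main() {
	// A shared registry persists across subtests, so counts accumulate.
	c := prometheus.NewCounterVec(
		prometheus.CounterOpts{Name: "demo_request_total", Help: "Demo counter."},
		[]string{"model_name"},
	)
	prometheus.MustRegister(c)

	c.WithLabelValues("sql-lora").Inc() // after subtest 2: value 1
	c.WithLabelValues("sql-lora").Inc() // after subtest 3: value 2
	fmt.Println(testutil.ToFloat64(c.WithLabelValues("sql-lora"))) // prints 2
}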
@@ -161,7 +177,13 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 			},
 			wantMetadata: makeMetadata("address-1:8000"),
 			wantBody: []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg2\",\"prompt\":\"test2\",\"temperature\":0}"),
-			wantErr: false,
+			wantMetrics: `
+# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
+# TYPE inference_model_request_total counter
+inference_model_request_total{model_name="my-model",target_model_name="my-model-12345"} 1
+inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 1
+`,
+			wantErr: false,
 		},
 		{
 			name: "select no lora despite active model, avoid excessive queue size",
@@ -210,7 +232,13 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 			},
 			wantMetadata: makeMetadata("address-2:8000"),
 			wantBody: []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg2\",\"prompt\":\"test3\",\"temperature\":0}"),
-			wantErr: false,
+			wantMetrics: `
+# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
+# TYPE inference_model_request_total counter
+inference_model_request_total{model_name="my-model",target_model_name="my-model-12345"} 1
+inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 2
+`,
+			wantErr: false,
 		},
 		{
 			name: "noncritical and all models past threshold, shed request",
@@ -253,6 +281,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 					Code: envoyTypePb.StatusCode_TooManyRequests,
 				},
 			},
+			wantMetrics: `
+# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
+# TYPE inference_model_request_total counter
+inference_model_request_total{model_name="my-model",target_model_name="my-model-12345"} 1
+inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 2
+`,
 		},
 		{
 			name: "noncritical, but one server has capacity, do not shed",
@@ -301,7 +335,14 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 			},
 			wantMetadata: makeMetadata("address-0:8000"),
 			wantBody: []byte("{\"max_tokens\":100,\"model\":\"sql-lora-1fdg3\",\"prompt\":\"test5\",\"temperature\":0}"),
-			wantErr: false,
+			wantMetrics: `
+# HELP inference_model_request_total [ALPHA] Counter of inference model requests broken out for each model and target model.
+# TYPE inference_model_request_total counter
+inference_model_request_total{model_name="my-model",target_model_name="my-model-12345"} 1
+inference_model_request_total{model_name="sql-lora",target_model_name="sql-lora-1fdg2"} 2
+inference_model_request_total{model_name="sql-lora-sheddable",target_model_name="sql-lora-1fdg3"} 1
+`,
+			wantErr: false,
 		},
 	}
 
@@ -345,6 +386,12 @@ func TestKubeInferenceModelRequest(t *testing.T) {
 			if diff := cmp.Diff(want, res, protocmp.Transform()); diff != "" {
 				t.Errorf("Unexpected response, (-want +got): %v", diff)
 			}
+
+			if test.wantMetrics != "" {
+				if err := metricsutils.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(test.wantMetrics), "inference_model_request_total"); err != nil {
+					t.Error(err)
+				}
+			}
 		})
 	}
 }
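
GatherAndCompare, from k8s.io/component-base/metrics/testutil, gathers from the given registry, parses the expected string as Prometheus text exposition format, and diffs only the metric families named in the variadic arguments; anything else in the registry is ignored. A standalone sketch of the mechanism, with a throwaway registry and a demo metric name:

package main

import (
	"fmt"
	"strings"

	"github.com/prometheus/client_golang/prometheus"
	metricsutils "k8s.io/component-base/metrics/testutil"
)

func main() {
	reg := prometheus.NewRegistry()
	c := prometheus.NewCounter(prometheus.CounterOpts{
		Name: "demo_total",
		Help: "Demo counter.",
	})
	reg.MustRegister(c)
	c.Inc()

	want := `
# HELP demo_total Demo counter.
# TYPE demo_total counter
demo_total 1
`
	// Compares only the "demo_total" family; other metrics are ignored.
	if err := metricsutils.GatherAndCompare(reg, strings.NewReader(want), "demo_total"); err != nil {
		fmt.Println("mismatch:", err)
		return
	}
	fmt.Println("metrics match")
}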
@@ -423,6 +470,8 @@ func BeforeSuit(t *testing.T) func() {
 		logutil.Fatal(logger, err, "Failed to create controller manager")
 	}
 
+	registerMetricsHandler(mgr, metricsPort)
+
 	serverRunner = runserver.NewDefaultExtProcServerRunner()
 	// Adjust from defaults
 	serverRunner.PoolName = "vllm-llama2-7b-pool"
@@ -543,3 +592,31 @@ func makeMetadata(endpoint string) *structpb.Struct {
 		},
 	}
 }
+
+// registerMetricsHandler is a simplified version of the metrics endpoint
+// handler, without authentication, for use in integration tests.
+func registerMetricsHandler(mgr manager.Manager, port int) error {
+	metrics.Register()
+
+	// Init HTTP server.
+	h := promhttp.HandlerFor(
+		legacyregistry.DefaultGatherer,
+		promhttp.HandlerOpts{},
+	)
+
+	mux := http.NewServeMux()
+	mux.Handle("/metrics", h)
+
+	srv := &http.Server{
+		Addr:    net.JoinHostPort("", strconv.Itoa(port)),
+		Handler: mux,
+	}
+
+	if err := mgr.Add(&manager.Server{
+		Name:   "metrics",
+		Server: srv,
+	}); err != nil {
+		return err
+	}
+	return nil
+}
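
Wrapping the http.Server in a manager.Server runnable means the controller-runtime manager starts the endpoint with the rest of the suite and shuts it down gracefully on exit. While the manager is running, the endpoint can also be checked by hand over plain HTTP; a hypothetical spot check, assuming metricsPort is still 8888:

package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Assumes the test manager (and its "metrics" server) is running locally.
	resp, err := http.Get("http://localhost:8888/metrics")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Print(string(body)) // Prometheus text exposition format
}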
