Skip to content

Commit d55ead7

Browse files
authored
feat: merge two metric servers (#728)
* feat: migrate epp metric server Signed-off-by: nayihz <[email protected]> * feat: migrate bbr metric server Signed-off-by: nayihz <[email protected]> * fix: metric reset not effect Signed-off-by: nayihz <[email protected]> * fix: add the stability level to the help message of the metric * fix: refactor custom inferencepool metric Signed-off-by: nayihz <[email protected]> --------- Signed-off-by: nayihz <[email protected]>
1 parent a5bf0ac commit d55ead7

File tree

11 files changed

+281
-376
lines changed

11 files changed

+281
-376
lines changed

cmd/bbr/main.go

Lines changed: 16 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -18,26 +18,23 @@ package main
1818

1919
import (
2020
"flag"
21-
"net"
22-
"net/http"
21+
"fmt"
2322
"os"
24-
"strconv"
2523

2624
"github.com/go-logr/logr"
27-
"github.com/prometheus/client_golang/prometheus/promhttp"
2825
uberzap "go.uber.org/zap"
2926
"go.uber.org/zap/zapcore"
3027
"google.golang.org/grpc"
3128
healthPb "google.golang.org/grpc/health/grpc_health_v1"
32-
"k8s.io/client-go/rest"
33-
"k8s.io/component-base/metrics/legacyregistry"
3429
ctrl "sigs.k8s.io/controller-runtime"
3530
"sigs.k8s.io/controller-runtime/pkg/log/zap"
3631
"sigs.k8s.io/controller-runtime/pkg/manager"
3732
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
33+
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
34+
3835
"sigs.k8s.io/gateway-api-inference-extension/internal/runnable"
36+
"sigs.k8s.io/gateway-api-inference-extension/pkg/bbr/metrics"
3937
runserver "sigs.k8s.io/gateway-api-inference-extension/pkg/bbr/server"
40-
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
4138
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
4239
)
4340

@@ -85,7 +82,18 @@ func run() error {
8582
return err
8683
}
8784

88-
mgr, err := ctrl.NewManager(cfg, ctrl.Options{})
85+
metrics.Register()
86+
87+
// Register metrics handler.
88+
// Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server.
89+
// More info:
90+
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/metrics/server
91+
// - https://book.kubebuilder.io/reference/metrics.html
92+
metricsServerOptions := metricsserver.Options{
93+
BindAddress: fmt.Sprintf(":%d", *metricsPort),
94+
FilterProvider: filters.WithAuthenticationAndAuthorization,
95+
}
96+
mgr, err := ctrl.NewManager(cfg, ctrl.Options{Metrics: metricsServerOptions})
8997
if err != nil {
9098
setupLog.Error(err, "Failed to create manager", "config", cfg)
9199
return err
@@ -107,11 +115,6 @@ func run() error {
107115
return err
108116
}
109117

110-
// Register metrics handler.
111-
if err := registerMetricsHandler(mgr, *metricsPort, cfg); err != nil {
112-
return err
113-
}
114-
115118
// Start the manager. This blocks until a signal is received.
116119
setupLog.Info("Manager starting")
117120
if err := mgr.Start(ctx); err != nil {
@@ -152,58 +155,3 @@ func initLogging(opts *zap.Options) {
152155
logger := zap.New(zap.UseFlagOptions(opts), zap.RawZapOpts(uberzap.AddCaller()))
153156
ctrl.SetLogger(logger)
154157
}
155-
156-
const metricsEndpoint = "/metrics"
157-
158-
// registerMetricsHandler adds the metrics HTTP handler as a Runnable to the given manager.
159-
func registerMetricsHandler(mgr manager.Manager, port int, cfg *rest.Config) error {
160-
metrics.Register()
161-
162-
// Init HTTP server.
163-
h, err := metricsHandlerWithAuthenticationAndAuthorization(cfg)
164-
if err != nil {
165-
return err
166-
}
167-
168-
mux := http.NewServeMux()
169-
mux.Handle(metricsEndpoint, h)
170-
171-
srv := &http.Server{
172-
Addr: net.JoinHostPort("", strconv.Itoa(port)),
173-
Handler: mux,
174-
}
175-
176-
if err := mgr.Add(&manager.Server{
177-
Name: "metrics",
178-
Server: srv,
179-
}); err != nil {
180-
setupLog.Error(err, "Failed to register metrics HTTP handler")
181-
return err
182-
}
183-
return nil
184-
}
185-
186-
func metricsHandlerWithAuthenticationAndAuthorization(cfg *rest.Config) (http.Handler, error) {
187-
h := promhttp.HandlerFor(
188-
legacyregistry.DefaultGatherer,
189-
promhttp.HandlerOpts{},
190-
)
191-
httpClient, err := rest.HTTPClientFor(cfg)
192-
if err != nil {
193-
setupLog.Error(err, "Failed to create http client for metrics auth")
194-
return nil, err
195-
}
196-
197-
filter, err := filters.WithAuthenticationAndAuthorization(cfg, httpClient)
198-
if err != nil {
199-
setupLog.Error(err, "Failed to create metrics filter for auth")
200-
return nil, err
201-
}
202-
metricsLogger := ctrl.Log.WithName("metrics").WithValues("path", metricsEndpoint)
203-
metricsAuthHandler, err := filter(metricsLogger, h)
204-
if err != nil {
205-
setupLog.Error(err, "Failed to create metrics auth handler")
206-
return nil, err
207-
}
208-
return metricsAuthHandler, nil
209-
}

cmd/epp/main.go

Lines changed: 26 additions & 81 deletions
Original file line numberDiff line numberDiff line change
@@ -19,25 +19,22 @@ package main
1919
import (
2020
"flag"
2121
"fmt"
22-
"net"
23-
"net/http"
2422
"os"
25-
"strconv"
2623

2724
"github.com/go-logr/logr"
28-
"github.com/prometheus/client_golang/prometheus/promhttp"
25+
"github.com/prometheus/client_golang/prometheus"
2926
uberzap "go.uber.org/zap"
3027
"go.uber.org/zap/zapcore"
3128
"google.golang.org/grpc"
3229
healthPb "google.golang.org/grpc/health/grpc_health_v1"
3330
"k8s.io/apimachinery/pkg/types"
34-
"k8s.io/client-go/rest"
35-
"k8s.io/component-base/metrics/legacyregistry"
3631
ctrl "sigs.k8s.io/controller-runtime"
3732
"sigs.k8s.io/controller-runtime/pkg/log"
3833
"sigs.k8s.io/controller-runtime/pkg/log/zap"
3934
"sigs.k8s.io/controller-runtime/pkg/manager"
4035
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
36+
metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"
37+
4138
"sigs.k8s.io/gateway-api-inference-extension/internal/runnable"
4239
backendmetrics "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/backend/metrics"
4340
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/datastore"
@@ -53,10 +50,6 @@ import (
5350
"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
5451
)
5552

56-
const (
57-
defaultMetricsEndpoint = "/metrics"
58-
)
59-
6053
var (
6154
grpcPort = flag.Int(
6255
"grpcPort",
@@ -163,16 +156,6 @@ func run() error {
163156
return err
164157
}
165158

166-
poolNamespacedName := types.NamespacedName{
167-
Name: *poolName,
168-
Namespace: *poolNamespace,
169-
}
170-
mgr, err := runserver.NewDefaultManager(poolNamespacedName, cfg)
171-
if err != nil {
172-
setupLog.Error(err, "Failed to create controller manager")
173-
return err
174-
}
175-
176159
// Set up mapper for metric scraping.
177160
mapping, err := backendmetrics.NewMetricMapping(
178161
*totalQueuedRequestsMetric,
@@ -191,6 +174,29 @@ func run() error {
191174

192175
datastore := datastore.NewDatastore(ctx, pmf)
193176

177+
customCollectors := []prometheus.Collector{collectors.NewInferencePoolMetricsCollector(datastore)}
178+
metrics.Register(customCollectors...)
179+
metrics.RecordInferenceExtensionInfo()
180+
// Register metrics handler.
181+
// Metrics endpoint is enabled in 'config/default/kustomization.yaml'. The Metrics options configure the server.
182+
// More info:
183+
// - https://pkg.go.dev/sigs.k8s.io/[email protected]/pkg/metrics/server
184+
// - https://book.kubebuilder.io/reference/metrics.html
185+
metricsServerOptions := metricsserver.Options{
186+
BindAddress: fmt.Sprintf(":%d", *metricsPort),
187+
FilterProvider: filters.WithAuthenticationAndAuthorization,
188+
}
189+
190+
poolNamespacedName := types.NamespacedName{
191+
Name: *poolName,
192+
Namespace: *poolNamespace,
193+
}
194+
mgr, err := runserver.NewDefaultManager(poolNamespacedName, cfg, metricsServerOptions)
195+
if err != nil {
196+
setupLog.Error(err, "Failed to create controller manager")
197+
return err
198+
}
199+
194200
scheduler := scheduling.NewScheduler(datastore)
195201
if schedulerV2 == "true" {
196202
queueScorerWeight := envutil.GetEnvInt("QUEUE_SCORE_WEIGHT", scorer.DefaultQueueScorerWeight, setupLog)
@@ -239,11 +245,6 @@ func run() error {
239245
return err
240246
}
241247

242-
// Register metrics handler.
243-
if err := registerMetricsHandler(mgr, *metricsPort, cfg, datastore); err != nil {
244-
return err
245-
}
246-
247248
// Start the manager. This blocks until a signal is received.
248249
setupLog.Info("Controller manager starting")
249250
if err := mgr.Start(ctx); err != nil {
@@ -287,62 +288,6 @@ func registerHealthServer(mgr manager.Manager, logger logr.Logger, ds datastore.
287288
return nil
288289
}
289290

290-
// registerMetricsHandler adds the metrics HTTP handler as a Runnable to the given manager.
291-
func registerMetricsHandler(mgr manager.Manager, port int, cfg *rest.Config, ds datastore.Datastore) error {
292-
metrics.Register()
293-
legacyregistry.CustomMustRegister(collectors.NewInferencePoolMetricsCollector(ds))
294-
295-
metrics.RecordInferenceExtensionInfo()
296-
297-
// Init HTTP server.
298-
h, err := metricsHandlerWithAuthenticationAndAuthorization(cfg)
299-
if err != nil {
300-
return err
301-
}
302-
303-
mux := http.NewServeMux()
304-
mux.Handle(defaultMetricsEndpoint, h)
305-
306-
srv := &http.Server{
307-
Addr: net.JoinHostPort("", strconv.Itoa(port)),
308-
Handler: mux,
309-
}
310-
311-
if err := mgr.Add(&manager.Server{
312-
Name: "metrics",
313-
Server: srv,
314-
}); err != nil {
315-
setupLog.Error(err, "Failed to register metrics HTTP handler")
316-
return err
317-
}
318-
return nil
319-
}
320-
321-
func metricsHandlerWithAuthenticationAndAuthorization(cfg *rest.Config) (http.Handler, error) {
322-
h := promhttp.HandlerFor(
323-
legacyregistry.DefaultGatherer,
324-
promhttp.HandlerOpts{},
325-
)
326-
httpClient, err := rest.HTTPClientFor(cfg)
327-
if err != nil {
328-
setupLog.Error(err, "Failed to create http client for metrics auth")
329-
return nil, err
330-
}
331-
332-
filter, err := filters.WithAuthenticationAndAuthorization(cfg, httpClient)
333-
if err != nil {
334-
setupLog.Error(err, "Failed to create metrics filter for auth")
335-
return nil, err
336-
}
337-
metricsLogger := ctrl.Log.WithName("metrics").WithValues("path", defaultMetricsEndpoint)
338-
metricsAuthHandler, err := filter(metricsLogger, h)
339-
if err != nil {
340-
setupLog.Error(err, "Failed to create metrics auth handler")
341-
return nil, err
342-
}
343-
return metricsAuthHandler, nil
344-
}
345-
346291
func validateFlags() error {
347292
if *poolName == "" {
348293
return fmt.Errorf("required %q flag not set", "poolName")

pkg/bbr/handlers/request_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -26,8 +26,8 @@ import (
2626
extProcPb "github.com/envoyproxy/go-control-plane/envoy/service/ext_proc/v3"
2727
"github.com/google/go-cmp/cmp"
2828
"google.golang.org/protobuf/testing/protocmp"
29-
"k8s.io/component-base/metrics/legacyregistry"
3029
metricsutils "k8s.io/component-base/metrics/testutil"
30+
crmetrics "sigs.k8s.io/controller-runtime/pkg/metrics"
3131
"sigs.k8s.io/gateway-api-inference-extension/pkg/bbr/metrics"
3232
logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
3333
)
@@ -204,7 +204,7 @@ func TestHandleRequestBody(t *testing.T) {
204204
bbr_success_total{} 1
205205
`
206206

207-
if err := metricsutils.GatherAndCompare(legacyregistry.DefaultGatherer, strings.NewReader(wantMetrics), "inference_model_request_total"); err != nil {
207+
if err := metricsutils.GatherAndCompare(crmetrics.Registry, strings.NewReader(wantMetrics), "inference_model_request_total"); err != nil {
208208
t.Error(err)
209209
}
210210
}

pkg/bbr/metrics/metrics.go

Lines changed: 25 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -19,49 +19,48 @@ package metrics
1919
import (
2020
"sync"
2121

22+
"github.com/prometheus/client_golang/prometheus"
2223
compbasemetrics "k8s.io/component-base/metrics"
23-
"k8s.io/component-base/metrics/legacyregistry"
24+
"sigs.k8s.io/controller-runtime/pkg/metrics"
25+
26+
metricsutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/metrics"
2427
)
2528

2629
const component = "bbr"
2730

2831
var (
29-
successCounter = compbasemetrics.NewCounterVec(
30-
&compbasemetrics.CounterOpts{
31-
Subsystem: component,
32-
Name: "success_total",
33-
Help: "Count of successes pulling model name from body and injecting it in the request headers.",
34-
StabilityLevel: compbasemetrics.ALPHA,
32+
successCounter = prometheus.NewCounterVec(
33+
prometheus.CounterOpts{
34+
Subsystem: component,
35+
Name: "success_total",
36+
Help: metricsutil.HelpMsgWithStability("Count of successes pulling model name from body and injecting it in the request headers.", compbasemetrics.ALPHA),
3537
},
3638
[]string{},
3739
)
38-
modelNotInBodyCounter = compbasemetrics.NewCounterVec(
39-
&compbasemetrics.CounterOpts{
40-
Subsystem: component,
41-
Name: "model_not_in_body_total",
42-
Help: "Count of times the model was not present in the request body.",
43-
StabilityLevel: compbasemetrics.ALPHA,
40+
modelNotInBodyCounter = prometheus.NewCounterVec(
41+
prometheus.CounterOpts{
42+
Subsystem: component,
43+
Name: "model_not_in_body_total",
44+
Help: metricsutil.HelpMsgWithStability("Count of times the model was not present in the request body.", compbasemetrics.ALPHA),
4445
},
4546
[]string{},
4647
)
47-
modelNotParsedCounter = compbasemetrics.NewCounterVec(
48-
&compbasemetrics.CounterOpts{
49-
Subsystem: component,
50-
Name: "model_not_parsed_total",
51-
Help: "Count of times the model was in the request body but we could not parse it.",
52-
StabilityLevel: compbasemetrics.ALPHA,
48+
modelNotParsedCounter = prometheus.NewCounterVec(
49+
prometheus.CounterOpts{
50+
Subsystem: component,
51+
Name: "model_not_parsed_total",
52+
Help: metricsutil.HelpMsgWithStability("Count of times the model was in the request body but we could not parse it.", compbasemetrics.ALPHA),
5353
},
5454
[]string{},
5555
)
5656

5757
// TODO: Uncomment and use this metrics once the core server implementation has handling to skip body parsing if header exists.
5858
/*
59-
modelAlreadyPresentInHeaderCounter = compbasemetrics.NewCounterVec(
60-
&compbasemetrics.CounterOpts{
59+
modelAlreadyPresentInHeaderCounter = prometheus.NewCounterVec(
60+
prometheus.CounterOpts{
6161
Subsystem: component,
6262
Name: "model_already_present_in_header_total",
6363
Help: "Count of times the model was already present in request headers.",
64-
StabilityLevel: compbasemetrics.ALPHA,
6564
},
6665
[]string{},
6766
)
@@ -73,10 +72,10 @@ var registerMetrics sync.Once
7372
// Register all metrics.
7473
func Register() {
7574
registerMetrics.Do(func() {
76-
legacyregistry.MustRegister(successCounter)
77-
legacyregistry.MustRegister(modelNotInBodyCounter)
78-
legacyregistry.MustRegister(modelNotParsedCounter)
79-
// legacyregistry.MustRegister(modelAlreadyPresentInHeaderCounter)
75+
metrics.Registry.MustRegister(successCounter)
76+
metrics.Registry.MustRegister(modelNotInBodyCounter)
77+
metrics.Registry.MustRegister(modelNotParsedCounter)
78+
// metrics.Registry.MustRegister(modelAlreadyPresentInHeaderCounter)
8079
})
8180
}
8281

0 commit comments

Comments
 (0)