Skip to content

Commit 519bee8

Browse files
authored
renamed Metrics to MetricsState and move to a separate file (#822)
Signed-off-by: Nir Rozenbaum <[email protected]>
1 parent 7207ed6 commit 519bee8

File tree

15 files changed

+183
-158
lines changed

15 files changed

+183
-158
lines changed

pkg/epp/backend/metrics/fake.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ import (
3131
// FakePodMetrics is an implementation of PodMetrics that doesn't run the async refresh loop.
3232
type FakePodMetrics struct {
3333
Pod *backend.Pod
34-
Metrics *Metrics
34+
Metrics *MetricsState
3535
}
3636

3737
func (fpm *FakePodMetrics) String() string {
@@ -41,7 +41,7 @@ func (fpm *FakePodMetrics) String() string {
4141
func (fpm *FakePodMetrics) GetPod() *backend.Pod {
4242
return fpm.Pod
4343
}
44-
func (fpm *FakePodMetrics) GetMetrics() *Metrics {
44+
func (fpm *FakePodMetrics) GetMetrics() *MetricsState {
4545
return fpm.Metrics
4646
}
4747
func (fpm *FakePodMetrics) UpdatePod(pod *corev1.Pod) {
@@ -53,10 +53,10 @@ type FakePodMetricsClient struct {
5353
errMu sync.RWMutex
5454
Err map[types.NamespacedName]error
5555
resMu sync.RWMutex
56-
Res map[types.NamespacedName]*Metrics
56+
Res map[types.NamespacedName]*MetricsState
5757
}
5858

59-
func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *Metrics, port int32) (*Metrics, error) {
59+
func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error) {
6060
f.errMu.RLock()
6161
err, ok := f.Err[pod.NamespacedName]
6262
f.errMu.RUnlock()
@@ -73,7 +73,7 @@ func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Po
7373
return res.Clone(), nil
7474
}
7575

76-
func (f *FakePodMetricsClient) SetRes(new map[types.NamespacedName]*Metrics) {
76+
func (f *FakePodMetricsClient) SetRes(new map[types.NamespacedName]*MetricsState) {
7777
f.resMu.Lock()
7878
defer f.resMu.Unlock()
7979
f.Res = new

pkg/epp/backend/metrics/metrics.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ type PodMetricsClientImpl struct {
4141
}
4242

4343
// FetchMetrics fetches metrics from a given pod, clones the existing metrics object and returns an updated one.
44-
func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *Metrics, port int32) (*Metrics, error) {
44+
func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error) {
4545
// Currently the metrics endpoint is hard-coded, which works with vLLM.
4646
// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16): Consume this from InferencePool config.
4747
url := "http://" + pod.Address + ":" + strconv.Itoa(int(port)) + "/metrics"
@@ -73,8 +73,8 @@ func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Po
7373
// promToPodMetrics updates internal pod metrics with scraped Prometheus metrics.
7474
func (p *PodMetricsClientImpl) promToPodMetrics(
7575
metricFamilies map[string]*dto.MetricFamily,
76-
existing *Metrics,
77-
) (*Metrics, error) {
76+
existing *MetricsState,
77+
) (*MetricsState, error) {
7878
var errs error
7979
updated := existing.Clone()
8080

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package metrics
18+
19+
import (
20+
"fmt"
21+
"time"
22+
)
23+
24+
// newMetricsState initializes a new MetricsState and returns its pointer.
25+
func newMetricsState() *MetricsState {
26+
return &MetricsState{
27+
ActiveModels: make(map[string]int),
28+
WaitingModels: make(map[string]int),
29+
}
30+
}
31+
32+
// MetricsState holds the latest state of the metrics that were scraped from a pod.
33+
type MetricsState struct {
34+
// ActiveModels is a set of models(including LoRA adapters) that are currently cached to GPU.
35+
ActiveModels map[string]int
36+
WaitingModels map[string]int
37+
// MaxActiveModels is the maximum number of models that can be loaded to GPU.
38+
MaxActiveModels int
39+
RunningQueueSize int
40+
WaitingQueueSize int
41+
KVCacheUsagePercent float64
42+
KvCacheMaxTokenCapacity int
43+
44+
// UpdateTime record the last time when the metrics were updated.
45+
UpdateTime time.Time
46+
}
47+
48+
// String returns a string with all MetricState information
49+
func (s *MetricsState) String() string {
50+
if s == nil {
51+
return ""
52+
}
53+
return fmt.Sprintf("%+v", *s)
54+
}
55+
56+
// Clone creates a copy of MetricsState and returns its pointer.
57+
// Clone returns nil if the object being cloned is nil.
58+
func (s *MetricsState) Clone() *MetricsState {
59+
if s == nil {
60+
return nil
61+
}
62+
activeModels := make(map[string]int, len(s.ActiveModels))
63+
for key, value := range s.ActiveModels {
64+
activeModels[key] = value
65+
}
66+
waitingModels := make(map[string]int, len(s.WaitingModels))
67+
for key, value := range s.WaitingModels {
68+
waitingModels[key] = value
69+
}
70+
return &MetricsState{
71+
ActiveModels: activeModels,
72+
WaitingModels: waitingModels,
73+
MaxActiveModels: s.MaxActiveModels,
74+
RunningQueueSize: s.RunningQueueSize,
75+
WaitingQueueSize: s.WaitingQueueSize,
76+
KVCacheUsagePercent: s.KVCacheUsagePercent,
77+
KvCacheMaxTokenCapacity: s.KvCacheMaxTokenCapacity,
78+
UpdateTime: s.UpdateTime,
79+
}
80+
}

pkg/epp/backend/metrics/metrics_test.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -377,8 +377,8 @@ func TestPromToPodMetrics(t *testing.T) {
377377
name string
378378
metricFamilies map[string]*dto.MetricFamily
379379
mapping *MetricMapping
380-
existingMetrics *Metrics
381-
expectedMetrics *Metrics
380+
existingMetrics *MetricsState
381+
expectedMetrics *MetricsState
382382
expectedErr error // Count of expected errors
383383
}{
384384
{
@@ -401,8 +401,8 @@ func TestPromToPodMetrics(t *testing.T) {
401401
KVCacheUtilization: &MetricSpec{MetricName: "vllm_usage"},
402402
LoraRequestInfo: &MetricSpec{MetricName: "vllm:lora_requests_info"},
403403
},
404-
existingMetrics: &Metrics{},
405-
expectedMetrics: &Metrics{
404+
existingMetrics: &MetricsState{},
405+
expectedMetrics: &MetricsState{
406406
WaitingQueueSize: 7,
407407
KVCacheUsagePercent: 0.8,
408408
ActiveModels: map[string]int{"lora1": 0, "lora2": 0},
@@ -418,8 +418,8 @@ func TestPromToPodMetrics(t *testing.T) {
418418
KVCacheUtilization: &MetricSpec{MetricName: "vllm_usage"},
419419
LoraRequestInfo: &MetricSpec{MetricName: "vllm:lora_requests_info"},
420420
},
421-
existingMetrics: &Metrics{ActiveModels: map[string]int{}, WaitingModels: map[string]int{}},
422-
expectedMetrics: &Metrics{ActiveModels: map[string]int{}, WaitingModels: map[string]int{}},
421+
existingMetrics: &MetricsState{ActiveModels: map[string]int{}, WaitingModels: map[string]int{}},
422+
expectedMetrics: &MetricsState{ActiveModels: map[string]int{}, WaitingModels: map[string]int{}},
423423
expectedErr: multierr.Combine(errors.New("metric family \"vllm_waiting\" not found"), errors.New("metric family \"vllm_usage\" not found"), errors.New("metric family \"vllm:lora_requests_info\" not found")),
424424
},
425425
{
@@ -437,8 +437,8 @@ func TestPromToPodMetrics(t *testing.T) {
437437
KVCacheUtilization: &MetricSpec{MetricName: "vllm_usage"},
438438
LoraRequestInfo: &MetricSpec{MetricName: "vllm:lora_requests_info"},
439439
},
440-
existingMetrics: &Metrics{},
441-
expectedMetrics: &Metrics{
440+
existingMetrics: &MetricsState{},
441+
expectedMetrics: &MetricsState{
442442
WaitingQueueSize: 0,
443443
KVCacheUsagePercent: 0.8,
444444
ActiveModels: map[string]int{"lora1": 0, "lora2": 0},
@@ -457,8 +457,8 @@ func TestPromToPodMetrics(t *testing.T) {
457457
mapping: &MetricMapping{
458458
LoraRequestInfo: &MetricSpec{MetricName: "vllm:lora_requests_info"},
459459
},
460-
existingMetrics: &Metrics{},
461-
expectedMetrics: &Metrics{
460+
existingMetrics: &MetricsState{},
461+
expectedMetrics: &MetricsState{
462462
ActiveModels: map[string]int{"lora1": 0},
463463
WaitingModels: map[string]int{},
464464
MaxActiveModels: 0, // Should still default to 0.
@@ -494,7 +494,7 @@ func TestFetchMetrics(t *testing.T) {
494494
Name: "pod",
495495
},
496496
}
497-
existing := &Metrics{}
497+
existing := &MetricsState{}
498498
p := &PodMetricsClientImpl{} // No MetricMapping needed for this basic test
499499

500500
_, err := p.FetchMetrics(ctx, pod, existing, 9999) // Use a port that's unlikely to be in use.

pkg/epp/backend/metrics/pod_metrics.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ const (
3737

3838
type podMetrics struct {
3939
pod atomic.Pointer[backend.Pod]
40-
metrics atomic.Pointer[Metrics]
40+
metrics atomic.Pointer[MetricsState]
4141
pmc PodMetricsClient
4242
ds Datastore
4343
interval time.Duration
@@ -49,7 +49,7 @@ type podMetrics struct {
4949
}
5050

5151
type PodMetricsClient interface {
52-
FetchMetrics(ctx context.Context, pod *backend.Pod, existing *Metrics, port int32) (*Metrics, error)
52+
FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error)
5353
}
5454

5555
func (pm *podMetrics) String() string {
@@ -60,7 +60,7 @@ func (pm *podMetrics) GetPod() *backend.Pod {
6060
return pm.pod.Load()
6161
}
6262

63-
func (pm *podMetrics) GetMetrics() *Metrics {
63+
func (pm *podMetrics) GetMetrics() *MetricsState {
6464
return pm.metrics.Load()
6565
}
6666

pkg/epp/backend/metrics/pod_metrics_test.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ var (
3636
Namespace: "default",
3737
},
3838
}
39-
initial = &Metrics{
39+
initial = &MetricsState{
4040
WaitingQueueSize: 0,
4141
KVCacheUsagePercent: 0.2,
4242
MaxActiveModels: 2,
@@ -46,7 +46,7 @@ var (
4646
},
4747
WaitingModels: map[string]int{},
4848
}
49-
updated = &Metrics{
49+
updated = &MetricsState{
5050
WaitingQueueSize: 9999,
5151
KVCacheUsagePercent: 0.99,
5252
MaxActiveModels: 99,
@@ -69,16 +69,16 @@ func TestMetricsRefresh(t *testing.T) {
6969
namespacedName := types.NamespacedName{Name: pod1.Name, Namespace: pod1.Namespace}
7070
// Use SetRes to simulate an update of metrics from the pod.
7171
// Verify that the metrics are updated.
72-
pmc.SetRes(map[types.NamespacedName]*Metrics{namespacedName: initial})
72+
pmc.SetRes(map[types.NamespacedName]*MetricsState{namespacedName: initial})
7373
condition := func(collect *assert.CollectT) {
74-
assert.True(collect, cmp.Equal(pm.GetMetrics(), initial, cmpopts.IgnoreFields(Metrics{}, "UpdateTime")))
74+
assert.True(collect, cmp.Equal(pm.GetMetrics(), initial, cmpopts.IgnoreFields(MetricsState{}, "UpdateTime")))
7575
}
7676
assert.EventuallyWithT(t, condition, time.Second, time.Millisecond)
7777

7878
// Stop the loop, and simulate metric update again, this time the PodMetrics won't get the
7979
// new update.
8080
pm.StopRefreshLoop()
81-
pmc.SetRes(map[types.NamespacedName]*Metrics{namespacedName: updated})
81+
pmc.SetRes(map[types.NamespacedName]*MetricsState{namespacedName: updated})
8282
// Still expect the same condition (no metrics update).
8383
assert.EventuallyWithT(t, condition, time.Second, time.Millisecond)
8484
}

pkg/epp/backend/metrics/types.go

Lines changed: 2 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ package metrics
1919

2020
import (
2121
"context"
22-
"fmt"
2322
"sync"
2423
"time"
2524

@@ -51,70 +50,16 @@ func (f *PodMetricsFactory) NewPodMetrics(parentCtx context.Context, in *corev1.
5150
logger: log.FromContext(parentCtx).WithValues("pod", pod.NamespacedName),
5251
}
5352
pm.pod.Store(pod)
54-
pm.metrics.Store(newMetrics())
53+
pm.metrics.Store(newMetricsState())
5554

5655
pm.startRefreshLoop(parentCtx)
5756
return pm
5857
}
5958

6059
type PodMetrics interface {
6160
GetPod() *backend.Pod
62-
GetMetrics() *Metrics
61+
GetMetrics() *MetricsState
6362
UpdatePod(*corev1.Pod)
6463
StopRefreshLoop()
6564
String() string
6665
}
67-
68-
type Metrics struct {
69-
// ActiveModels is a set of models(including LoRA adapters) that are currently cached to GPU.
70-
ActiveModels map[string]int
71-
WaitingModels map[string]int
72-
// MaxActiveModels is the maximum number of models that can be loaded to GPU.
73-
MaxActiveModels int
74-
RunningQueueSize int
75-
WaitingQueueSize int
76-
KVCacheUsagePercent float64
77-
KvCacheMaxTokenCapacity int
78-
79-
// UpdateTime record the last time when the metrics were updated.
80-
UpdateTime time.Time
81-
}
82-
83-
func newMetrics() *Metrics {
84-
return &Metrics{
85-
ActiveModels: make(map[string]int),
86-
WaitingModels: make(map[string]int),
87-
}
88-
}
89-
90-
func (m *Metrics) String() string {
91-
if m == nil {
92-
return ""
93-
}
94-
return fmt.Sprintf("%+v", *m)
95-
}
96-
97-
func (m *Metrics) Clone() *Metrics {
98-
if m == nil {
99-
return nil
100-
}
101-
cm := make(map[string]int, len(m.ActiveModels))
102-
for k, v := range m.ActiveModels {
103-
cm[k] = v
104-
}
105-
wm := make(map[string]int, len(m.WaitingModels))
106-
for k, v := range m.WaitingModels {
107-
wm[k] = v
108-
}
109-
clone := &Metrics{
110-
ActiveModels: cm,
111-
WaitingModels: wm,
112-
MaxActiveModels: m.MaxActiveModels,
113-
RunningQueueSize: m.RunningQueueSize,
114-
WaitingQueueSize: m.WaitingQueueSize,
115-
KVCacheUsagePercent: m.KVCacheUsagePercent,
116-
KvCacheMaxTokenCapacity: m.KvCacheMaxTokenCapacity,
117-
UpdateTime: m.UpdateTime,
118-
}
119-
return clone
120-
}

0 commit comments

Comments
 (0)