kubernetes-sigs
diff --git a/‎pkg/epp/backend/metrics/fake.go
Lines changed: 5 additions & 5 deletions b/‎pkg/epp/backend/metrics/fake.go
Lines changed: 5 additions & 5 deletions
diff --git a/‎pkg/epp/backend/metrics/metrics.go
Lines changed: 3 additions & 3 deletions b/‎pkg/epp/backend/metrics/metrics.go
Lines changed: 3 additions & 3 deletions
diff --git a/‎pkg/epp/backend/metrics/metrics_state.go
Lines changed: 80 additions & 0 deletions b/‎pkg/epp/backend/metrics/metrics_state.go
Lines changed: 80 additions & 0 deletions
diff --git a/‎pkg/epp/backend/metrics/metrics_test.go
Lines changed: 11 additions & 11 deletions b/‎pkg/epp/backend/metrics/metrics_test.go
Lines changed: 11 additions & 11 deletions
diff --git a/‎pkg/epp/backend/metrics/pod_metrics.go
Lines changed: 3 additions & 3 deletions b/‎pkg/epp/backend/metrics/pod_metrics.go
Lines changed: 3 additions & 3 deletions
diff --git a/‎pkg/epp/backend/metrics/pod_metrics_test.go
Lines changed: 5 additions & 5 deletions b/‎pkg/epp/backend/metrics/pod_metrics_test.go
Lines changed: 5 additions & 5 deletions
diff --git a/‎pkg/epp/backend/metrics/types.go
Lines changed: 2 additions & 57 deletions b/‎pkg/epp/backend/metrics/types.go
Lines changed: 2 additions & 57 deletions
@@ -31,7 +31,7 @@ import (
 // FakePodMetrics is an implementation of PodMetrics that doesn't run the async refresh loop.
 type FakePodMetrics struct {
 	Pod     *backend.Pod
-	Metrics *Metrics
+	Metrics *MetricsState
 }
 
 func (fpm *FakePodMetrics) String() string {
@@ -41,7 +41,7 @@ func (fpm *FakePodMetrics) String() string {
 func (fpm *FakePodMetrics) GetPod() *backend.Pod {
 	return fpm.Pod
 }
-func (fpm *FakePodMetrics) GetMetrics() *Metrics {
+func (fpm *FakePodMetrics) GetMetrics() *MetricsState {
 	return fpm.Metrics
 }
 func (fpm *FakePodMetrics) UpdatePod(pod *corev1.Pod) {
@@ -53,10 +53,10 @@ type FakePodMetricsClient struct {
 	errMu sync.RWMutex
 	Err   map[types.NamespacedName]error
 	resMu sync.RWMutex
-	Res   map[types.NamespacedName]*Metrics
+	Res   map[types.NamespacedName]*MetricsState
 }
 
-func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *Metrics, port int32) (*Metrics, error) {
+func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error) {
 	f.errMu.RLock()
 	err, ok := f.Err[pod.NamespacedName]
 	f.errMu.RUnlock()
@@ -73,7 +73,7 @@ func (f *FakePodMetricsClient) FetchMetrics(ctx context.Context, pod *backend.Po
 	return res.Clone(), nil
 }
 
-func (f *FakePodMetricsClient) SetRes(new map[types.NamespacedName]*Metrics) {
+func (f *FakePodMetricsClient) SetRes(new map[types.NamespacedName]*MetricsState) {
 	f.resMu.Lock()
 	defer f.resMu.Unlock()
 	f.Res = new
 
@@ -41,7 +41,7 @@ type PodMetricsClientImpl struct {
 }
 
 // FetchMetrics fetches metrics from a given pod, clones the existing metrics object and returns an updated one.
-func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *Metrics, port int32) (*Metrics, error) {
+func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error) {
 	// Currently the metrics endpoint is hard-coded, which works with vLLM.
 	// TODO(https://github.com/kubernetes-sigs/gateway-api-inference-extension/issues/16): Consume this from InferencePool config.
 	url := "http://" + pod.Address + ":" + strconv.Itoa(int(port)) + "/metrics"
@@ -73,8 +73,8 @@ func (p *PodMetricsClientImpl) FetchMetrics(ctx context.Context, pod *backend.Po
 // promToPodMetrics updates internal pod metrics with scraped Prometheus metrics.
 func (p *PodMetricsClientImpl) promToPodMetrics(
 	metricFamilies map[string]*dto.MetricFamily,
-	existing *Metrics,
-) (*Metrics, error) {
+	existing *MetricsState,
+) (*MetricsState, error) {
 	var errs error
 	updated := existing.Clone()
 
 
@@ -0,0 +1,80 @@
+/*
+Copyright 2025 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package metrics
+
+import (
+	"fmt"
+	"time"
+)
+
+// newMetricsState returns a pointer to a new MetricsState whose ActiveModels and WaitingModels maps are allocated and empty; all other fields keep their zero values.
+func newMetricsState() *MetricsState {
+	return &MetricsState{
+		ActiveModels:  make(map[string]int),
+		WaitingModels: make(map[string]int),
+	}
+}
+
+// MetricsState holds the latest state of the metrics that were scraped from a pod.
+type MetricsState struct {
+	// ActiveModels is a set of models (including LoRA adapters) that are currently cached to GPU.
+	ActiveModels  map[string]int
+	WaitingModels map[string]int
+	// MaxActiveModels is the maximum number of models that can be loaded to GPU.
+	MaxActiveModels         int
+	RunningQueueSize        int
+	WaitingQueueSize        int
+	KVCacheUsagePercent     float64
+	KvCacheMaxTokenCapacity int
+
+	// UpdateTime records the last time the metrics were updated.
+	UpdateTime time.Time
+}
+
+// String returns the "%+v" rendering of the MetricsState (field names and values), or an empty string if the receiver is nil.
+func (s *MetricsState) String() string {
+	if s == nil {
+		return ""
+	}
+	return fmt.Sprintf("%+v", *s)
+}
+
+// Clone creates a copy of MetricsState and returns its pointer; the ActiveModels and WaitingModels maps are copied entry by entry into freshly made maps.
+// Clone returns nil if the object being cloned is nil. The maps in the copy are always non-nil, even when the source maps are nil.
+func (s *MetricsState) Clone() *MetricsState {
+	if s == nil {
+		return nil
+	}
+	activeModels := make(map[string]int, len(s.ActiveModels))
+	for key, value := range s.ActiveModels {
+		activeModels[key] = value
+	}
+	waitingModels := make(map[string]int, len(s.WaitingModels))
+	for key, value := range s.WaitingModels {
+		waitingModels[key] = value
+	}
+	return &MetricsState{
+		ActiveModels:            activeModels,
+		WaitingModels:           waitingModels,
+		MaxActiveModels:         s.MaxActiveModels,
+		RunningQueueSize:        s.RunningQueueSize,
+		WaitingQueueSize:        s.WaitingQueueSize,
+		KVCacheUsagePercent:     s.KVCacheUsagePercent,
+		KvCacheMaxTokenCapacity: s.KvCacheMaxTokenCapacity,
+		UpdateTime:              s.UpdateTime,
+	}
+}
@@ -377,8 +377,8 @@ func TestPromToPodMetrics(t *testing.T) {
 		name            string
 		metricFamilies  map[string]*dto.MetricFamily
 		mapping         *MetricMapping
-		existingMetrics *Metrics
-		expectedMetrics *Metrics
+		existingMetrics *MetricsState
+		expectedMetrics *MetricsState
 		expectedErr     error // Count of expected errors
 	}{
 		{
@@ -401,8 +401,8 @@ func TestPromToPodMetrics(t *testing.T) {
 				KVCacheUtilization:  &MetricSpec{MetricName: "vllm_usage"},
 				LoraRequestInfo:     &MetricSpec{MetricName: "vllm:lora_requests_info"},
 			},
-			existingMetrics: &Metrics{},
-			expectedMetrics: &Metrics{
+			existingMetrics: &MetricsState{},
+			expectedMetrics: &MetricsState{
 				WaitingQueueSize:    7,
 				KVCacheUsagePercent: 0.8,
 				ActiveModels:        map[string]int{"lora1": 0, "lora2": 0},
@@ -418,8 +418,8 @@ func TestPromToPodMetrics(t *testing.T) {
 				KVCacheUtilization:  &MetricSpec{MetricName: "vllm_usage"},
 				LoraRequestInfo:     &MetricSpec{MetricName: "vllm:lora_requests_info"},
 			},
-			existingMetrics: &Metrics{ActiveModels: map[string]int{}, WaitingModels: map[string]int{}},
-			expectedMetrics: &Metrics{ActiveModels: map[string]int{}, WaitingModels: map[string]int{}},
+			existingMetrics: &MetricsState{ActiveModels: map[string]int{}, WaitingModels: map[string]int{}},
+			expectedMetrics: &MetricsState{ActiveModels: map[string]int{}, WaitingModels: map[string]int{}},
 			expectedErr:     multierr.Combine(errors.New("metric family \"vllm_waiting\" not found"), errors.New("metric family \"vllm_usage\" not found"), errors.New("metric family \"vllm:lora_requests_info\" not found")),
 		},
 		{
@@ -437,8 +437,8 @@ func TestPromToPodMetrics(t *testing.T) {
 				KVCacheUtilization:  &MetricSpec{MetricName: "vllm_usage"},
 				LoraRequestInfo:     &MetricSpec{MetricName: "vllm:lora_requests_info"},
 			},
-			existingMetrics: &Metrics{},
-			expectedMetrics: &Metrics{
+			existingMetrics: &MetricsState{},
+			expectedMetrics: &MetricsState{
 				WaitingQueueSize:    0,
 				KVCacheUsagePercent: 0.8,
 				ActiveModels:        map[string]int{"lora1": 0, "lora2": 0},
@@ -457,8 +457,8 @@ func TestPromToPodMetrics(t *testing.T) {
 			mapping: &MetricMapping{
 				LoraRequestInfo: &MetricSpec{MetricName: "vllm:lora_requests_info"},
 			},
-			existingMetrics: &Metrics{},
-			expectedMetrics: &Metrics{
+			existingMetrics: &MetricsState{},
+			expectedMetrics: &MetricsState{
 				ActiveModels:    map[string]int{"lora1": 0},
 				WaitingModels:   map[string]int{},
 				MaxActiveModels: 0, // Should still default to 0.
@@ -494,7 +494,7 @@ func TestFetchMetrics(t *testing.T) {
 			Name:      "pod",
 		},
 	}
-	existing := &Metrics{}
+	existing := &MetricsState{}
 	p := &PodMetricsClientImpl{} // No MetricMapping needed for this basic test
 
 	_, err := p.FetchMetrics(ctx, pod, existing, 9999) // Use a port that's unlikely to be in use.
 
@@ -37,7 +37,7 @@ const (
 
 type podMetrics struct {
 	pod      atomic.Pointer[backend.Pod]
-	metrics  atomic.Pointer[Metrics]
+	metrics  atomic.Pointer[MetricsState]
 	pmc      PodMetricsClient
 	ds       Datastore
 	interval time.Duration
@@ -49,7 +49,7 @@ type podMetrics struct {
 }
 
 type PodMetricsClient interface {
-	FetchMetrics(ctx context.Context, pod *backend.Pod, existing *Metrics, port int32) (*Metrics, error)
+	FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error)
 }
 
 func (pm *podMetrics) String() string {
@@ -60,7 +60,7 @@ func (pm *podMetrics) GetPod() *backend.Pod {
 	return pm.pod.Load()
 }
 
-func (pm *podMetrics) GetMetrics() *Metrics {
+func (pm *podMetrics) GetMetrics() *MetricsState {
 	return pm.metrics.Load()
 }
 
 
@@ -36,7 +36,7 @@ var (
 			Namespace: "default",
 		},
 	}
-	initial = &Metrics{
+	initial = &MetricsState{
 		WaitingQueueSize:    0,
 		KVCacheUsagePercent: 0.2,
 		MaxActiveModels:     2,
@@ -46,7 +46,7 @@ var (
 		},
 		WaitingModels: map[string]int{},
 	}
-	updated = &Metrics{
+	updated = &MetricsState{
 		WaitingQueueSize:    9999,
 		KVCacheUsagePercent: 0.99,
 		MaxActiveModels:     99,
@@ -69,16 +69,16 @@ func TestMetricsRefresh(t *testing.T) {
 	namespacedName := types.NamespacedName{Name: pod1.Name, Namespace: pod1.Namespace}
 	// Use SetRes to simulate an update of metrics from the pod.
 	// Verify that the metrics are updated.
-	pmc.SetRes(map[types.NamespacedName]*Metrics{namespacedName: initial})
+	pmc.SetRes(map[types.NamespacedName]*MetricsState{namespacedName: initial})
 	condition := func(collect *assert.CollectT) {
-		assert.True(collect, cmp.Equal(pm.GetMetrics(), initial, cmpopts.IgnoreFields(Metrics{}, "UpdateTime")))
+		assert.True(collect, cmp.Equal(pm.GetMetrics(), initial, cmpopts.IgnoreFields(MetricsState{}, "UpdateTime")))
 	}
 	assert.EventuallyWithT(t, condition, time.Second, time.Millisecond)
 
 	// Stop the loop, and simulate metric update again, this time the PodMetrics won't get the
 	// new update.
 	pm.StopRefreshLoop()
-	pmc.SetRes(map[types.NamespacedName]*Metrics{namespacedName: updated})
+	pmc.SetRes(map[types.NamespacedName]*MetricsState{namespacedName: updated})
 	// Still expect the same condition (no metrics update).
 	assert.EventuallyWithT(t, condition, time.Second, time.Millisecond)
 }
 
@@ -19,7 +19,6 @@ package metrics
 
 import (
 	"context"
-	"fmt"
 	"sync"
 	"time"
 
@@ -51,70 +50,16 @@ func (f *PodMetricsFactory) NewPodMetrics(parentCtx context.Context, in *corev1.
 		logger:   log.FromContext(parentCtx).WithValues("pod", pod.NamespacedName),
 	}
 	pm.pod.Store(pod)
-	pm.metrics.Store(newMetrics())
+	pm.metrics.Store(newMetricsState())
 
 	pm.startRefreshLoop(parentCtx)
 	return pm
 }
 
 type PodMetrics interface {
 	GetPod() *backend.Pod
-	GetMetrics() *Metrics
+	GetMetrics() *MetricsState
 	UpdatePod(*corev1.Pod)
 	StopRefreshLoop()
 	String() string
 }
-
-type Metrics struct {
-	// ActiveModels is a set of models(including LoRA adapters) that are currently cached to GPU.
-	ActiveModels  map[string]int
-	WaitingModels map[string]int
-	// MaxActiveModels is the maximum number of models that can be loaded to GPU.
-	MaxActiveModels         int
-	RunningQueueSize        int
-	WaitingQueueSize        int
-	KVCacheUsagePercent     float64
-	KvCacheMaxTokenCapacity int
-
-	// UpdateTime record the last time when the metrics were updated.
-	UpdateTime time.Time
-}
-
-func newMetrics() *Metrics {
-	return &Metrics{
-		ActiveModels:  make(map[string]int),
-		WaitingModels: make(map[string]int),
-	}
-}
-
-func (m *Metrics) String() string {
-	if m == nil {
-		return ""
-	}
-	return fmt.Sprintf("%+v", *m)
-}
-
-func (m *Metrics) Clone() *Metrics {
-	if m == nil {
-		return nil
-	}
-	cm := make(map[string]int, len(m.ActiveModels))
-	for k, v := range m.ActiveModels {
-		cm[k] = v
-	}
-	wm := make(map[string]int, len(m.WaitingModels))
-	for k, v := range m.WaitingModels {
-		wm[k] = v
-	}
-	clone := &Metrics{
-		ActiveModels:            cm,
-		WaitingModels:           wm,
-		MaxActiveModels:         m.MaxActiveModels,
-		RunningQueueSize:        m.RunningQueueSize,
-		WaitingQueueSize:        m.WaitingQueueSize,
-		KVCacheUsagePercent:     m.KVCacheUsagePercent,
-		KvCacheMaxTokenCapacity: m.KvCacheMaxTokenCapacity,
-		UpdateTime:              m.UpdateTime,
-	}
-	return clone
-}
Original file line number	Diff line number	Diff line change
`@@ -37,7 +37,7 @@ const (`
`37`	`37`
`38`	`38`	`type podMetrics struct {`
`39`	`39`	`pod atomic.Pointer[backend.Pod]`
`40`		`- metrics atomic.Pointer[Metrics]`
	`40`	`+ metrics atomic.Pointer[MetricsState]`
`41`	`41`	`pmc PodMetricsClient`
`42`	`42`	`ds Datastore`
`43`	`43`	`interval time.Duration`
`@@ -49,7 +49,7 @@ type podMetrics struct {`
`49`	`49`	`}`
`50`	`50`
`51`	`51`	`type PodMetricsClient interface {`
`52`		`- FetchMetrics(ctx context.Context, pod *backend.Pod, existing *Metrics, port int32) (*Metrics, error)`
	`52`	`+ FetchMetrics(ctx context.Context, pod *backend.Pod, existing *MetricsState, port int32) (*MetricsState, error)`
`53`	`53`	`}`
`54`	`54`
`55`	`55`	`func (pm *podMetrics) String() string {`
`@@ -60,7 +60,7 @@ func (pm *podMetrics) GetPod() *backend.Pod {`
`60`	`60`	`return pm.pod.Load()`
`61`	`61`	`}`
`62`	`62`
`63`		`-func (pm *podMetrics) GetMetrics() *Metrics {`
	`63`	`+func (pm *podMetrics) GetMetrics() *MetricsState {`
`64`	`64`	`return pm.metrics.Load()`
`65`	`65`	`}`
`66`	`66`