
Commit 90329d0

Move integration and e2e tests for epp into epp-specific directories
1 parent 5b82374 commit 90329d0

File tree

6 files changed: +617 -4 lines

Lines changed: 218 additions & 0 deletions
@@ -0,0 +1,218 @@
/*
Copyright 2025 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package metrics

import (
	"context"
	"sync"
	"time"

	compbasemetrics "k8s.io/component-base/metrics"
	"k8s.io/component-base/metrics/legacyregistry"
	"sigs.k8s.io/controller-runtime/pkg/log"
	logutil "sigs.k8s.io/gateway-api-inference-extension/pkg/epp/util/logging"
)

const (
	InferenceModelComponent = "inference_model"
	InferencePoolComponent  = "inference_pool"
)

var (
	// Inference Model Metrics
	requestCounter = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      InferenceModelComponent,
			Name:           "request_total",
			Help:           "Counter of inference model requests broken out for each model and target model.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"model_name", "target_model_name"},
	)

	requestErrCounter = compbasemetrics.NewCounterVec(
		&compbasemetrics.CounterOpts{
			Subsystem:      InferenceModelComponent,
			Name:           "request_error_total",
			Help:           "Counter of inference model request errors broken out for each model and target model.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"model_name", "target_model_name", "error_code"},
	)

	requestLatencies = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: InferenceModelComponent,
			Name:      "request_duration_seconds",
			Help:      "Inference model response latency distribution in seconds for each model and target model.",
			Buckets: []float64{
				0.005, 0.025, 0.05, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 1.25, 1.5, 2, 3,
				4, 5, 6, 8, 10, 15, 20, 30, 45, 60, 120, 180, 240, 300, 360, 480, 600, 900, 1200, 1800, 2700, 3600,
			},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"model_name", "target_model_name"},
	)

	requestSizes = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: InferenceModelComponent,
			Name:      "request_sizes",
			Help:      "Inference model request size distribution in bytes for each model and target model.",
			// Use buckets ranging from 64 bytes to 2^30 bytes (1GiB).
			Buckets: []float64{
				64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, // More fine-grained up to 64KB
				131072, 262144, 524288, 1048576, 2097152, 4194304, 8388608, // Exponential up to 8MB
				16777216, 33554432, 67108864, 134217728, 268435456, 536870912, 1073741824, // Exponential up to 1GB
			},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"model_name", "target_model_name"},
	)

	responseSizes = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: InferenceModelComponent,
			Name:      "response_sizes",
			Help:      "Inference model response size distribution in bytes for each model and target model.",
			// Most models return responses of fewer than 8192 tokens, and each token averages
			// about 4 characters: 8192 * 4 = 32768.
			Buckets:        []float64{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"model_name", "target_model_name"},
	)

	inputTokens = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: InferenceModelComponent,
			Name:      "input_tokens",
			Help:      "Inference model input token count distribution for requests in each model.",
			// Most models have an input context window of fewer than 1 million tokens.
			Buckets:        []float64{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536, 131072, 262144, 524288, 1048576},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"model_name", "target_model_name"},
	)

	outputTokens = compbasemetrics.NewHistogramVec(
		&compbasemetrics.HistogramOpts{
			Subsystem: InferenceModelComponent,
			Name:      "output_tokens",
			Help:      "Inference model output token count distribution for requests in each model.",
			// Most models generate fewer than 8192 output tokens.
			Buckets:        []float64{1, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192},
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"model_name", "target_model_name"},
	)

	// Inference Pool Metrics
	inferencePoolAvgKVCache = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:      InferencePoolComponent,
			Name:           "average_kv_cache_utilization",
			Help:           "The average kv cache utilization for an inference server pool.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"name"},
	)

	inferencePoolAvgQueueSize = compbasemetrics.NewGaugeVec(
		&compbasemetrics.GaugeOpts{
			Subsystem:      InferencePoolComponent,
			Name:           "average_queue_size",
			Help:           "The average number of requests pending in the model server queue.",
			StabilityLevel: compbasemetrics.ALPHA,
		},
		[]string{"name"},
	)
)

var registerMetrics sync.Once

// Register all metrics.
func Register() {
	registerMetrics.Do(func() {
		legacyregistry.MustRegister(requestCounter)
		legacyregistry.MustRegister(requestErrCounter)
		legacyregistry.MustRegister(requestLatencies)
		legacyregistry.MustRegister(requestSizes)
		legacyregistry.MustRegister(responseSizes)
		legacyregistry.MustRegister(inputTokens)
		legacyregistry.MustRegister(outputTokens)

		legacyregistry.MustRegister(inferencePoolAvgKVCache)
		legacyregistry.MustRegister(inferencePoolAvgQueueSize)
	})
}

// RecordRequestCounter records the number of requests.
func RecordRequestCounter(modelName, targetModelName string) {
	requestCounter.WithLabelValues(modelName, targetModelName).Inc()
}

// RecordRequestErrCounter records the number of failed requests.
func RecordRequestErrCounter(modelName, targetModelName string, code string) {
	if code != "" {
		requestErrCounter.WithLabelValues(modelName, targetModelName, code).Inc()
	}
}

// RecordRequestSizes records the request sizes.
func RecordRequestSizes(modelName, targetModelName string, reqSize int) {
	requestSizes.WithLabelValues(modelName, targetModelName).Observe(float64(reqSize))
}

// RecordRequestLatencies records the duration of a request.
func RecordRequestLatencies(ctx context.Context, modelName, targetModelName string, received time.Time, complete time.Time) bool {
	if !complete.After(received) {
		log.FromContext(ctx).V(logutil.DEFAULT).Error(nil, "Request latency values are invalid",
			"modelName", modelName, "targetModelName", targetModelName, "completeTime", complete, "receivedTime", received)
		return false
	}
	elapsedSeconds := complete.Sub(received).Seconds()
	requestLatencies.WithLabelValues(modelName, targetModelName).Observe(elapsedSeconds)
	return true
}

// RecordResponseSizes records the response sizes.
func RecordResponseSizes(modelName, targetModelName string, size int) {
	responseSizes.WithLabelValues(modelName, targetModelName).Observe(float64(size))
}

// RecordInputTokens records the input token count.
func RecordInputTokens(modelName, targetModelName string, size int) {
	if size > 0 {
		inputTokens.WithLabelValues(modelName, targetModelName).Observe(float64(size))
	}
}

// RecordOutputTokens records the output token count.
func RecordOutputTokens(modelName, targetModelName string, size int) {
	if size > 0 {
		outputTokens.WithLabelValues(modelName, targetModelName).Observe(float64(size))
	}
}

// RecordInferencePoolAvgKVCache records the average KV cache utilization for an inference server pool.
func RecordInferencePoolAvgKVCache(name string, utilization float64) {
	inferencePoolAvgKVCache.WithLabelValues(name).Set(utilization)
}

// RecordInferencePoolAvgQueueSize records the average queue size for an inference server pool.
func RecordInferencePoolAvgQueueSize(name string, queueSize float64) {
	inferencePoolAvgQueueSize.WithLabelValues(name).Set(queueSize)
}
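
For context, a minimal sketch of how a consumer of this package might wire it up. The import path (pkg/epp/metrics) is an assumption based on the logutil import above, and the model names, sizes, and timings are hypothetical illustration values, not part of this commit:

package main

import (
	"context"
	"time"

	// Assumed package location; the diff does not show the file's path.
	"sigs.k8s.io/gateway-api-inference-extension/pkg/epp/metrics"
)

func main() {
	// Register the metric vectors with the legacy registry once at startup;
	// the sync.Once inside Register makes repeated calls safe.
	metrics.Register()

	// Hypothetical request lifecycle.
	ctx := context.Background()
	received := time.Now()
	// ... forward the request to the target model server ...
	complete := received.Add(250 * time.Millisecond)

	metrics.RecordRequestCounter("my-model", "my-model-v1")
	metrics.RecordRequestSizes("my-model", "my-model-v1", 1024)
	metrics.RecordRequestLatencies(ctx, "my-model", "my-model-v1", received, complete)
	metrics.RecordInputTokens("my-model", "my-model-v1", 128)
	metrics.RecordOutputTokens("my-model", "my-model-v1", 256)
}

Because each vector sets Subsystem, the emitted series carry the component prefix, e.g. inference_model_request_total{model_name="my-model", target_model_name="my-model-v1"}.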
