You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
Help: "Inference model number of running requests in each model.",
133
+
Help: fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Inference model number of running requests in each model."),
131
134
},
132
135
[]string{"model_name"},
133
136
)
@@ -137,7 +140,7 @@ var (
137
140
prometheus.HistogramOpts{
138
141
Subsystem: InferenceModelComponent,
139
142
Name: "normalized_time_per_output_token_seconds",
140
-
Help: "Inference model latency divided by number of output tokens in seconds for each model and target model.",
143
+
Help: fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Inference model latency divided by number of output tokens in seconds for each model and target model."),
141
144
// From few milliseconds per token to multiple seconds per token
Help: "Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.",
197
+
Help: fmt.Sprintf("[%v] %v", compbasemetrics.ALPHA, "Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name."),
Copy file name to clipboardExpand all lines: pkg/epp/metrics/testdata/normalized_time_per_output_token_seconds_metric
+1-1Lines changed: 1 addition & 1 deletion
Original file line number
Diff line number
Diff line change
@@ -1,4 +1,4 @@
1
-
# HELP inference_model_normalized_time_per_output_token_seconds Inference model latency divided by number of output tokens in seconds for each model and target model.
1
+
# HELP inference_model_normalized_time_per_output_token_seconds [ALPHA] Inference model latency divided by number of output tokens in seconds for each model and target model.
2
2
# TYPE inference_model_normalized_time_per_output_token_seconds histogram
Copy file name to clipboardExpand all lines: pkg/epp/metrics/testdata/scheduler_plugin_processing_latencies_metric
+1-1Lines changed: 1 addition & 1 deletion
Original file line number
Diff line number
Diff line change
@@ -1,4 +1,4 @@
1
-
# HELP inference_extension_scheduler_plugin_duration_seconds Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.
1
+
# HELP inference_extension_scheduler_plugin_duration_seconds [ALPHA] Scheduler plugin processing latency distribution in seconds for each plugin type and plugin name.
2
2
# TYPE inference_extension_scheduler_plugin_duration_seconds histogram
0 commit comments