
Commit ec735af

add labels to pod metadata for the use of scheduler plugins (#779)

Signed-off-by: Nir Rozenbaum <[email protected]>

1 parent 6a2a3ec · commit ec735af

File tree: 3 files changed, +18 -14 lines changed


pkg/epp/backend/metrics/pod_metrics.go

Lines changed: 1 addition & 0 deletions
```diff
@@ -75,6 +75,7 @@ func toInternalPod(in *corev1.Pod) *backend.Pod {
 			Namespace: in.Namespace,
 		},
 		Address: in.Status.PodIP,
+		Labels:  in.Labels,
 	}
 }
```
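What this hunk buys: the Kubernetes labels on the incoming `corev1.Pod` are carried into the internal `backend.Pod`, so scheduler plugins can key decisions off them. Below is a minimal sketch of a label-based filter a plugin might apply; `filterByLabel` and its signature are illustrative assumptions (code that could live alongside the `backend` package), not part of this commit.

```go
// filterByLabel is a hypothetical helper, not part of this commit: it keeps
// only the pods whose label `key` equals `want`, using the Labels field that
// toInternalPod now populates from the corev1.Pod metadata.
func filterByLabel(pods []*backend.Pod, key, want string) []*backend.Pod {
	out := make([]*backend.Pod, 0, len(pods))
	for _, p := range pods {
		if p.Labels != nil && p.Labels[key] == want {
			out = append(out, p)
		}
	}
	return out
}
```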

pkg/epp/backend/pod.go

Lines changed: 2 additions & 0 deletions
```diff
@@ -22,6 +22,7 @@ import (
 type Pod struct {
 	NamespacedName types.NamespacedName
 	Address        string
+	Labels         map[string]string
 }
 
 func (p *Pod) String() string {
@@ -41,5 +42,6 @@ func (p *Pod) Clone() *Pod {
 			Namespace: p.NamespacedName.Namespace,
 		},
 		Address: p.Address,
+		Labels:  p.Labels,
 	}
 }
```
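One note on the new field: `Labels: p.Labels` copies the map reference, so a clone shares the same underlying label map with the original (Go maps are reference-like), which matches how the other fields are treated here. If full isolation were ever wanted, an entry-by-entry copy would look roughly like the sketch below; this is an illustration only, not part of this change.

```go
// deepCopyLabels is a hypothetical helper, not part of this commit: it shows
// what an entry-by-entry copy would look like if Clone ever needed the clone's
// Labels to be independent of later mutations to the original's map.
func deepCopyLabels(in map[string]string) map[string]string {
	if in == nil {
		return nil
	}
	out := make(map[string]string, len(in))
	for k, v := range in {
		out[k] = v
	}
	return out
}
```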

test/integration/epp/hermetic_test.go

Lines changed: 15 additions & 14 deletions
```diff
@@ -98,7 +98,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 	tests := []struct {
 		name          string
 		requests      []*extProcPb.ProcessingRequest
-		pods          map[backend.Pod]*backendmetrics.Metrics
+		pods          map[*backend.Pod]*backendmetrics.Metrics
 		wantResponses []*extProcPb.ProcessingResponse
 		wantMetrics   map[string]string
 		wantErr       bool
@@ -109,7 +109,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 			name:     "select lower queue and kv cache, no active lora",
 			requests: integrationutils.GenerateStreamedRequestSet(logger, "test1", "my-model"),
 			// pod-1 will be picked because it has relatively low queue size and low KV cache.
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    3,
 					KVCacheUsagePercent: 0.2,
@@ -184,7 +184,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 			requests: integrationutils.GenerateStreamedRequestSet(logger, "test2", "sql-lora"),
 			// pod-1 will be picked because it has relatively low queue size, with the requested
 			// model being active, and has low KV cache.
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    0,
 					KVCacheUsagePercent: 0.2,
@@ -269,7 +269,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 			// pod-2 will be picked despite it NOT having the requested model being active
 			// as it's above the affinity for queue size. Also is critical, so we should
 			// still honor request despite all queues > 5
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    10,
 					KVCacheUsagePercent: 0.2,
@@ -352,7 +352,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 			requests: integrationutils.GenerateStreamedRequestSet(logger, "test4", "sql-lora-sheddable"),
 			// no pods will be picked as all models are either above kv threshold,
 			// queue threshold, or both.
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    6,
 					KVCacheUsagePercent: 0.2,
@@ -400,7 +400,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 			name:     "noncritical, but one server has capacity, do not shed",
 			requests: integrationutils.GenerateStreamedRequestSet(logger, "test5", "sql-lora-sheddable"),
 			// pod 0 will be picked as all other models are above threshold
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    4,
 					KVCacheUsagePercent: 0.2,
@@ -511,7 +511,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 
 			//
 			// pod 0 will be picked as all other models are above threshold
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    4,
 					KVCacheUsagePercent: 0.2,
@@ -622,7 +622,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 
 			//
 			// pod 0 will be picked as all other models are above threshold
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    4,
 					KVCacheUsagePercent: 0.2,
@@ -734,7 +734,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 
 			//
 			// pod 0 will be picked as all other models are above threshold
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    4,
 					KVCacheUsagePercent: 0.2,
@@ -833,7 +833,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 
 			//
 			// pod 0 will be picked as all other models are above threshold
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    4,
 					KVCacheUsagePercent: 0.2,
@@ -1181,7 +1181,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 					DynamicMetadata: makeMetadata("192.168.1.1:8000"),
 				},
 			},
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    4,
 					KVCacheUsagePercent: 0.2,
@@ -1227,7 +1227,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 	}
 }
 
-func setUpHermeticServer(t *testing.T, podAndMetrics map[backend.Pod]*backendmetrics.Metrics, streamed bool) (client extProcPb.ExternalProcessor_ProcessClient, cleanup func()) {
+func setUpHermeticServer(t *testing.T, podAndMetrics map[*backend.Pod]*backendmetrics.Metrics, streamed bool) (client extProcPb.ExternalProcessor_ProcessClient, cleanup func()) {
 	// Reconfigure the TestPodMetricsClient.
 	res := map[types.NamespacedName]*backendmetrics.Metrics{}
 	for pod, metrics := range podAndMetrics {
@@ -1305,10 +1305,11 @@ func setUpHermeticServer(t *testing.T, podAndMetrics map[backend.Pod]*backendmet
 	}
 }
 
-func fakePod(index int) backend.Pod {
-	return backend.Pod{
+func fakePod(index int) *backend.Pod {
+	return &backend.Pod{
 		NamespacedName: types.NamespacedName{Name: fmt.Sprintf("pod-%v", index), Namespace: "default"},
 		Address:        fmt.Sprintf("192.168.1.%d", index+1),
+		Labels:         make(map[string]string, 0),
 	}
 }
```
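The switch from `map[backend.Pod]` to `map[*backend.Pod]` keys is not cosmetic: once `Pod` carries a `map[string]string` field, the struct is no longer comparable in the Go sense and is rejected as a map key, whereas a pointer key is always comparable (by identity). A standalone illustration of the rule, independent of this repository, is:

```go
package main

import "fmt"

// pod mirrors the shape change in this commit: adding a map field makes the
// struct non-comparable, so it can no longer be used as a map key by value.
type pod struct {
	name   string
	labels map[string]string
}

func main() {
	// var byValue map[pod]int // would not compile: map key type must be comparable
	byPointer := map[*pod]int{} // pointer keys compare by identity, so this is allowed
	p := &pod{name: "pod-0", labels: map[string]string{}}
	byPointer[p] = 1
	fmt.Println(byPointer[p]) // prints 1
}
```

A practical consequence is that two separate `fakePod(0)` calls now yield distinct keys (different pointers); that appears harmless here since `setUpHermeticServer` only ranges over the map to rebuild the metrics keyed by `types.NamespacedName`.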
