@@ -98,7 +98,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 	tests := []struct {
 		name          string
 		requests      []*extProcPb.ProcessingRequest
-		pods          map[backend.Pod]*backendmetrics.Metrics
+		pods          map[*backend.Pod]*backendmetrics.Metrics
 		wantResponses []*extProcPb.ProcessingResponse
 		wantMetrics   map[string]string
 		wantErr       bool
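The key-type change repeated in every hunk below follows from the new `Labels` field added to `backend.Pod` at the bottom of this diff: a Go struct containing a map is not comparable, so it can no longer be used as a map key, while a pointer key always can. A minimal sketch of the constraint, using an assumed, reduced `Pod` stand-in rather than the real `backend.Pod`:

```go
package main

import "fmt"

// Assumed, reduced stand-in for backend.Pod after this change.
type Pod struct {
	Name   string
	Labels map[string]string // a map field makes Pod non-comparable
}

func main() {
	// m := map[Pod]int{} // compile error: invalid map key type Pod
	m := map[*Pod]int{} // pointers are always comparable, so this compiles
	p := &Pod{Name: "pod-0", Labels: map[string]string{}}
	m[p] = 1
	fmt.Println(m[p]) // 1: lookup by the same pointer succeeds
}
```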
@@ -109,7 +109,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 			name:     "select lower queue and kv cache, no active lora",
 			requests: integrationutils.GenerateStreamedRequestSet(logger, "test1", "my-model"),
 			// pod-1 will be picked because it has relatively low queue size and low KV cache.
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    3,
 					KVCacheUsagePercent: 0.2,
@@ -184,7 +184,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 			requests: integrationutils.GenerateStreamedRequestSet(logger, "test2", "sql-lora"),
 			// pod-1 will be picked because it has relatively low queue size, with the requested
 			// model being active, and has low KV cache.
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    0,
 					KVCacheUsagePercent: 0.2,
@@ -269,7 +269,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 			// pod-2 will be picked despite it NOT having the requested model being active
 			// as it's above the affinity for queue size. Also is critical, so we should
 			// still honor request despite all queues > 5
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    10,
 					KVCacheUsagePercent: 0.2,
@@ -352,7 +352,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 			requests: integrationutils.GenerateStreamedRequestSet(logger, "test4", "sql-lora-sheddable"),
 			// no pods will be picked as all models are either above kv threshold,
 			// queue threshold, or both.
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    6,
 					KVCacheUsagePercent: 0.2,
@@ -400,7 +400,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 			name:     "noncritical, but one server has capacity, do not shed",
 			requests: integrationutils.GenerateStreamedRequestSet(logger, "test5", "sql-lora-sheddable"),
 			// pod 0 will be picked as all other models are above threshold
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    4,
 					KVCacheUsagePercent: 0.2,
@@ -511,7 +511,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 
 			//
 			// pod 0 will be picked as all other models are above threshold
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    4,
 					KVCacheUsagePercent: 0.2,
@@ -622,7 +622,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 
 			//
 			// pod 0 will be picked as all other models are above threshold
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    4,
 					KVCacheUsagePercent: 0.2,
@@ -734,7 +734,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 
 			//
 			// pod 0 will be picked as all other models are above threshold
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    4,
 					KVCacheUsagePercent: 0.2,
@@ -833,7 +833,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 
 			//
 			// pod 0 will be picked as all other models are above threshold
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    4,
 					KVCacheUsagePercent: 0.2,
@@ -1181,7 +1181,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 					DynamicMetadata: makeMetadata("192.168.1.1:8000"),
 				},
 			},
-			pods: map[backend.Pod]*backendmetrics.Metrics{
+			pods: map[*backend.Pod]*backendmetrics.Metrics{
 				fakePod(0): {
 					WaitingQueueSize:    4,
 					KVCacheUsagePercent: 0.2,
@@ -1227,7 +1227,7 @@ func TestFullDuplexStreamed_KubeInferenceModelRequest(t *testing.T) {
 	}
 }
 
-func setUpHermeticServer(t *testing.T, podAndMetrics map[backend.Pod]*backendmetrics.Metrics, streamed bool) (client extProcPb.ExternalProcessor_ProcessClient, cleanup func()) {
+func setUpHermeticServer(t *testing.T, podAndMetrics map[*backend.Pod]*backendmetrics.Metrics, streamed bool) (client extProcPb.ExternalProcessor_ProcessClient, cleanup func()) {
 	// Reconfigure the TestPodMetricsClient.
 	res := map[types.NamespacedName]*backendmetrics.Metrics{}
 	for pod, metrics := range podAndMetrics {
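With pointer keys, map equality becomes pointer identity: a value-equal `backend.Pod` built elsewhere would never hit an entry in `podAndMetrics`. The hunk above sidesteps that by immediately re-keying the metrics by `types.NamespacedName`, a comparable value type that lookups can reconstruct. A small sketch of the pitfall being avoided, reusing the assumed `Pod` stand-in from the earlier sketch:

```go
a := &Pod{Name: "pod-0", Labels: map[string]string{}}
b := &Pod{Name: "pod-0", Labels: map[string]string{}} // same field values, separate allocation

m := map[*Pod]int{a: 1}
fmt.Println(m[a]) // 1: same pointer, hit
fmt.Println(m[b]) // 0: value-equal but different pointer, miss
```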
@@ -1305,10 +1305,11 @@ func setUpHermeticServer(t *testing.T, podAndMetrics map[backend.Pod]*backendmetrics.Metrics, streamed bool) (client extProcPb.ExternalProcessor_ProcessClient, cleanup func()) {
 	}
 }
 
-func fakePod(index int) backend.Pod {
-	return backend.Pod{
+func fakePod(index int) *backend.Pod {
+	return &backend.Pod{
 		NamespacedName: types.NamespacedName{Name: fmt.Sprintf("pod-%v", index), Namespace: "default"},
 		Address:        fmt.Sprintf("192.168.1.%d", index+1),
+		Labels:         make(map[string]string, 0),
 	}
 }
 
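Since `fakePod` now returns a fresh allocation on every call, two calls with the same index produce distinct map keys; the test tables stay correct because each key is built exactly once per map literal and the server only ranges over the map. Eagerly initializing `Labels` also lets callers write into it directly, e.g. (illustrative label key and value):

```go
p := fakePod(0)
p.Labels["app"] = "vllm-sim" // safe: Labels was allocated by make; writing to a nil map panics
```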