@@ -27,6 +27,43 @@ kubectl create -f demo/gpu-test3.yaml
kubectl create -f demo/gpu-test4.yaml
kubectl create -f demo/gpu-test5.yaml

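+ # Helpers for pulling GPU information out of a container's logs, which contain
+ # "declare -x" lines for each allocated device (GPU_DEVICE_<id>) and its
+ # per-device attributes (_SHARING_STRATEGY, _TIMESLICE_INTERVAL, _PARTITION_COUNT).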
+ function gpus-from-logs {
+   local logs="$1"
+   echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_[[:digit:]]+=\"(.+)\"$/\1/p"
+ }
+
+ function gpu-id {
+   local gpu="$1"
+   echo "$gpu" | sed -nE "s/^gpu-([[:digit:]]+)$/\1/p"
+ }
+
+ function gpu-sharing-strategy-from-logs {
+   local logs="$1"
+   local id="$2"
+   echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_${id}_SHARING_STRATEGY=\"(.+)\"$/\1/p"
+ }
+
+ function gpu-timeslice-interval-from-logs {
+   local logs="$1"
+   local id="$2"
+   echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_${id}_TIMESLICE_INTERVAL=\"(.+)\"$/\1/p"
+ }
+
+ function gpu-partition-count-from-logs {
+   local logs="$1"
+   local id="$2"
+   echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_${id}_PARTITION_COUNT=\"(.+)\"$/\1/p"
+ }
+
+ declare -a observed_gpus
+ function gpu-already-seen {
+   local gpu="$1"
+   for seen in "${observed_gpus[@]}"; do
+     if [[ "$gpu" == "$seen" ]]; then return 0; fi
+   done
+   return 1
+ }
+
kubectl wait --for=condition=Ready -n gpu-test1 pod/pod0 --timeout=120s
kubectl wait --for=condition=Ready -n gpu-test1 pod/pod1 --timeout=120s
gpu_test_1=$(kubectl get pods -n gpu-test1 | grep -c 'Running')
@@ -35,6 +72,36 @@ if [ $gpu_test_1 != 2 ]; then
  exit 1
fi

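+ # gpu-test1: verify that each of the two pods claimed exactly one GPU and that
+ # the two GPUs are distinct.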
+ gpu_test1_pod0_ctr0_logs=$(kubectl logs -n gpu-test1 pod0 -c ctr0)
+ gpu_test1_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test1_pod0_ctr0_logs")
+ gpu_test1_pod0_ctr0_gpus_count=$(echo "$gpu_test1_pod0_ctr0_gpus" | wc -w)
+ if [[ $gpu_test1_pod0_ctr0_gpus_count != 1 ]]; then
+   echo "Expected Pod gpu-test1/pod0, container ctr0 to have 1 GPU, but got $gpu_test1_pod0_ctr0_gpus_count: $gpu_test1_pod0_ctr0_gpus"
+   exit 1
+ fi
+ gpu_test1_pod0_ctr0_gpu="$gpu_test1_pod0_ctr0_gpus"
+ if gpu-already-seen "$gpu_test1_pod0_ctr0_gpu"; then
+   echo "Pod gpu-test1/pod0, container ctr0 should have a new GPU but claimed $gpu_test1_pod0_ctr0_gpu which is already claimed"
+   exit 1
+ fi
+ echo "Pod gpu-test1/pod0, container ctr0 claimed $gpu_test1_pod0_ctr0_gpu"
+ observed_gpus+=("$gpu_test1_pod0_ctr0_gpu")
+
+ gpu_test1_pod1_ctr0_logs=$(kubectl logs -n gpu-test1 pod1 -c ctr0)
+ gpu_test1_pod1_ctr0_gpus=$(gpus-from-logs "$gpu_test1_pod1_ctr0_logs")
+ gpu_test1_pod1_ctr0_gpus_count=$(echo "$gpu_test1_pod1_ctr0_gpus" | wc -w)
+ if [[ $gpu_test1_pod1_ctr0_gpus_count != 1 ]]; then
+   echo "Expected Pod gpu-test1/pod1, container ctr0 to have 1 GPU, but got $gpu_test1_pod1_ctr0_gpus_count: $gpu_test1_pod1_ctr0_gpus"
+   exit 1
+ fi
+ gpu_test1_pod1_ctr0_gpu="$gpu_test1_pod1_ctr0_gpus"
+ if gpu-already-seen "$gpu_test1_pod1_ctr0_gpu"; then
+   echo "Pod gpu-test1/pod1, container ctr0 should have a new GPU but claimed $gpu_test1_pod1_ctr0_gpu which is already claimed"
+   exit 1
+ fi
+ echo "Pod gpu-test1/pod1, container ctr0 claimed $gpu_test1_pod1_ctr0_gpu"
+ observed_gpus+=("$gpu_test1_pod1_ctr0_gpu")
+

kubectl wait --for=condition=Ready -n gpu-test2 pod/pod0 --timeout=120s
gpu_test_2=$(kubectl get pods -n gpu-test2 | grep -c 'Running')
@@ -43,28 +110,243 @@ if [ $gpu_test_2 != 1 ]; then
  exit 1
fi

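+ # gpu-test2: verify that the single container claimed exactly two GPUs, neither
+ # of which has been claimed before.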
+ gpu_test2_pod0_ctr0_logs=$(kubectl logs -n gpu-test2 pod0 -c ctr0)
+ gpu_test2_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test2_pod0_ctr0_logs")
+ gpu_test2_pod0_ctr0_gpus_count=$(echo "$gpu_test2_pod0_ctr0_gpus" | wc -w)
+ if [[ $gpu_test2_pod0_ctr0_gpus_count != 2 ]]; then
+   echo "Expected Pod gpu-test2/pod0, container ctr0 to have 2 GPUs, but got $gpu_test2_pod0_ctr0_gpus_count: $gpu_test2_pod0_ctr0_gpus"
+   exit 1
+ fi
+ # Read from a here-string rather than piping into the loop so that exit 1 and
+ # the observed_gpus updates take effect in the current shell, not a subshell.
+ while read -r gpu_test2_pod0_ctr0_gpu; do
+   if gpu-already-seen "$gpu_test2_pod0_ctr0_gpu"; then
+     echo "Pod gpu-test2/pod0, container ctr0 should have a new GPU but claimed $gpu_test2_pod0_ctr0_gpu which is already claimed"
+     exit 1
+   fi
+   echo "Pod gpu-test2/pod0, container ctr0 claimed $gpu_test2_pod0_ctr0_gpu"
+   observed_gpus+=("$gpu_test2_pod0_ctr0_gpu")
+ done <<< "$gpu_test2_pod0_ctr0_gpus"
+
+
kubectl wait --for=condition=Ready -n gpu-test3 pod/pod0 --timeout=120s
gpu_test_3=$(kubectl get pods -n gpu-test3 | grep -c 'Running')
if [ $gpu_test_3 != 1 ]; then
  echo "gpu_test_3 $gpu_test_3 failed to match against 1 expected pod"
  exit 1
fi

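+ # gpu-test3: verify that both containers of the pod share the same GPU, with
+ # sharing strategy TimeSlicing and the Default timeslice interval.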
+ gpu_test3_pod0_ctr0_logs=$(kubectl logs -n gpu-test3 pod0 -c ctr0)
+ gpu_test3_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test3_pod0_ctr0_logs")
+ gpu_test3_pod0_ctr0_gpus_count=$(echo "$gpu_test3_pod0_ctr0_gpus" | wc -w)
+ if [[ $gpu_test3_pod0_ctr0_gpus_count != 1 ]]; then
+   echo "Expected Pod gpu-test3/pod0, container ctr0 to have 1 GPU, but got $gpu_test3_pod0_ctr0_gpus_count: $gpu_test3_pod0_ctr0_gpus"
+   exit 1
+ fi
+ gpu_test3_pod0_ctr0_gpu="$gpu_test3_pod0_ctr0_gpus"
+ if gpu-already-seen "$gpu_test3_pod0_ctr0_gpu"; then
+   echo "Pod gpu-test3/pod0, container ctr0 should have a new GPU but claimed $gpu_test3_pod0_ctr0_gpu which is already claimed"
+   exit 1
+ fi
+ echo "Pod gpu-test3/pod0, container ctr0 claimed $gpu_test3_pod0_ctr0_gpu"
+ observed_gpus+=("$gpu_test3_pod0_ctr0_gpu")
+ gpu_test3_pod0_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test3_pod0_ctr0_logs" $(gpu-id "$gpu_test3_pod0_ctr0_gpu"))
+ if [[ "$gpu_test3_pod0_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
+   echo "Expected Pod gpu-test3/pod0, container ctr0 to have sharing strategy TimeSlicing, got $gpu_test3_pod0_ctr0_sharing_strategy"
+   exit 1
+ fi
+ gpu_test3_pod0_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test3_pod0_ctr0_logs" $(gpu-id "$gpu_test3_pod0_ctr0_gpu"))
+ if [[ "$gpu_test3_pod0_ctr0_timeslice_interval" != "Default" ]]; then
+   echo "Expected Pod gpu-test3/pod0, container ctr0 to have timeslice interval Default, got $gpu_test3_pod0_ctr0_timeslice_interval"
+   exit 1
+ fi
+
+ gpu_test3_pod0_ctr1_logs=$(kubectl logs -n gpu-test3 pod0 -c ctr1)
+ gpu_test3_pod0_ctr1_gpus=$(gpus-from-logs "$gpu_test3_pod0_ctr1_logs")
+ gpu_test3_pod0_ctr1_gpus_count=$(echo "$gpu_test3_pod0_ctr1_gpus" | wc -w)
+ if [[ $gpu_test3_pod0_ctr1_gpus_count != 1 ]]; then
+   echo "Expected Pod gpu-test3/pod0, container ctr1 to have 1 GPU, but got $gpu_test3_pod0_ctr1_gpus_count: $gpu_test3_pod0_ctr1_gpus"
+   exit 1
+ fi
+ gpu_test3_pod0_ctr1_gpu="$gpu_test3_pod0_ctr1_gpus"
+ echo "Pod gpu-test3/pod0, container ctr1 claimed $gpu_test3_pod0_ctr1_gpu"
+ if [[ "$gpu_test3_pod0_ctr1_gpu" != "$gpu_test3_pod0_ctr0_gpu" ]]; then
+   echo "Pod gpu-test3/pod0, container ctr1 should claim the same GPU as Pod gpu-test3/pod0, container ctr0, but did not"
+   exit 1
+ fi
+ gpu_test3_pod0_ctr1_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test3_pod0_ctr1_logs" $(gpu-id "$gpu_test3_pod0_ctr1_gpu"))
+ if [[ "$gpu_test3_pod0_ctr1_sharing_strategy" != "TimeSlicing" ]]; then
+   echo "Expected Pod gpu-test3/pod0, container ctr1 to have sharing strategy TimeSlicing, got $gpu_test3_pod0_ctr1_sharing_strategy"
+   exit 1
+ fi
+ gpu_test3_pod0_ctr1_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test3_pod0_ctr1_logs" $(gpu-id "$gpu_test3_pod0_ctr1_gpu"))
+ if [[ "$gpu_test3_pod0_ctr1_timeslice_interval" != "Default" ]]; then
+   echo "Expected Pod gpu-test3/pod0, container ctr1 to have timeslice interval Default, got $gpu_test3_pod0_ctr1_timeslice_interval"
+   exit 1
+ fi
+
+
kubectl wait --for=condition=Ready -n gpu-test4 pod/pod0 --timeout=120s
kubectl wait --for=condition=Ready -n gpu-test4 pod/pod1 --timeout=120s
gpu_test_4=$(kubectl get pods -n gpu-test4 | grep -c 'Running')
if [ $gpu_test_4 != 2 ]; then
-   echo "gpu_test_4 $gpu_test_4 failed to match against 1 expected pods"
+   echo "gpu_test_4 $gpu_test_4 failed to match against 2 expected pods"
  exit 1
fi

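+ # gpu-test4: verify that both pods share the same GPU, with sharing strategy
+ # TimeSlicing and the Default timeslice interval.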
+ gpu_test4_pod0_ctr0_logs=$(kubectl logs -n gpu-test4 pod0 -c ctr0)
+ gpu_test4_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test4_pod0_ctr0_logs")
+ gpu_test4_pod0_ctr0_gpus_count=$(echo "$gpu_test4_pod0_ctr0_gpus" | wc -w)
+ if [[ $gpu_test4_pod0_ctr0_gpus_count != 1 ]]; then
+   echo "Expected Pod gpu-test4/pod0, container ctr0 to have 1 GPU, but got $gpu_test4_pod0_ctr0_gpus_count: $gpu_test4_pod0_ctr0_gpus"
+   exit 1
+ fi
+ gpu_test4_pod0_ctr0_gpu="$gpu_test4_pod0_ctr0_gpus"
+ if gpu-already-seen "$gpu_test4_pod0_ctr0_gpu"; then
+   echo "Pod gpu-test4/pod0, container ctr0 should have a new GPU but claimed $gpu_test4_pod0_ctr0_gpu which is already claimed"
+   exit 1
+ fi
+ echo "Pod gpu-test4/pod0, container ctr0 claimed $gpu_test4_pod0_ctr0_gpu"
+ observed_gpus+=("$gpu_test4_pod0_ctr0_gpu")
+ gpu_test4_pod0_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test4_pod0_ctr0_logs" $(gpu-id "$gpu_test4_pod0_ctr0_gpu"))
+ if [[ "$gpu_test4_pod0_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
+   echo "Expected Pod gpu-test4/pod0, container ctr0 to have sharing strategy TimeSlicing, got $gpu_test4_pod0_ctr0_sharing_strategy"
+   exit 1
+ fi
+ gpu_test4_pod0_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test4_pod0_ctr0_logs" $(gpu-id "$gpu_test4_pod0_ctr0_gpu"))
+ if [[ "$gpu_test4_pod0_ctr0_timeslice_interval" != "Default" ]]; then
+   echo "Expected Pod gpu-test4/pod0, container ctr0 to have timeslice interval Default, got $gpu_test4_pod0_ctr0_timeslice_interval"
+   exit 1
+ fi
+
+ gpu_test4_pod1_ctr0_logs=$(kubectl logs -n gpu-test4 pod1 -c ctr0)
+ gpu_test4_pod1_ctr0_gpus=$(gpus-from-logs "$gpu_test4_pod1_ctr0_logs")
+ gpu_test4_pod1_ctr0_gpus_count=$(echo "$gpu_test4_pod1_ctr0_gpus" | wc -w)
+ if [[ $gpu_test4_pod1_ctr0_gpus_count != 1 ]]; then
+   echo "Expected Pod gpu-test4/pod1, container ctr0 to have 1 GPU, but got $gpu_test4_pod1_ctr0_gpus_count: $gpu_test4_pod1_ctr0_gpus"
+   exit 1
+ fi
+ gpu_test4_pod1_ctr0_gpu="$gpu_test4_pod1_ctr0_gpus"
+ echo "Pod gpu-test4/pod1, container ctr0 claimed $gpu_test4_pod1_ctr0_gpu"
+ # Compare against pod0's GPU (not pod1's own value), otherwise this check is a no-op.
+ if [[ "$gpu_test4_pod1_ctr0_gpu" != "$gpu_test4_pod0_ctr0_gpu" ]]; then
+   echo "Pod gpu-test4/pod1, container ctr0 should claim the same GPU as Pod gpu-test4/pod0, container ctr0, but did not"
+   exit 1
+ fi
+ gpu_test4_pod1_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test4_pod1_ctr0_logs" $(gpu-id "$gpu_test4_pod1_ctr0_gpu"))
+ if [[ "$gpu_test4_pod1_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
+   echo "Expected Pod gpu-test4/pod1, container ctr0 to have sharing strategy TimeSlicing, got $gpu_test4_pod1_ctr0_sharing_strategy"
+   exit 1
+ fi
+ gpu_test4_pod1_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test4_pod1_ctr0_logs" $(gpu-id "$gpu_test4_pod1_ctr0_gpu"))
+ if [[ "$gpu_test4_pod1_ctr0_timeslice_interval" != "Default" ]]; then
+   echo "Expected Pod gpu-test4/pod1, container ctr0 to have timeslice interval Default, got $gpu_test4_pod1_ctr0_timeslice_interval"
+   exit 1
+ fi
+
+
kubectl wait --for=condition=Ready -n gpu-test5 pod/pod0 --timeout=120s
gpu_test_5=$(kubectl get pods -n gpu-test5 | grep -c 'Running')
if [ $gpu_test_5 != 1 ]; then
  echo "gpu_test_5 $gpu_test_5 failed to match against 1 expected pod"
  exit 1
fi

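+ # gpu-test5: verify that ts-ctr0/ts-ctr1 share one GPU with TimeSlicing and a
+ # Long timeslice interval, while sp-ctr0/sp-ctr1 share a different GPU with
+ # SpacePartitioning and a partition count of 10.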
+ gpu_test5_pod0_ts_ctr0_logs=$(kubectl logs -n gpu-test5 pod0 -c ts-ctr0)
+ gpu_test5_pod0_ts_ctr0_gpus=$(gpus-from-logs "$gpu_test5_pod0_ts_ctr0_logs")
+ gpu_test5_pod0_ts_ctr0_gpus_count=$(echo "$gpu_test5_pod0_ts_ctr0_gpus" | wc -w)
+ if [[ $gpu_test5_pod0_ts_ctr0_gpus_count != 1 ]]; then
+   echo "Expected Pod gpu-test5/pod0, container ts-ctr0 to have 1 GPU, but got $gpu_test5_pod0_ts_ctr0_gpus_count: $gpu_test5_pod0_ts_ctr0_gpus"
+   exit 1
+ fi
+ gpu_test5_pod0_ts_ctr0_gpu="$gpu_test5_pod0_ts_ctr0_gpus"
+ if gpu-already-seen "$gpu_test5_pod0_ts_ctr0_gpu"; then
+   echo "Pod gpu-test5/pod0, container ts-ctr0 should have a new GPU but claimed $gpu_test5_pod0_ts_ctr0_gpu which is already claimed"
+   exit 1
+ fi
+ echo "Pod gpu-test5/pod0, container ts-ctr0 claimed $gpu_test5_pod0_ts_ctr0_gpu"
+ observed_gpus+=("$gpu_test5_pod0_ts_ctr0_gpu")
+ gpu_test5_pod0_ts_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_ts_ctr0_logs" $(gpu-id "$gpu_test5_pod0_ts_ctr0_gpu"))
+ if [[ "$gpu_test5_pod0_ts_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
+   echo "Expected Pod gpu-test5/pod0, container ts-ctr0 to have sharing strategy TimeSlicing, got $gpu_test5_pod0_ts_ctr0_sharing_strategy"
+   exit 1
+ fi
+ gpu_test5_pod0_ts_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test5_pod0_ts_ctr0_logs" $(gpu-id "$gpu_test5_pod0_ts_ctr0_gpu"))
+ if [[ "$gpu_test5_pod0_ts_ctr0_timeslice_interval" != "Long" ]]; then
+   echo "Expected Pod gpu-test5/pod0, container ts-ctr0 to have timeslice interval Long, got $gpu_test5_pod0_ts_ctr0_timeslice_interval"
+   exit 1
+ fi
+
+ gpu_test5_pod0_ts_ctr1_logs=$(kubectl logs -n gpu-test5 pod0 -c ts-ctr1)
+ gpu_test5_pod0_ts_ctr1_gpus=$(gpus-from-logs "$gpu_test5_pod0_ts_ctr1_logs")
+ gpu_test5_pod0_ts_ctr1_gpus_count=$(echo "$gpu_test5_pod0_ts_ctr1_gpus" | wc -w)
+ if [[ $gpu_test5_pod0_ts_ctr1_gpus_count != 1 ]]; then
+   echo "Expected Pod gpu-test5/pod0, container ts-ctr1 to have 1 GPU, but got $gpu_test5_pod0_ts_ctr1_gpus_count: $gpu_test5_pod0_ts_ctr1_gpus"
+   exit 1
+ fi
+ gpu_test5_pod0_ts_ctr1_gpu="$gpu_test5_pod0_ts_ctr1_gpus"
+ echo "Pod gpu-test5/pod0, container ts-ctr1 claimed $gpu_test5_pod0_ts_ctr1_gpu"
+ if [[ "$gpu_test5_pod0_ts_ctr1_gpu" != "$gpu_test5_pod0_ts_ctr0_gpu" ]]; then
+   echo "Pod gpu-test5/pod0, container ts-ctr1 should claim the same GPU as Pod gpu-test5/pod0, container ts-ctr0, but did not"
+   exit 1
+ fi
+ gpu_test5_pod0_ts_ctr1_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_ts_ctr1_logs" $(gpu-id "$gpu_test5_pod0_ts_ctr1_gpu"))
+ if [[ "$gpu_test5_pod0_ts_ctr1_sharing_strategy" != "TimeSlicing" ]]; then
+   echo "Expected Pod gpu-test5/pod0, container ts-ctr1 to have sharing strategy TimeSlicing, got $gpu_test5_pod0_ts_ctr1_sharing_strategy"
+   exit 1
+ fi
+ gpu_test5_pod0_ts_ctr1_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test5_pod0_ts_ctr1_logs" $(gpu-id "$gpu_test5_pod0_ts_ctr1_gpu"))
+ if [[ "$gpu_test5_pod0_ts_ctr1_timeslice_interval" != "Long" ]]; then
+   echo "Expected Pod gpu-test5/pod0, container ts-ctr1 to have timeslice interval Long, got $gpu_test5_pod0_ts_ctr1_timeslice_interval"
+   exit 1
+ fi
+
+ gpu_test5_pod0_sp_ctr0_logs=$(kubectl logs -n gpu-test5 pod0 -c sp-ctr0)
+ gpu_test5_pod0_sp_ctr0_gpus=$(gpus-from-logs "$gpu_test5_pod0_sp_ctr0_logs")
+ gpu_test5_pod0_sp_ctr0_gpus_count=$(echo "$gpu_test5_pod0_sp_ctr0_gpus" | wc -w)
+ if [[ $gpu_test5_pod0_sp_ctr0_gpus_count != 1 ]]; then
+   echo "Expected Pod gpu-test5/pod0, container sp-ctr0 to have 1 GPU, but got $gpu_test5_pod0_sp_ctr0_gpus_count: $gpu_test5_pod0_sp_ctr0_gpus"
+   exit 1
+ fi
+ gpu_test5_pod0_sp_ctr0_gpu="$gpu_test5_pod0_sp_ctr0_gpus"
+ if gpu-already-seen "$gpu_test5_pod0_sp_ctr0_gpu"; then
+   echo "Pod gpu-test5/pod0, container sp-ctr0 should have a new GPU but claimed $gpu_test5_pod0_sp_ctr0_gpu which is already claimed"
+   exit 1
+ fi
+ echo "Pod gpu-test5/pod0, container sp-ctr0 claimed $gpu_test5_pod0_sp_ctr0_gpu"
+ observed_gpus+=("$gpu_test5_pod0_sp_ctr0_gpu")
+ gpu_test5_pod0_sp_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_sp_ctr0_logs" $(gpu-id "$gpu_test5_pod0_sp_ctr0_gpu"))
+ if [[ "$gpu_test5_pod0_sp_ctr0_sharing_strategy" != "SpacePartitioning" ]]; then
+   echo "Expected Pod gpu-test5/pod0, container sp-ctr0 to have sharing strategy SpacePartitioning, got $gpu_test5_pod0_sp_ctr0_sharing_strategy"
+   exit 1
+ fi
+ gpu_test5_pod0_sp_ctr0_partition_count=$(gpu-partition-count-from-logs "$gpu_test5_pod0_sp_ctr0_logs" $(gpu-id "$gpu_test5_pod0_sp_ctr0_gpu"))
+ if [[ "$gpu_test5_pod0_sp_ctr0_partition_count" != "10" ]]; then
+   echo "Expected Pod gpu-test5/pod0, container sp-ctr0 to have partition count 10, got $gpu_test5_pod0_sp_ctr0_partition_count"
+   exit 1
+ fi
+
+ gpu_test5_pod0_sp_ctr1_logs=$(kubectl logs -n gpu-test5 pod0 -c sp-ctr1)
+ gpu_test5_pod0_sp_ctr1_gpus=$(gpus-from-logs "$gpu_test5_pod0_sp_ctr1_logs")
+ gpu_test5_pod0_sp_ctr1_gpus_count=$(echo "$gpu_test5_pod0_sp_ctr1_gpus" | wc -w)
+ if [[ $gpu_test5_pod0_sp_ctr1_gpus_count != 1 ]]; then
+   echo "Expected Pod gpu-test5/pod0, container sp-ctr1 to have 1 GPU, but got $gpu_test5_pod0_sp_ctr1_gpus_count: $gpu_test5_pod0_sp_ctr1_gpus"
+   exit 1
+ fi
+ gpu_test5_pod0_sp_ctr1_gpu="$gpu_test5_pod0_sp_ctr1_gpus"
+ echo "Pod gpu-test5/pod0, container sp-ctr1 claimed $gpu_test5_pod0_sp_ctr1_gpu"
+ if [[ "$gpu_test5_pod0_sp_ctr1_gpu" != "$gpu_test5_pod0_sp_ctr0_gpu" ]]; then
+   echo "Pod gpu-test5/pod0, container sp-ctr1 should claim the same GPU as Pod gpu-test5/pod0, container sp-ctr0, but did not"
+   exit 1
+ fi
+ gpu_test5_pod0_sp_ctr1_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_sp_ctr1_logs" $(gpu-id "$gpu_test5_pod0_sp_ctr1_gpu"))
+ if [[ "$gpu_test5_pod0_sp_ctr1_sharing_strategy" != "SpacePartitioning" ]]; then
+   echo "Expected Pod gpu-test5/pod0, container sp-ctr1 to have sharing strategy SpacePartitioning, got $gpu_test5_pod0_sp_ctr1_sharing_strategy"
+   exit 1
+ fi
+ gpu_test5_pod0_sp_ctr1_partition_count=$(gpu-partition-count-from-logs "$gpu_test5_pod0_sp_ctr1_logs" $(gpu-id "$gpu_test5_pod0_sp_ctr1_gpu"))
+ if [[ "$gpu_test5_pod0_sp_ctr1_partition_count" != "10" ]]; then
+   echo "Expected Pod gpu-test5/pod0, container sp-ctr1 to have partition count 10, got $gpu_test5_pod0_sp_ctr1_partition_count"
+   exit 1
+ fi
+
# test that deletion is fast (less than the default grace period of 30s)
# see https://github.com/kubernetes/kubernetes/issues/127188 for details
kubectl delete -f demo/gpu-test1.yaml --timeout=25s