Commit 892b7d0

Merge pull request #73 from nojnhuh/e2e-env
Verify environment variables in e2e test
2 parents cfe7e11 + 9c39b1e commit 892b7d0

1 file changed: +283 -1 lines
test/e2e/e2e.sh (+283 -1)
@@ -27,6 +27,43 @@ kubectl create -f demo/gpu-test3.yaml
 kubectl create -f demo/gpu-test4.yaml
 kubectl create -f demo/gpu-test5.yaml
 
+function gpus-from-logs {
+  local logs="$1"
+  echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_[[:digit:]]+=\"(.+)\"$/\1/p"
+}
+
+function gpu-id {
+  local gpu="$1"
+  echo "$gpu" | sed -nE "s/^gpu-([[:digit:]]+)$/\1/p"
+}
+
+function gpu-sharing-strategy-from-logs {
+  local logs="$1"
+  local id="$2"
+  echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_${id}_SHARING_STRATEGY=\"(.+)\"$/\1/p"
+}
+
+function gpu-timeslice-interval-from-logs {
+  local logs="$1"
+  local id="$2"
+  echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_${id}_TIMESLICE_INTERVAL=\"(.+)\"$/\1/p"
+}
+
+function gpu-partition-count-from-logs {
+  local logs="$1"
+  local id="$2"
+  echo "$logs" | sed -nE "s/^declare -x GPU_DEVICE_${id}_PARTITION_COUNT=\"(.+)\"$/\1/p"
+}
+
+declare -a observed_gpus
+function gpu-already-seen {
+  local gpu="$1"
+  for seen in "${observed_gpus[@]}"; do
+    if [[ "$gpu" == "$seen" ]]; then return 0; fi;
+  done
+  return 1
+}
+
 kubectl wait --for=condition=Ready -n gpu-test1 pod/pod0 --timeout=120s
 kubectl wait --for=condition=Ready -n gpu-test1 pod/pod1 --timeout=120s
 gpu_test_1=$(kubectl get pods -n gpu-test1 | grep -c 'Running')
@@ -35,6 +72,36 @@ if [ $gpu_test_1 != 2 ]; then
   exit 1
 fi
 
+gpu_test1_pod0_ctr0_logs=$(kubectl logs -n gpu-test1 pod0 -c ctr0)
+gpu_test1_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test1_pod0_ctr0_logs")
+gpu_test1_pod0_ctr0_gpus_count=$(echo "$gpu_test1_pod0_ctr0_gpus" | wc -w)
+if [[ $gpu_test1_pod0_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test1/pod0, container ctr0 to have 1 GPU, but got $gpu_test1_pod0_ctr0_gpus_count: $gpu_test1_pod0_ctr0_gpus"
+  exit 1
+fi
+gpu_test1_pod0_ctr0_gpu="$gpu_test1_pod0_ctr0_gpus"
+if gpu-already-seen "$gpu_test1_pod0_ctr0_gpu"; then
+  echo "Pod gpu-test1/pod0, container ctr0 should have a new GPU but claimed $gpu_test1_pod0_ctr0_gpu which is already claimed"
+  exit 1
+fi
+echo "Pod gpu-test1/pod0, container ctr0 claimed $gpu_test1_pod0_ctr0_gpu"
+observed_gpus+=("$gpu_test1_pod0_ctr0_gpu")
+
+gpu_test1_pod1_ctr0_logs=$(kubectl logs -n gpu-test1 pod1 -c ctr0)
+gpu_test1_pod1_ctr0_gpus=$(gpus-from-logs "$gpu_test1_pod1_ctr0_logs")
+gpu_test1_pod1_ctr0_gpus_count=$(echo "$gpu_test1_pod1_ctr0_gpus" | wc -w)
+if [[ $gpu_test1_pod1_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test1/pod1, container ctr0 to have 1 GPU, but got $gpu_test1_pod1_ctr0_gpus_count: $gpu_test1_pod1_ctr0_gpus"
+  exit 1
+fi
+gpu_test1_pod1_ctr0_gpu="$gpu_test1_pod1_ctr0_gpus"
+if gpu-already-seen "$gpu_test1_pod1_ctr0_gpu"; then
+  echo "Pod gpu-test1/pod1, container ctr0 should have a new GPU but claimed $gpu_test1_pod1_ctr0_gpu which is already claimed"
+  exit 1
+fi
+echo "Pod gpu-test1/pod1, container ctr0 claimed $gpu_test1_pod1_ctr0_gpu"
+observed_gpus+=("$gpu_test1_pod1_ctr0_gpu")
+
 
 kubectl wait --for=condition=Ready -n gpu-test2 pod/pod0 --timeout=120s
 gpu_test_2=$(kubectl get pods -n gpu-test2 | grep -c 'Running')
@@ -43,28 +110,243 @@ if [ $gpu_test_2 != 1 ]; then
   exit 1
 fi
 
+gpu_test2_pod0_ctr0_logs=$(kubectl logs -n gpu-test2 pod0 -c ctr0)
+gpu_test2_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test2_pod0_ctr0_logs")
+gpu_test2_pod0_ctr0_gpus_count=$(echo "$gpu_test2_pod0_ctr0_gpus" | wc -w)
+if [[ $gpu_test2_pod0_ctr0_gpus_count != 2 ]]; then
+  echo "Expected Pod gpu-test2/pod0, container ctr0 to have 2 GPUs, but got $gpu_test2_pod0_ctr0_gpus_count: $gpu_test2_pod0_ctr0_gpus"
+  exit 1
+fi
+echo "$gpu_test2_pod0_ctr0_gpus" | while read gpu_test2_pod0_ctr0_gpu; do
+  if gpu-already-seen "$gpu_test2_pod0_ctr0_gpu"; then
+    echo "Pod gpu-test2/pod0, container ctr0 should have a new GPU but claimed $gpu_test2_pod0_ctr0_gpu which is already claimed"
+    exit 1
+  fi
+  echo "Pod gpu-test2/pod0, container ctr0 claimed $gpu_test2_pod0_ctr0_gpu"
+  observed_gpus+=("$gpu_test2_pod0_ctr0_gpu")
+done
+
+
 kubectl wait --for=condition=Ready -n gpu-test3 pod/pod0 --timeout=120s
 gpu_test_3=$(kubectl get pods -n gpu-test3 | grep -c 'Running')
 if [ $gpu_test_3 != 1 ]; then
   echo "gpu_test_3 $gpu_test_3 failed to match against 1 expected pod"
   exit 1
 fi
 
+gpu_test3_pod0_ctr0_logs=$(kubectl logs -n gpu-test3 pod0 -c ctr0)
+gpu_test3_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test3_pod0_ctr0_logs")
+gpu_test3_pod0_ctr0_gpus_count=$(echo "$gpu_test3_pod0_ctr0_gpus" | wc -w)
+if [[ $gpu_test3_pod0_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test3/pod0, container ctr0 to have 1 GPU, but got $gpu_test3_pod0_ctr0_gpus_count: $gpu_test3_pod0_ctr0_gpus"
+  exit 1
+fi
+gpu_test3_pod0_ctr0_gpu="$gpu_test3_pod0_ctr0_gpus"
+if gpu-already-seen "$gpu_test3_pod0_ctr0_gpu"; then
+  echo "Pod gpu-test3/pod0, container ctr0 should have a new GPU but claimed $gpu_test3_pod0_ctr0_gpu which is already claimed"
+  exit 1
+fi
+echo "Pod gpu-test3/pod0, container ctr0 claimed $gpu_test3_pod0_ctr0_gpu"
+observed_gpus+=("$gpu_test3_pod0_ctr0_gpu")
+gpu_test3_pod0_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test3_pod0_ctr0_logs" $(gpu-id "$gpu_test3_pod0_ctr0_gpu"))
+if [[ "$gpu_test3_pod0_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
+  echo "Expected Pod gpu-test3/pod0, container ctr0 to have sharing strategy TimeSlicing, got $gpu_test3_pod0_ctr0_sharing_strategy"
+  exit 1
+fi
+gpu_test3_pod0_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test3_pod0_ctr0_logs" $(gpu-id "$gpu_test3_pod0_ctr0_gpu"))
+if [[ "$gpu_test3_pod0_ctr0_timeslice_interval" != "Default" ]]; then
+  echo "Expected Pod gpu-test3/pod0, container ctr0 to have timeslice interval Default, got $gpu_test3_pod0_ctr0_timeslice_interval"
+  exit 1
+fi
+
+gpu_test3_pod0_ctr1_logs=$(kubectl logs -n gpu-test3 pod0 -c ctr1)
+gpu_test3_pod0_ctr1_gpus=$(gpus-from-logs "$gpu_test3_pod0_ctr1_logs")
+gpu_test3_pod0_ctr1_gpus_count=$(echo "$gpu_test3_pod0_ctr1_gpus" | wc -w)
+if [[ $gpu_test3_pod0_ctr1_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test3/pod0, container ctr1 to have 1 GPU, but got $gpu_test3_pod0_ctr1_gpus_count: $gpu_test3_pod0_ctr1_gpus"
+  exit 1
+fi
+gpu_test3_pod0_ctr1_gpu="$gpu_test3_pod0_ctr1_gpus"
+echo "Pod gpu-test3/pod0, container ctr1 claimed $gpu_test3_pod0_ctr1_gpu"
+if [[ "$gpu_test3_pod0_ctr1_gpu" != "$gpu_test3_pod0_ctr0_gpu" ]]; then
+  echo "Pod gpu-test3/pod0, container ctr1 should claim the same GPU as Pod gpu-test3/pod0, container ctr0, but did not"
+  exit 1
+fi
+gpu_test3_pod0_ctr1_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test3_pod0_ctr1_logs" $(gpu-id "$gpu_test3_pod0_ctr1_gpu"))
+if [[ "$gpu_test3_pod0_ctr1_sharing_strategy" != "TimeSlicing" ]]; then
+  echo "Expected Pod gpu-test3/pod0, container ctr1 to have sharing strategy TimeSlicing, got $gpu_test3_pod0_ctr1_sharing_strategy"
+  exit 1
+fi
+gpu_test3_pod0_ctr1_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test3_pod0_ctr1_logs" $(gpu-id "$gpu_test3_pod0_ctr1_gpu"))
+if [[ "$gpu_test3_pod0_ctr1_timeslice_interval" != "Default" ]]; then
+  echo "Expected Pod gpu-test3/pod0, container ctr1 to have timeslice interval Default, got $gpu_test3_pod0_ctr1_timeslice_interval"
+  exit 1
+fi
+
+
 kubectl wait --for=condition=Ready -n gpu-test4 pod/pod0 --timeout=120s
 kubectl wait --for=condition=Ready -n gpu-test4 pod/pod1 --timeout=120s
 gpu_test_4=$(kubectl get pods -n gpu-test4 | grep -c 'Running')
 if [ $gpu_test_4 != 2 ]; then
-  echo "gpu_test_4 $gpu_test_4 failed to match against 1 expected pods"
+  echo "gpu_test_4 $gpu_test_4 failed to match against 2 expected pods"
   exit 1
 fi
 
+gpu_test4_pod0_ctr0_logs=$(kubectl logs -n gpu-test4 pod0 -c ctr0)
+gpu_test4_pod0_ctr0_gpus=$(gpus-from-logs "$gpu_test4_pod0_ctr0_logs")
+gpu_test4_pod0_ctr0_gpus_count=$(echo "$gpu_test4_pod0_ctr0_gpus" | wc -w)
+if [[ $gpu_test4_pod0_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test4/pod0, container ctr0 to have 1 GPU, but got $gpu_test4_pod0_ctr0_gpus_count: $gpu_test4_pod0_ctr0_gpus"
+  exit 1
+fi
+gpu_test4_pod0_ctr0_gpu="$gpu_test4_pod0_ctr0_gpus"
+if gpu-already-seen "$gpu_test4_pod0_ctr0_gpu"; then
+  echo "Pod gpu-test4/pod0, container ctr0 should have a new GPU but claimed $gpu_test4_pod0_ctr0_gpu which is already claimed"
+  exit 1
+fi
+echo "Pod gpu-test4/pod0, container ctr0 claimed $gpu_test4_pod0_ctr0_gpu"
+observed_gpus+=("$gpu_test4_pod0_ctr0_gpu")
+gpu_test4_pod0_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test4_pod0_ctr0_logs" $(gpu-id "$gpu_test4_pod0_ctr0_gpu"))
+if [[ "$gpu_test4_pod0_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
+  echo "Expected Pod gpu-test4/pod0, container ctr0 to have sharing strategy TimeSlicing, got $gpu_test4_pod0_ctr0_sharing_strategy"
+  exit 1
+fi
+gpu_test4_pod0_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test4_pod0_ctr0_logs" $(gpu-id "$gpu_test4_pod0_ctr0_gpu"))
+if [[ "$gpu_test4_pod0_ctr0_timeslice_interval" != "Default" ]]; then
+  echo "Expected Pod gpu-test4/pod0, container ctr0 to have timeslice interval Default, got $gpu_test4_pod0_ctr0_timeslice_interval"
+  exit 1
+fi
+
+gpu_test4_pod1_ctr0_logs=$(kubectl logs -n gpu-test4 pod1 -c ctr0)
+gpu_test4_pod1_ctr0_gpus=$(gpus-from-logs "$gpu_test4_pod1_ctr0_logs")
+gpu_test4_pod1_ctr0_gpus_count=$(echo "$gpu_test4_pod1_ctr0_gpus" | wc -w)
+if [[ $gpu_test4_pod1_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test4/pod1, container ctr0 to have 1 GPU, but got $gpu_test4_pod1_ctr0_gpus_count: $gpu_test4_pod1_ctr0_gpus"
+  exit 1
+fi
+gpu_test4_pod1_ctr0_gpu="$gpu_test4_pod1_ctr0_gpus"
+echo "Pod gpu-test4/pod1, container ctr0 claimed $gpu_test4_pod1_ctr0_gpu"
+if [[ "$gpu_test4_pod1_ctr0_gpu" != "$gpu_test4_pod0_ctr0_gpu" ]]; then
+  echo "Pod gpu-test4/pod1, container ctr0 should claim the same GPU as Pod gpu-test4/pod0, container ctr0, but did not"
+  exit 1
+fi
+gpu_test4_pod1_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test4_pod1_ctr0_logs" $(gpu-id "$gpu_test4_pod1_ctr0_gpu"))
+if [[ "$gpu_test4_pod1_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
+  echo "Expected Pod gpu-test4/pod1, container ctr0 to have sharing strategy TimeSlicing, got $gpu_test4_pod1_ctr0_sharing_strategy"
+  exit 1
+fi
+gpu_test4_pod1_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test4_pod1_ctr0_logs" $(gpu-id "$gpu_test4_pod1_ctr0_gpu"))
+if [[ "$gpu_test4_pod1_ctr0_timeslice_interval" != "Default" ]]; then
+  echo "Expected Pod gpu-test4/pod1, container ctr0 to have timeslice interval Default, got $gpu_test4_pod1_ctr0_timeslice_interval"
+  exit 1
+fi
+
+
 kubectl wait --for=condition=Ready -n gpu-test5 pod/pod0 --timeout=120s
 gpu_test_5=$(kubectl get pods -n gpu-test5 | grep -c 'Running')
 if [ $gpu_test_5 != 1 ]; then
   echo "gpu_test_5 $gpu_test_5 failed to match against 1 expected pod"
   exit 1
 fi
 
+gpu_test5_pod0_ts_ctr0_logs=$(kubectl logs -n gpu-test5 pod0 -c ts-ctr0)
+gpu_test5_pod0_ts_ctr0_gpus=$(gpus-from-logs "$gpu_test5_pod0_ts_ctr0_logs")
+gpu_test5_pod0_ts_ctr0_gpus_count=$(echo "$gpu_test5_pod0_ts_ctr0_gpus" | wc -w)
+if [[ $gpu_test5_pod0_ts_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test5/pod0, container ts-ctr0 to have 1 GPU, but got $gpu_test5_pod0_ts_ctr0_gpus_count: $gpu_test5_pod0_ts_ctr0_gpus"
+  exit 1
+fi
+gpu_test5_pod0_ts_ctr0_gpu="$gpu_test5_pod0_ts_ctr0_gpus"
+if gpu-already-seen "$gpu_test5_pod0_ts_ctr0_gpu"; then
+  echo "Pod gpu-test5/pod0, container ts-ctr0 should have a new GPU but claimed $gpu_test5_pod0_ts_ctr0_gpu which is already claimed"
+  exit 1
+fi
+echo "Pod gpu-test5/pod0, container ts-ctr0 claimed $gpu_test5_pod0_ts_ctr0_gpu"
+observed_gpus+=("$gpu_test5_pod0_ts_ctr0_gpu")
+gpu_test5_pod0_ts_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_ts_ctr0_logs" $(gpu-id "$gpu_test5_pod0_ts_ctr0_gpu"))
+if [[ "$gpu_test5_pod0_ts_ctr0_sharing_strategy" != "TimeSlicing" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container ts-ctr0 to have sharing strategy TimeSlicing, got $gpu_test5_pod0_ts_ctr0_sharing_strategy"
+  exit 1
+fi
+gpu_test5_pod0_ts_ctr0_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test5_pod0_ts_ctr0_logs" $(gpu-id "$gpu_test5_pod0_ts_ctr0_gpu"))
+if [[ "$gpu_test5_pod0_ts_ctr0_timeslice_interval" != "Long" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container ts-ctr0 to have timeslice interval Long, got $gpu_test5_pod0_ts_ctr0_timeslice_interval"
+  exit 1
+fi
+
+gpu_test5_pod0_ts_ctr1_logs=$(kubectl logs -n gpu-test5 pod0 -c ts-ctr1)
+gpu_test5_pod0_ts_ctr1_gpus=$(gpus-from-logs "$gpu_test5_pod0_ts_ctr1_logs")
+gpu_test5_pod0_ts_ctr1_gpus_count=$(echo "$gpu_test5_pod0_ts_ctr1_gpus" | wc -w)
+if [[ $gpu_test5_pod0_ts_ctr1_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test5/pod0, container ts-ctr1 to have 1 GPU, but got $gpu_test5_pod0_ts_ctr1_gpus_count: $gpu_test5_pod0_ts_ctr1_gpus"
+  exit 1
+fi
+gpu_test5_pod0_ts_ctr1_gpu="$gpu_test5_pod0_ts_ctr1_gpus"
+echo "Pod gpu-test5/pod0, container ts-ctr1 claimed $gpu_test5_pod0_ts_ctr1_gpu"
+if [[ "$gpu_test5_pod0_ts_ctr1_gpu" != "$gpu_test5_pod0_ts_ctr0_gpu" ]]; then
+  echo "Pod gpu-test5/pod0, container ts-ctr1 should claim the same GPU as Pod gpu-test5/pod0, container ts-ctr0, but did not"
+  exit 1
+fi
+gpu_test5_pod0_ts_ctr1_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_ts_ctr1_logs" $(gpu-id "$gpu_test5_pod0_ts_ctr1_gpu"))
+if [[ "$gpu_test5_pod0_ts_ctr1_sharing_strategy" != "TimeSlicing" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container ts-ctr1 to have sharing strategy TimeSlicing, got $gpu_test5_pod0_ts_ctr1_sharing_strategy"
+  exit 1
+fi
+gpu_test5_pod0_ts_ctr1_timeslice_interval=$(gpu-timeslice-interval-from-logs "$gpu_test5_pod0_ts_ctr1_logs" $(gpu-id "$gpu_test5_pod0_ts_ctr1_gpu"))
+if [[ "$gpu_test5_pod0_ts_ctr1_timeslice_interval" != "Long" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container ts-ctr1 to have timeslice interval Long, got $gpu_test5_pod0_ts_ctr1_timeslice_interval"
+  exit 1
+fi
+
+gpu_test5_pod0_sp_ctr0_logs=$(kubectl logs -n gpu-test5 pod0 -c sp-ctr0)
+gpu_test5_pod0_sp_ctr0_gpus=$(gpus-from-logs "$gpu_test5_pod0_sp_ctr0_logs")
+gpu_test5_pod0_sp_ctr0_gpus_count=$(echo "$gpu_test5_pod0_sp_ctr0_gpus" | wc -w)
+if [[ $gpu_test5_pod0_sp_ctr0_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test5/pod0, container sp-ctr0 to have 1 GPU, but got $gpu_test5_pod0_sp_ctr0_gpus_count: $gpu_test5_pod0_sp_ctr0_gpus"
+  exit 1
+fi
+gpu_test5_pod0_sp_ctr0_gpu="$gpu_test5_pod0_sp_ctr0_gpus"
+if gpu-already-seen "$gpu_test5_pod0_sp_ctr0_gpu"; then
+  echo "Pod gpu-test5/pod0, container sp-ctr0 should have a new GPU but claimed $gpu_test5_pod0_sp_ctr0_gpu which is already claimed"
+  exit 1
+fi
+echo "Pod gpu-test5/pod0, container sp-ctr0 claimed $gpu_test5_pod0_sp_ctr0_gpu"
+observed_gpus+=("$gpu_test5_pod0_sp_ctr0_gpu")
+gpu_test5_pod0_sp_ctr0_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_sp_ctr0_logs" $(gpu-id "$gpu_test5_pod0_sp_ctr0_gpu"))
+if [[ "$gpu_test5_pod0_sp_ctr0_sharing_strategy" != "SpacePartitioning" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container sp-ctr0 to have sharing strategy SpacePartitioning, got $gpu_test5_pod0_sp_ctr0_sharing_strategy"
+  exit 1
+fi
+gpu_test5_pod0_sp_ctr0_partition_count=$(gpu-partition-count-from-logs "$gpu_test5_pod0_sp_ctr0_logs" $(gpu-id "$gpu_test5_pod0_sp_ctr0_gpu"))
+if [[ "$gpu_test5_pod0_sp_ctr0_partition_count" != "10" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container sp-ctr0 to have partition count 10, got $gpu_test5_pod0_sp_ctr0_partition_count"
+  exit 1
+fi
+
+gpu_test5_pod0_sp_ctr1_logs=$(kubectl logs -n gpu-test5 pod0 -c sp-ctr1)
+gpu_test5_pod0_sp_ctr1_gpus=$(gpus-from-logs "$gpu_test5_pod0_sp_ctr1_logs")
+gpu_test5_pod0_sp_ctr1_gpus_count=$(echo "$gpu_test5_pod0_sp_ctr1_gpus" | wc -w)
+if [[ $gpu_test5_pod0_sp_ctr1_gpus_count != 1 ]]; then
+  echo "Expected Pod gpu-test5/pod0, container sp-ctr1 to have 1 GPU, but got $gpu_test5_pod0_sp_ctr1_gpus_count: $gpu_test5_pod0_sp_ctr1_gpus"
+  exit 1
+fi
+gpu_test5_pod0_sp_ctr1_gpu="$gpu_test5_pod0_sp_ctr1_gpus"
+echo "Pod gpu-test5/pod0, container sp-ctr1 claimed $gpu_test5_pod0_sp_ctr1_gpu"
+if [[ "$gpu_test5_pod0_sp_ctr1_gpu" != "$gpu_test5_pod0_sp_ctr0_gpu" ]]; then
+  echo "Pod gpu-test5/pod0, container sp-ctr1 should claim the same GPU as Pod gpu-test5/pod0, container sp-ctr0, but did not"
+  exit 1
+fi
+gpu_test5_pod0_sp_ctr1_sharing_strategy=$(gpu-sharing-strategy-from-logs "$gpu_test5_pod0_sp_ctr1_logs" $(gpu-id "$gpu_test5_pod0_sp_ctr1_gpu"))
+if [[ "$gpu_test5_pod0_sp_ctr1_sharing_strategy" != "SpacePartitioning" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container sp-ctr1 to have sharing strategy SpacePartitioning, got $gpu_test5_pod0_sp_ctr1_sharing_strategy"
+  exit 1
+fi
+gpu_test5_pod0_sp_ctr1_partition_count=$(gpu-partition-count-from-logs "$gpu_test5_pod0_sp_ctr1_logs" $(gpu-id "$gpu_test5_pod0_sp_ctr1_gpu"))
+if [[ "$gpu_test5_pod0_sp_ctr1_partition_count" != "10" ]]; then
+  echo "Expected Pod gpu-test5/pod0, container sp-ctr1 to have partition count 10, got $gpu_test5_pod0_sp_ctr1_partition_count"
+  exit 1
+fi
+
 # test that deletion is fast (less than the default grace period of 30s)
 # see https://github.com/kubernetes/kubernetes/issues/127188 for details
 kubectl delete -f demo/gpu-test1.yaml --timeout=25s
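
For context, the helper functions added in this diff extract values from "declare -x" lines in the demo containers' logs; the expected line format is implied by their sed patterns. A minimal sketch of how they behave, using a hypothetical log excerpt (the actual device names and values depend on the demo pods):

# Hypothetical log excerpt; actual values come from the demo containers.
logs='declare -x GPU_DEVICE_0="gpu-0"
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Default"'

gpus-from-logs "$logs"                      # prints: gpu-0
gpu-id "gpu-0"                              # prints: 0
gpu-sharing-strategy-from-logs "$logs" 0    # prints: TimeSlicing
gpu-timeslice-interval-from-logs "$logs" 0  # prints: Default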
