Skip to content

Commit 8cc427a

Browse files
authored
Merge pull request #58 from klueska/refactor-opaque-configs
Refactor how opaque device configs are handled
2 parents f4bf71f + 53be214 commit 8cc427a

File tree

8 files changed

+199
-144
lines changed

8 files changed

+199
-144
lines changed

.github/workflows/e2e.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ jobs:
1212
- name: Install Go
1313
uses: actions/setup-go@v4
1414
with:
15-
go-version: 1.22.6
15+
go-version: 1.23.1
1616
- name: Checkout code
1717
uses: actions/checkout@v3
1818
- name: Build

.github/workflows/tests.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ jobs:
99
test:
1010
strategy:
1111
matrix:
12-
version: ['1.22.2' ]
12+
version: ['1.23.1' ]
1313
platform: [ ubuntu-latest, macos-latest ]
1414
runs-on: ${{ matrix.platform }}
1515
steps:

README.md

+39-39
Original file line numberDiff line numberDiff line change
@@ -127,7 +127,7 @@ items:
127127
string: gpu-18db0e85-99e9-c746-8531-ffeb86328b39
128128
capacity:
129129
memory: 80Gi
130-
name: gpu-18db0e85-99e9-c746-8531-ffeb86328b39
130+
name: gpu-0
131131
- basic:
132132
attributes:
133133
driverVersion:
@@ -140,7 +140,7 @@ items:
140140
string: gpu-93d37703-997c-c46f-a531-755e3e0dc2ac
141141
capacity:
142142
memory: 80Gi
143-
name: gpu-93d37703-997c-c46f-a531-755e3e0dc2ac
143+
name: gpu-1
144144
- basic:
145145
attributes:
146146
driverVersion:
@@ -153,7 +153,7 @@ items:
153153
string: gpu-ee3e4b55-fcda-44b8-0605-64b7a9967744
154154
capacity:
155155
memory: 80Gi
156-
name: gpu-ee3e4b55-fcda-44b8-0605-64b7a9967744
156+
name: gpu-2
157157
- basic:
158158
attributes:
159159
driverVersion:
@@ -166,7 +166,7 @@ items:
166166
string: gpu-9ede7e32-5825-a11b-fa3d-bab6d47e0243
167167
capacity:
168168
memory: 80Gi
169-
name: gpu-9ede7e32-5825-a11b-fa3d-bab6d47e0243
169+
name: gpu-3
170170
- basic:
171171
attributes:
172172
driverVersion:
@@ -179,7 +179,7 @@ items:
179179
string: gpu-e7b42cb1-4fd8-91b2-bc77-352a0c1f5747
180180
capacity:
181181
memory: 80Gi
182-
name: gpu-e7b42cb1-4fd8-91b2-bc77-352a0c1f5747
182+
name: gpu-4
183183
- basic:
184184
attributes:
185185
driverVersion:
@@ -192,7 +192,7 @@ items:
192192
string: gpu-f11773a1-5bfb-e48b-3d98-1beb5baaf08e
193193
capacity:
194194
memory: 80Gi
195-
name: gpu-f11773a1-5bfb-e48b-3d98-1beb5baaf08e
195+
name: gpu-5
196196
- basic:
197197
attributes:
198198
driverVersion:
@@ -205,7 +205,7 @@ items:
205205
string: gpu-0159f35e-99ee-b2b5-74f1-9d18df3f22ac
206206
capacity:
207207
memory: 80Gi
208-
name: gpu-0159f35e-99ee-b2b5-74f1-9d18df3f22ac
208+
name: gpu-6
209209
- basic:
210210
attributes:
211211
driverVersion:
@@ -218,7 +218,7 @@ items:
218218
string: gpu-657bd2e7-f5c2-a7f2-fbaa-0d1cdc32f81b
219219
capacity:
220220
memory: 80Gi
221-
name: gpu-657bd2e7-f5c2-a7f2-fbaa-0d1cdc32f81b
221+
name: gpu-7
222222
kind: List
223223
metadata:
224224
resourceVersion: ""
@@ -261,9 +261,9 @@ for example in $(seq 1 5); do \
261261
for ctr in $(kubectl get pod -n gpu-test${example} ${pod} -o jsonpath='{.spec.containers[*].name}'); do \
262262
echo "${pod} ${ctr}:"
263263
if [ "${example}" -lt 3 ]; then
264-
kubectl logs -n gpu-test${example} ${pod} -c ${ctr}| grep -E "GPU_DEVICE_[0-9]+="
264+
kubectl logs -n gpu-test${example} ${pod} -c ${ctr}| grep -E "GPU_DEVICE_[0-9]+=" | grep -v "RESOURCE_CLAIM"
265265
else
266-
kubectl logs -n gpu-test${example} ${pod} -c ${ctr}| grep -E "GPU_DEVICE_[0-9]+"
266+
kubectl logs -n gpu-test${example} ${pod} -c ${ctr}| grep -E "GPU_DEVICE_[0-9]+" | grep -v "RESOURCE_CLAIM"
267267
fi
268268
done
269269
done
@@ -275,60 +275,60 @@ This should produce output similar to the following:
275275
```bash
276276
gpu-test1:
277277
pod0 ctr0:
278-
declare -x GPU_DEVICE_0="gpu-ee3e4b55-fcda-44b8-0605-64b7a9967744"
278+
declare -x GPU_DEVICE_6="gpu-6"
279279
pod1 ctr0:
280-
declare -x GPU_DEVICE_0="gpu-9ede7e32-5825-a11b-fa3d-bab6d47e0243"
280+
declare -x GPU_DEVICE_7="gpu-7"
281281

282282
gpu-test2:
283283
pod0 ctr0:
284-
declare -x GPU_DEVICE_0="gpu-e7b42cb1-4fd8-91b2-bc77-352a0c1f5747"
285-
declare -x GPU_DEVICE_1="gpu-f11773a1-5bfb-e48b-3d98-1beb5baaf08e"
284+
declare -x GPU_DEVICE_0="gpu-0"
285+
declare -x GPU_DEVICE_1="gpu-1"
286286

287287
gpu-test3:
288288
pod0 ctr0:
289-
declare -x GPU_DEVICE_0="gpu-0159f35e-99ee-b2b5-74f1-9d18df3f22ac"
290-
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
291-
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Default"
289+
declare -x GPU_DEVICE_2="gpu-2"
290+
declare -x GPU_DEVICE_2_SHARING_STRATEGY="TimeSlicing"
291+
declare -x GPU_DEVICE_2_TIMESLICE_INTERVAL="Default"
292292
pod0 ctr1:
293-
declare -x GPU_DEVICE_0="gpu-0159f35e-99ee-b2b5-74f1-9d18df3f22ac"
294-
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
295-
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Default"
293+
declare -x GPU_DEVICE_2="gpu-2"
294+
declare -x GPU_DEVICE_2_SHARING_STRATEGY="TimeSlicing"
295+
declare -x GPU_DEVICE_2_TIMESLICE_INTERVAL="Default"
296296

297297
gpu-test4:
298298
pod0 ctr0:
299-
declare -x GPU_DEVICE_0="gpu-657bd2e7-f5c2-a7f2-fbaa-0d1cdc32f81b"
300-
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
301-
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Default"
299+
declare -x GPU_DEVICE_3="gpu-3"
300+
declare -x GPU_DEVICE_3_SHARING_STRATEGY="TimeSlicing"
301+
declare -x GPU_DEVICE_3_TIMESLICE_INTERVAL="Default"
302302
pod1 ctr0:
303-
declare -x GPU_DEVICE_0="gpu-657bd2e7-f5c2-a7f2-fbaa-0d1cdc32f81b"
304-
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
305-
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Default"
303+
declare -x GPU_DEVICE_3="gpu-3"
304+
declare -x GPU_DEVICE_3_SHARING_STRATEGY="TimeSlicing"
305+
declare -x GPU_DEVICE_3_TIMESLICE_INTERVAL="Default"
306306

307307
gpu-test5:
308308
pod0 ts-ctr0:
309-
declare -x GPU_DEVICE_0="gpu-18db0e85-99e9-c746-8531-ffeb86328b39"
310-
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
311-
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Long"
309+
declare -x GPU_DEVICE_4="gpu-4"
310+
declare -x GPU_DEVICE_4_SHARING_STRATEGY="TimeSlicing"
311+
declare -x GPU_DEVICE_4_TIMESLICE_INTERVAL="Long"
312312
pod0 ts-ctr1:
313-
declare -x GPU_DEVICE_0="gpu-18db0e85-99e9-c746-8531-ffeb86328b39"
314-
declare -x GPU_DEVICE_0_SHARING_STRATEGY="TimeSlicing"
315-
declare -x GPU_DEVICE_0_TIMESLICE_INTERVAL="Long"
313+
declare -x GPU_DEVICE_4="gpu-4"
314+
declare -x GPU_DEVICE_4_SHARING_STRATEGY="TimeSlicing"
315+
declare -x GPU_DEVICE_4_TIMESLICE_INTERVAL="Long"
316316
pod0 sp-ctr0:
317-
declare -x GPU_DEVICE_1="gpu-93d37703-997c-c46f-a531-755e3e0dc2ac"
318-
declare -x GPU_DEVICE_1_PARTITION_COUNT="10"
319-
declare -x GPU_DEVICE_1_SHARING_STRATEGY="SpacePartitioning"
317+
declare -x GPU_DEVICE_5="gpu-5"
318+
declare -x GPU_DEVICE_5_PARTITION_COUNT="10"
319+
declare -x GPU_DEVICE_5_SHARING_STRATEGY="SpacePartitioning"
320320
pod0 sp-ctr1:
321-
declare -x GPU_DEVICE_1="gpu-93d37703-997c-c46f-a531-755e3e0dc2ac"
322-
declare -x GPU_DEVICE_1_PARTITION_COUNT="10"
323-
declare -x GPU_DEVICE_1_SHARING_STRATEGY="SpacePartitioning"
321+
declare -x GPU_DEVICE_5="gpu-5"
322+
declare -x GPU_DEVICE_5_PARTITION_COUNT="10"
323+
declare -x GPU_DEVICE_5_SHARING_STRATEGY="SpacePartitioning"
324324
```
325325

326326
In this example resource driver, no "actual" GPUs are made available to any
327327
containers. Instead, a set of environment variables is set in each container
328328
to indicate which GPUs *would* have been injected into them by a real resource
329329
driver and how they *would* have been configured.
330330

331-
You can use the UUIDs of the GPUs as well as the GPU sharing settings set in
331+
You can use the IDs of the GPUs as well as the GPU sharing settings set in
332332
these environment variables to verify that they were handed out in a way
333333
consistent with the semantics shown in the figure above.
334334

cmd/dra-example-kubeletplugin/cdi.go

+12-29
Original file line numberDiff line numberDiff line change
@@ -89,36 +89,19 @@ func (cdi *CDIHandler) CreateClaimSpecFile(claimUID string, devices PreparedDevi
8989
Devices: []cdispec.Device{},
9090
}
9191

92-
for i, device := range devices {
93-
envs := []string{
94-
fmt.Sprintf("GPU_DEVICE_%d=%s", i, device.DeviceName),
95-
}
96-
97-
if device.Config.Sharing != nil {
98-
envs = append(envs, fmt.Sprintf("GPU_DEVICE_%d_SHARING_STRATEGY=%s", i, device.Config.Sharing.Strategy))
99-
}
100-
101-
switch {
102-
case device.Config.Sharing.IsTimeSlicing():
103-
tsconfig, err := device.Config.Sharing.GetTimeSlicingConfig()
104-
if err != nil {
105-
return fmt.Errorf("unable to get time slicing config for device %v: %v", device.DeviceName, err)
106-
}
107-
envs = append(envs, fmt.Sprintf("GPU_DEVICE_%d_TIMESLICE_INTERVAL=%v", i, tsconfig.Interval))
108-
109-
case device.Config.Sharing.IsSpacePartitioning():
110-
spconfig, err := device.Config.Sharing.GetSpacePartitioningConfig()
111-
if err != nil {
112-
return fmt.Errorf("unable to get space partitioning config for device %v: %v", device.DeviceName, err)
113-
}
114-
envs = append(envs, fmt.Sprintf("GPU_DEVICE_%d_PARTITION_COUNT=%v", i, spconfig.PartitionCount))
92+
for _, device := range devices {
93+
claimEdits := cdiapi.ContainerEdits{
94+
ContainerEdits: &cdispec.ContainerEdits{
95+
Env: []string{
96+
fmt.Sprintf("GPU_DEVICE_%s_RESOURCE_CLAIM=%s", device.DeviceName[4:], claimUID),
97+
},
98+
},
11599
}
100+
claimEdits.Append(device.ContainerEdits)
116101

117102
cdiDevice := cdispec.Device{
118-
Name: device.DeviceName,
119-
ContainerEdits: cdispec.ContainerEdits{
120-
Env: envs,
121-
},
103+
Name: fmt.Sprintf("%s-%s", claimUID, device.DeviceName),
104+
ContainerEdits: *claimEdits.ContainerEdits,
122105
}
123106

124107
spec.Devices = append(spec.Devices, cdiDevice)
@@ -138,13 +121,13 @@ func (cdi *CDIHandler) DeleteClaimSpecFile(claimUID string) error {
138121
return cdi.cache.RemoveSpec(specName)
139122
}
140123

141-
func (cdi *CDIHandler) GetClaimDevices(devices []string) []string {
124+
func (cdi *CDIHandler) GetClaimDevices(claimUID string, devices []string) []string {
142125
cdiDevices := []string{
143126
cdiparser.QualifiedName(cdiVendor, cdiClass, cdiCommonDeviceName),
144127
}
145128

146129
for _, device := range devices {
147-
cdiDevice := cdiparser.QualifiedName(cdiVendor, cdiClass, device)
130+
cdiDevice := cdiparser.QualifiedName(cdiVendor, cdiClass, fmt.Sprintf("%s-%s", claimUID, device))
148131
cdiDevices = append(cdiDevices, cdiDevice)
149132
}
150133

cmd/dra-example-kubeletplugin/discovery.go

+3-2
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
package main
1818

1919
import (
20+
"fmt"
2021
"math/rand"
2122
"os"
2223

@@ -35,7 +36,7 @@ func enumerateAllPossibleDevices() (AllocatableDevices, error) {
3536
alldevices := make(AllocatableDevices)
3637
for i, uuid := range uuids {
3738
device := resourceapi.Device{
38-
Name: uuid,
39+
Name: fmt.Sprintf("gpu-%d", i),
3940
Basic: &resourceapi.BasicDevice{
4041
Attributes: map[resourceapi.QualifiedName]resourceapi.DeviceAttribute{
4142
"index": {
@@ -56,7 +57,7 @@ func enumerateAllPossibleDevices() (AllocatableDevices, error) {
5657
},
5758
},
5859
}
59-
alldevices[uuid] = device
60+
alldevices[device.Name] = device
6061
}
6162
return alldevices, nil
6263
}

0 commit comments

Comments
 (0)