@@ -20,132 +20,111 @@ import (
20
20
"context"
21
21
"fmt"
22
22
23
- resourceapi "k8s.io/api/resource/v1alpha3"
23
+ metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
24
+ coreclientset "k8s.io/client-go/kubernetes"
25
+ "k8s.io/dynamic-resource-allocation/kubeletplugin"
24
26
"k8s.io/klog/v2"
27
+
25
28
drapbv1 "k8s.io/kubelet/pkg/apis/dra/v1alpha4"
26
29
)
27
30
28
31
var _ drapbv1.NodeServer = & driver {}
29
32
30
33
type driver struct {
31
- doneCh chan struct {}
34
+ client coreclientset.Interface
35
+ plugin kubeletplugin.DRAPlugin
32
36
state * DeviceState
33
37
}
34
38
35
39
func NewDriver (ctx context.Context , config * Config ) (* driver , error ) {
40
+ driver := & driver {
41
+ client : config .coreclient ,
42
+ }
43
+
36
44
state , err := NewDeviceState (config )
37
45
if err != nil {
38
46
return nil , err
39
47
}
48
+ driver .state = state
49
+
50
+ plugin , err := kubeletplugin .Start (
51
+ ctx ,
52
+ driver ,
53
+ kubeletplugin .KubeClient (config .coreclient ),
54
+ kubeletplugin .NodeName (config .flags .nodeName ),
55
+ kubeletplugin .DriverName (DriverName ),
56
+ kubeletplugin .RegistrarSocketPath (PluginRegistrationPath ),
57
+ kubeletplugin .PluginSocketPath (DriverPluginSocketPath ),
58
+ kubeletplugin .KubeletPluginSocketPath (DriverPluginSocketPath ))
59
+ if err != nil {
60
+ return nil , err
61
+ }
62
+ driver .plugin = plugin
40
63
41
- d := & driver {
42
- state : state ,
64
+ var resources kubeletplugin.Resources
65
+ for _ , device := range state .allocatable {
66
+ resources .Devices = append (resources .Devices , device )
43
67
}
68
+ plugin .PublishResources (ctx , resources )
44
69
45
- return d , nil
70
+ return driver , nil
46
71
}
47
72
48
73
// Shutdown stops the kubelet plugin that NewDriver started.
//
// It is safe to call on a nil driver or on a driver whose plugin was never
// started; in both cases it does nothing. ctx is accepted for interface
// symmetry and is not used. The returned error is always nil.
func (d *driver) Shutdown(ctx context.Context) error {
	// Guard against a partially constructed driver so that shutdown paths
	// never panic on a nil plugin.
	if d == nil || d.plugin == nil {
		return nil
	}
	d.plugin.Stop()
	return nil
}
52
77
53
- func (d * driver ) NodeListAndWatchResources (req * drapbv1.NodeListAndWatchResourcesRequest , stream drapbv1.Node_NodeListAndWatchResourcesServer ) error {
54
- model := d .state .getResourceModelFromAllocatableDevices ()
55
- resp := & drapbv1.NodeListAndWatchResourcesResponse {
56
- Resources : []* resourceapi.ResourceModel {& model },
57
- }
58
-
59
- if err := stream .Send (resp ); err != nil {
60
- return err
61
- }
62
-
63
- //nolint:all,S1000: should use for range instead of for { select {} } (gosimple)
64
- for {
65
- select {
66
- case <- d .doneCh :
67
- return nil
68
- }
69
- // TODO: Update with case for when GPUs go unhealthy
70
- }
71
- }
72
-
73
78
// NodePrepareResources handles a kubelet prepare request. Every claim in the
// request is prepared independently via nodePrepareResource and its result is
// stored in the response under the claim's UID. Per-claim failures are carried
// inside the individual responses; the returned error is always nil.
func (d *driver) NodePrepareResources(ctx context.Context, req *drapbv1.NodePrepareResourcesRequest) (*drapbv1.NodePrepareResourcesResponse, error) {
	klog.Infof("NodePrepareResource is called: number of claims: %d", len(req.Claims))

	results := make(map[string]*drapbv1.NodePrepareResourceResponse, len(req.Claims))
	for _, c := range req.Claims {
		results[c.UID] = d.nodePrepareResource(ctx, c)
	}

	return &drapbv1.NodePrepareResourcesResponse{Claims: results}, nil
}
86
88
87
89
func (d * driver ) nodePrepareResource (ctx context.Context , claim * drapbv1.Claim ) * drapbv1.NodePrepareResourceResponse {
88
- if len (claim .StructuredResourceHandle ) == 0 {
89
- return & drapbv1.NodePrepareResourceResponse {
90
- Error : "driver only supports structured parameters" ,
91
- }
92
- }
93
-
94
- allocated , err := d .getAllocatedDevices (ctx , claim )
90
+ resourceClaim , err := d .client .ResourceV1alpha3 ().ResourceClaims (claim .Namespace ).Get (
91
+ ctx ,
92
+ claim .Name ,
93
+ metav1.GetOptions {})
95
94
if err != nil {
96
95
return & drapbv1.NodePrepareResourceResponse {
97
- Error : fmt .Sprintf ("error allocating devices for claim %v: %v " , claim .Uid , err ),
96
+ Error : fmt .Sprintf ("failed to fetch ResourceClaim %s in namespace %s " , claim .Name , claim . Namespace ),
98
97
}
99
98
}
100
99
101
- prepared , err := d .state .Prepare (claim . Uid , allocated )
100
+ prepared , err := d .state .Prepare (resourceClaim )
102
101
if err != nil {
103
102
return & drapbv1.NodePrepareResourceResponse {
104
- Error : fmt .Sprintf ("error preparing devices for claim %v: %v" , claim .Uid , err ),
103
+ Error : fmt .Sprintf ("error preparing devices for claim %v: %v" , claim .UID , err ),
105
104
}
106
105
}
107
106
108
- klog .Infof ("Returning newly prepared devices for claim '%v': %s " , claim .Uid , prepared )
109
- return & drapbv1.NodePrepareResourceResponse {CDIDevices : prepared }
107
+ klog .Infof ("Returning newly prepared devices for claim '%v': %v " , claim .UID , prepared )
108
+ return & drapbv1.NodePrepareResourceResponse {Devices : prepared }
110
109
}
111
110
112
111
// NodeUnprepareResources handles a kubelet unprepare request. Every claim in
// the request is unprepared independently via nodeUnprepareResource and its
// result is stored in the response under the claim's UID. Per-claim failures
// are carried inside the individual responses; the returned error is always
// nil.
func (d *driver) NodeUnprepareResources(ctx context.Context, req *drapbv1.NodeUnprepareResourcesRequest) (*drapbv1.NodeUnprepareResourcesResponse, error) {
	klog.Infof("NodeUnPrepareResource is called: number of claims: %d", len(req.Claims))

	results := make(map[string]*drapbv1.NodeUnprepareResourceResponse, len(req.Claims))
	for _, c := range req.Claims {
		results[c.UID] = d.nodeUnprepareResource(ctx, c)
	}

	return &drapbv1.NodeUnprepareResourcesResponse{Claims: results}, nil
}
122
121
123
122
func (d * driver ) nodeUnprepareResource (ctx context.Context , claim * drapbv1.Claim ) * drapbv1.NodeUnprepareResourceResponse {
124
- if len (claim .StructuredResourceHandle ) == 0 {
125
- return & drapbv1.NodeUnprepareResourceResponse {
126
- Error : "driver only supports structured parameters" ,
127
- }
128
- }
129
-
130
- if err := d .state .Unprepare (claim .Uid ); err != nil {
123
+ if err := d .state .Unprepare (claim .UID ); err != nil {
131
124
return & drapbv1.NodeUnprepareResourceResponse {
132
- Error : fmt .Sprintf ("error unpreparing devices for claim %v: %v" , claim .Uid , err ),
125
+ Error : fmt .Sprintf ("error unpreparing devices for claim %v: %v" , claim .UID , err ),
133
126
}
134
127
}
135
128
136
129
return & drapbv1.NodeUnprepareResourceResponse {}
137
130
}
138
-
139
- func (d * driver ) getAllocatedDevices (ctx context.Context , claim * drapbv1.Claim ) (AllocatedDevices , error ) {
140
- allocated := AllocatedDevices {
141
- Gpu : & AllocatedGpus {},
142
- }
143
-
144
- for _ , r := range claim .StructuredResourceHandle [0 ].Results {
145
- name := r .AllocationResultModel .NamedResources .Name
146
- gpu := fmt .Sprintf ("GPU-%s" , name [4 :])
147
- allocated .Gpu .Devices = append (allocated .Gpu .Devices , gpu )
148
- }
149
-
150
- return allocated , nil
151
- }
0 commit comments