Skip to content

Commit 162b104

Browse files
committed
Add validating admission webhook to Helm chart
This change adds a validating admission webhook that validates the driver-specific opaque parameters that can be specified in ResourceClaims and ResourceClaimTemplates.
1 parent 0faf3f8 commit 162b104

17 files changed

+882
-5
lines changed

Diff for: .github/workflows/tests.yaml

+2-3
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,5 @@ jobs:
2525
run: make PREFIX=artifacts cmds
2626
- name: List binaries
2727
run: ls -al artifacts/
28-
# no tests yet
29-
# - name: Test
30-
# run: go test -v -race ./...
28+
- name: Test
29+
run: go test -v -race ./...

Diff for: README.md

+18-2
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,21 @@ kube-system kube-scheduler-dra-example-driver-cluster-control-plane
7171
local-path-storage local-path-provisioner-7dbf974f64-9jmc7 1/1 Running 0 1m
7272
```
7373

74+
The validating admission webhook is disabled by default. To enable it, install cert-manager and its CRDs, then
75+
set the `webhook.enabled=true` value when the dra-example-driver chart is installed.
76+
```bash
77+
helm install \
78+
--repo https://charts.jetstack.io \
79+
--version v1.16.3 \
80+
--create-namespace \
81+
--namespace cert-manager \
82+
--wait \
83+
--set crds.enabled=true \
84+
cert-manager \
85+
cert-manager
86+
```
87+
More options for installing cert-manager can be found in [their docs](https://cert-manager.io/docs/installation/).
88+
7489
And then install the example resource driver via `helm`.
7590
```bash
7691
helm upgrade -i \
@@ -83,8 +98,9 @@ helm upgrade -i \
8398
Double check the driver components have come up successfully:
8499
```console
85100
$ kubectl get pod -n dra-example-driver
86-
NAME READY STATUS RESTARTS AGE
87-
dra-example-driver-kubeletplugin-qwmbl 1/1 Running 0 1m
101+
NAME READY STATUS RESTARTS AGE
102+
dra-example-driver-kubeletplugin-qwmbl 1/1 Running 0 1m
103+
dra-example-driver-webhook-7d465fbd5b-n2wxt 1/1 Running 0 1m
88104
```
89105

90106
And show the initial state of available GPU devices on the worker node:

Diff for: cmd/dra-example-webhook/main.go

+304
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,304 @@
1+
/*
2+
Copyright 2025 The Kubernetes Authors.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package main
18+
19+
import (
20+
"encoding/json"
21+
"fmt"
22+
"io"
23+
"net/http"
24+
"os"
25+
"strings"
26+
27+
"github.com/urfave/cli/v2"
28+
29+
admissionv1 "k8s.io/api/admission/v1"
30+
resourceapi "k8s.io/api/resource/v1beta1"
31+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
32+
"k8s.io/apimachinery/pkg/runtime"
33+
"k8s.io/apimachinery/pkg/runtime/serializer"
34+
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
35+
"k8s.io/klog/v2"
36+
37+
configapi "sigs.k8s.io/dra-example-driver/api/example.com/resource/gpu/v1alpha1"
38+
"sigs.k8s.io/dra-example-driver/pkg/consts"
39+
"sigs.k8s.io/dra-example-driver/pkg/flags"
40+
)
41+
42+
var (
43+
resourceClaimResource = metav1.GroupVersionResource{
44+
Group: resourceapi.SchemeGroupVersion.Group,
45+
Version: resourceapi.SchemeGroupVersion.Version,
46+
Resource: "resourceclaims",
47+
}
48+
resourceClaimTemplateResource = metav1.GroupVersionResource{
49+
Group: resourceapi.SchemeGroupVersion.Group,
50+
Version: resourceapi.SchemeGroupVersion.Version,
51+
Resource: "resourceclaimtemplates",
52+
}
53+
)
54+
55+
type Flags struct {
56+
loggingConfig *flags.LoggingConfig
57+
58+
certFile string
59+
keyFile string
60+
port int
61+
}
62+
63+
var scheme = runtime.NewScheme()
64+
var codecs = serializer.NewCodecFactory(scheme)
65+
66+
func init() {
67+
utilruntime.Must(admissionv1.AddToScheme(scheme))
68+
}
69+
70+
func main() {
71+
if err := newApp().Run(os.Args); err != nil {
72+
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
73+
os.Exit(1)
74+
}
75+
}
76+
77+
func newApp() *cli.App {
78+
flags := &Flags{
79+
loggingConfig: flags.NewLoggingConfig(),
80+
}
81+
cliFlags := []cli.Flag{
82+
&cli.StringFlag{
83+
Name: "tls-cert-file",
84+
Usage: "File containing the default x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert).",
85+
Destination: &flags.certFile,
86+
Required: true,
87+
},
88+
&cli.StringFlag{
89+
Name: "tls-private-key-file",
90+
Usage: "File containing the default x509 private key matching --tls-cert-file.",
91+
Destination: &flags.keyFile,
92+
Required: true,
93+
},
94+
&cli.IntFlag{
95+
Name: "port",
96+
Usage: "Secure port that the webhook listens on",
97+
Value: 443,
98+
Destination: &flags.port,
99+
},
100+
}
101+
cliFlags = append(cliFlags, flags.loggingConfig.Flags()...)
102+
103+
app := &cli.App{
104+
Name: "dra-example-webhook",
105+
Usage: "dra-example-webhook implements a validating admission webhook complementing a DRA driver plugin.",
106+
ArgsUsage: " ",
107+
HideHelpCommand: true,
108+
Flags: cliFlags,
109+
Before: func(c *cli.Context) error {
110+
if c.Args().Len() > 0 {
111+
return fmt.Errorf("arguments not supported: %v", c.Args().Slice())
112+
}
113+
return flags.loggingConfig.Apply()
114+
},
115+
Action: func(c *cli.Context) error {
116+
server := &http.Server{
117+
Handler: newMux(),
118+
Addr: fmt.Sprintf(":%d", flags.port),
119+
}
120+
klog.Info("starting webhook server on", server.Addr)
121+
return server.ListenAndServeTLS(flags.certFile, flags.keyFile)
122+
},
123+
}
124+
125+
return app
126+
}
127+
128+
func newMux() *http.ServeMux {
129+
mux := http.NewServeMux()
130+
mux.HandleFunc("/validate-resource-claim-parameters", serveResourceClaim)
131+
mux.HandleFunc("/readyz", func(w http.ResponseWriter, req *http.Request) {
132+
_, err := w.Write([]byte("ok"))
133+
if err != nil {
134+
http.Error(w, err.Error(), http.StatusInternalServerError)
135+
return
136+
}
137+
})
138+
return mux
139+
}
140+
141+
func serveResourceClaim(w http.ResponseWriter, r *http.Request) {
142+
serve(w, r, admitResourceClaimParameters)
143+
}
144+
145+
// serve handles the http portion of a request prior to handing to an admit
146+
// function.
147+
func serve(w http.ResponseWriter, r *http.Request, admit func(admissionv1.AdmissionReview) *admissionv1.AdmissionResponse) {
148+
var body []byte
149+
if r.Body != nil {
150+
data, err := io.ReadAll(r.Body)
151+
if err != nil {
152+
klog.Error(err)
153+
http.Error(w, err.Error(), http.StatusInternalServerError)
154+
return
155+
}
156+
body = data
157+
}
158+
159+
// verify the content type is accurate
160+
contentType := r.Header.Get("Content-Type")
161+
if contentType != "application/json" {
162+
msg := fmt.Sprintf("contentType=%s, expected application/json", contentType)
163+
klog.Error(msg)
164+
http.Error(w, msg, http.StatusUnsupportedMediaType)
165+
return
166+
}
167+
168+
klog.V(2).Infof("handling request: %s", body)
169+
170+
requestedAdmissionReview, err := readAdmissionReview(body)
171+
if err != nil {
172+
msg := fmt.Sprintf("failed to read AdmissionReview from request body: %v", err)
173+
klog.Error(msg)
174+
http.Error(w, msg, http.StatusBadRequest)
175+
return
176+
}
177+
responseAdmissionReview := &admissionv1.AdmissionReview{}
178+
responseAdmissionReview.SetGroupVersionKind(requestedAdmissionReview.GroupVersionKind())
179+
responseAdmissionReview.Response = admit(*requestedAdmissionReview)
180+
responseAdmissionReview.Response.UID = requestedAdmissionReview.Request.UID
181+
182+
klog.V(2).Infof("sending response: %v", responseAdmissionReview)
183+
respBytes, err := json.Marshal(responseAdmissionReview)
184+
if err != nil {
185+
klog.Error(err)
186+
http.Error(w, err.Error(), http.StatusInternalServerError)
187+
return
188+
}
189+
w.Header().Set("Content-Type", "application/json")
190+
if _, err := w.Write(respBytes); err != nil {
191+
klog.Error(err)
192+
}
193+
}
194+
195+
func readAdmissionReview(data []byte) (*admissionv1.AdmissionReview, error) {
196+
deserializer := codecs.UniversalDeserializer()
197+
obj, gvk, err := deserializer.Decode(data, nil, nil)
198+
if err != nil {
199+
return nil, fmt.Errorf("request could not be decoded: %w", err)
200+
}
201+
202+
if *gvk != admissionv1.SchemeGroupVersion.WithKind("AdmissionReview") {
203+
return nil, fmt.Errorf("unsupported group version kind: %v", gvk)
204+
}
205+
206+
requestedAdmissionReview, ok := obj.(*admissionv1.AdmissionReview)
207+
if !ok {
208+
return nil, fmt.Errorf("expected v1.AdmissionReview but got: %T", obj)
209+
}
210+
211+
return requestedAdmissionReview, nil
212+
}
213+
214+
// admitResourceClaimParameters accepts both ResourceClaims and ResourceClaimTemplates and validates their
215+
// opaque device configuration parameters for this driver.
216+
func admitResourceClaimParameters(ar admissionv1.AdmissionReview) *admissionv1.AdmissionResponse {
217+
klog.V(2).Info("admitting resource claim parameters")
218+
219+
var deviceConfigs []resourceapi.DeviceClaimConfiguration
220+
var specPath string
221+
222+
raw := ar.Request.Object.Raw
223+
deserializer := codecs.UniversalDeserializer()
224+
225+
switch ar.Request.Resource {
226+
case resourceClaimResource:
227+
claim := resourceapi.ResourceClaim{}
228+
if _, _, err := deserializer.Decode(raw, nil, &claim); err != nil {
229+
klog.Error(err)
230+
return &admissionv1.AdmissionResponse{
231+
Result: &metav1.Status{
232+
Message: err.Error(),
233+
Reason: metav1.StatusReasonBadRequest,
234+
},
235+
}
236+
}
237+
deviceConfigs = claim.Spec.Devices.Config
238+
specPath = "spec"
239+
case resourceClaimTemplateResource:
240+
claimTemplate := resourceapi.ResourceClaimTemplate{}
241+
if _, _, err := deserializer.Decode(raw, nil, &claimTemplate); err != nil {
242+
klog.Error(err)
243+
return &admissionv1.AdmissionResponse{
244+
Result: &metav1.Status{
245+
Message: err.Error(),
246+
Reason: metav1.StatusReasonBadRequest,
247+
},
248+
}
249+
}
250+
deviceConfigs = claimTemplate.Spec.Spec.Devices.Config
251+
specPath = "spec.spec"
252+
default:
253+
msg := fmt.Sprintf("expected resource to be %s or %s, got %s", resourceClaimResource, resourceClaimTemplateResource, ar.Request.Resource)
254+
klog.Error(msg)
255+
return &admissionv1.AdmissionResponse{
256+
Result: &metav1.Status{
257+
Message: msg,
258+
Reason: metav1.StatusReasonBadRequest,
259+
},
260+
}
261+
}
262+
263+
var errs []error
264+
for configIndex, config := range deviceConfigs {
265+
if config.Opaque == nil || config.Opaque.Driver != consts.DriverName {
266+
continue
267+
}
268+
269+
fieldPath := fmt.Sprintf("%s.devices.config[%d].opaque.parameters", specPath, configIndex)
270+
decodedConfig, err := runtime.Decode(configapi.Decoder, config.DeviceConfiguration.Opaque.Parameters.Raw)
271+
if err != nil {
272+
errs = append(errs, fmt.Errorf("error decoding object at %s: %w", fieldPath, err))
273+
continue
274+
}
275+
gpuConfig, ok := decodedConfig.(*configapi.GpuConfig)
276+
if !ok {
277+
errs = append(errs, fmt.Errorf("expected v1alpha1.GpuConfig at %s but got: %T", fieldPath, decodedConfig))
278+
continue
279+
}
280+
err = gpuConfig.Validate()
281+
if err != nil {
282+
errs = append(errs, fmt.Errorf("object at %s is invalid: %w", fieldPath, err))
283+
}
284+
}
285+
286+
if len(errs) > 0 {
287+
var errMsgs []string
288+
for _, err := range errs {
289+
errMsgs = append(errMsgs, err.Error())
290+
}
291+
msg := fmt.Sprintf("%d configs failed to validate: %s", len(errs), strings.Join(errMsgs, "; "))
292+
klog.Error(msg)
293+
return &admissionv1.AdmissionResponse{
294+
Result: &metav1.Status{
295+
Message: msg,
296+
Reason: metav1.StatusReason(metav1.StatusReasonInvalid),
297+
},
298+
}
299+
}
300+
301+
return &admissionv1.AdmissionResponse{
302+
Allowed: true,
303+
}
304+
}

0 commit comments

Comments
 (0)