@@ -31,14 +31,18 @@ import (
     "google.golang.org/grpc/status"
     "k8s.io/apimachinery/pkg/util/sets"
     "k8s.io/apimachinery/pkg/util/uuid"
-    "k8s.io/apimachinery/pkg/util/wait"
-    "k8s.io/client-go/util/workqueue"
+    "k8s.io/client-go/util/flowcontrol"
     "k8s.io/klog"

     "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
     gce "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/compute"
 )

+const (
+    errorBackoffInitialDuration = 200 * time.Millisecond
+    errorBackoffMaxDuration     = 5 * time.Minute
+)
+
 type GCEControllerServer struct {
     Driver        *GCEDriver
     CloudProvider gce.GCECompute
@@ -54,16 +58,45 @@ type GCEControllerServer struct {
     // Aborted error
     volumeLocks *common.VolumeLocks

-    // queue is a rate limited work queue for Controller Publish/Unpublish
-    // Volume calls
-    queue workqueue.RateLimitingInterface
+    // There are several kinds of errors that are immediately retried by either
+    // the CSI sidecars or the k8s control plane. The retries consume GCP API
+    // quota, e.g. by doing ListVolumes, so backoff is needed to prevent quota
+    // exhaustion.
+    //
+    // Examples of these errors are the per-instance GCE operation queue getting
+    // full (typically only 32 operations in flight at a time are allowed), and
+    // disks being deleted out from under a PV, causing unpublish errors.
+    //
+    // While we need to back off, we also need some semblance of fairness. In
+    // particular, volume unpublish retries happen very quickly, and with a
+    // single backoff per node these retries can prevent any other operation
+    // from making progress, even if it would succeed. Hence we track errors on
+    // node and disk pairs, backing off only for calls matching such a pair.
+    //
+    // An implication is that in the full-operation-queue situation, requests
+    // for new disks will not back off the first time. This is acceptable, as a
+    // single spurious call will not cause quota exhaustion or make the
+    // operation queue problem worse. It is well compensated for by giving
+    // disks where no problems are occurring a chance to be processed.
+    //
+    // errorBackoff keeps track of any active backoff condition on a given node,
+    // and the time when a retry of controller publish/unpublish is permissible.
+    // A node and disk pair is marked with backoff when any error is encountered
+    // by the driver during controller publish/unpublish calls. If the
+    // controller eventually allows controller publish/unpublish requests for
+    // volumes (because the backoff time expired), and those requests fail, the
+    // next backoff retry time will be updated on every failure and capped at
+    // errorBackoffMaxDuration. Also, any successful controller
+    // publish/unpublish call clears the backoff condition for that node and
+    // disk pair.
+    errorBackoff *csiErrorBackoff
+}

-    // publishErrorsSeenOnNode is a list of nodes with attach/detach
-    // operation failures so those nodes shall be rate limited for all
-    // the attach/detach operations until there is an attach / detach
-    // operation succeeds
-    publishErrorsSeenOnNode map[string]bool
+type csiErrorBackoff struct {
+    backoff *flowcontrol.Backoff
 }
+type csiErrorBackoffId string

 type workItem struct {
     ctx context.Context
@@ -336,73 +369,27 @@ func (gceCS *GCEControllerServer) DeleteVolume(ctx context.Context, req *csi.Del
     return &csi.DeleteVolumeResponse{}, nil
 }

-// Run starts the GCEControllerServer.
-func (gceCS *GCEControllerServer) Run() {
-    go wait.Until(gceCS.worker, 1*time.Second, wait.NeverStop)
-}
-
-func (gceCS *GCEControllerServer) worker() {
-    // Runs until workqueue is shut down
-    for gceCS.processNextWorkItem() {
-    }
-}
-
-func (gceCS *GCEControllerServer) processNextWorkItem() bool {
-    item, quit := gceCS.queue.Get()
-    if quit {
-        return false
-    }
-    defer gceCS.queue.Done(item)
-
-    workItem, ok := item.(*workItem)
-    if !ok {
-        gceCS.queue.AddRateLimited(item)
-        return true
-    }
-
-    if workItem.publishReq != nil {
-        _, err := gceCS.executeControllerPublishVolume(workItem.ctx, workItem.publishReq)
-
-        if err != nil {
-            klog.Errorf("ControllerPublishVolume failed with error: %v", err)
-        }
-    }
-
-    if workItem.unpublishReq != nil {
-        _, err := gceCS.executeControllerUnpublishVolume(workItem.ctx, workItem.unpublishReq)
-
-        if err != nil {
-            klog.Errorf("ControllerUnpublishVolume failed with error: %v", err)
-        }
-    }
-
-    gceCS.queue.Forget(item)
-    return true
-}
-
 func (gceCS *GCEControllerServer) ControllerPublishVolume(ctx context.Context, req *csi.ControllerPublishVolumeRequest) (*csi.ControllerPublishVolumeResponse, error) {
-    // Only valid requests will be queued
+    // Only valid requests will be accepted
     _, _, err := gceCS.validateControllerPublishVolumeRequest(ctx, req)
-
     if err != nil {
         return nil, err
     }

-    // If the node is not marked, proceed the request
-    if _, found := gceCS.publishErrorsSeenOnNode[req.NodeId]; !found {
-        return gceCS.executeControllerPublishVolume(ctx, req)
+    backoffId := gceCS.errorBackoff.backoffId(req.NodeId, req.VolumeId)
+    if gceCS.errorBackoff.blocking(backoffId) {
+        return nil, status.Errorf(codes.Unavailable, "ControllerPublish not permitted on node %q due to backoff condition", req.NodeId)
     }

-    // Node is marked so queue up the request. Note the original gRPC context may get canceled,
-    // so a new one is created here.
-    //
-    // Note that the original context probably has a timeout (see csiAttach in external-attacher),
-    // which is ignored.
-    gceCS.queue.AddRateLimited(&workItem{
-        ctx:        context.Background(),
-        publishReq: req,
-    })
-    return nil, status.Error(codes.Unavailable, "Request queued due to error condition on node")
+    resp, err := gceCS.executeControllerPublishVolume(ctx, req)
+    if err != nil {
+        klog.Infof("For node %s adding backoff due to error for volume %s: %v", req.NodeId, req.VolumeId, err)
+        gceCS.errorBackoff.next(backoffId)
+    } else {
+        klog.Infof("For node %s clear backoff due to successful publish of volume %v", req.NodeId, req.VolumeId)
+        gceCS.errorBackoff.reset(backoffId)
+    }
+    return resp, err
 }

 func (gceCS *GCEControllerServer) validateControllerPublishVolumeRequest(ctx context.Context, req *csi.ControllerPublishVolumeRequest) (string, *meta.Key, error) {
@@ -514,39 +501,33 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con

     err = gceCS.CloudProvider.WaitForAttach(ctx, project, volKey, instanceZone, instanceName)
     if err != nil {
-        // Mark the node and rate limit all the following attach/detach
-        // operations for this node
-        gceCS.publishErrorsSeenOnNode[nodeID] = true
         return nil, status.Error(codes.Internal, fmt.Sprintf("unknown WaitForAttach error: %v", err))
     }
-
-    // Attach succeeds so unmark the node
-    delete(gceCS.publishErrorsSeenOnNode, nodeID)
-
     klog.V(4).Infof("ControllerPublishVolume succeeded for disk %v to instance %v", volKey, nodeID)
     return pubVolResp, nil
 }

 func (gceCS *GCEControllerServer) ControllerUnpublishVolume(ctx context.Context, req *csi.ControllerUnpublishVolumeRequest) (*csi.ControllerUnpublishVolumeResponse, error) {
     // Only valid requests will be queued
     _, _, err := gceCS.validateControllerUnpublishVolumeRequest(ctx, req)
-
     if err != nil {
         return nil, err
     }

-    // If the node is not marked, proceed the request
-    if _, found := gceCS.publishErrorsSeenOnNode[req.NodeId]; !found {
-        return gceCS.executeControllerUnpublishVolume(ctx, req)
+    backoffId := gceCS.errorBackoff.backoffId(req.NodeId, req.VolumeId)
+    if gceCS.errorBackoff.blocking(backoffId) {
+        return nil, status.Errorf(codes.Unavailable, "ControllerUnpublish not permitted on node %q due to backoff condition", req.NodeId)
     }

-    // Node is marked so queue up the request
-    gceCS.queue.AddRateLimited(&workItem{
-        ctx:          context.Background(),
-        unpublishReq: req,
-    })
-
-    return nil, status.Error(codes.Unavailable, "Request queued due to error condition on node")
+    resp, err := gceCS.executeControllerUnpublishVolume(ctx, req)
+    if err != nil {
+        klog.Infof("For node %s adding backoff due to error for volume %s", req.NodeId, req.VolumeId)
+        gceCS.errorBackoff.next(backoffId)
+    } else {
+        klog.Infof("For node %s clear backoff due to successful unpublish of volume %v", req.NodeId, req.VolumeId)
+        gceCS.errorBackoff.reset(backoffId)
+    }
+    return resp, err
 }

 func (gceCS *GCEControllerServer) validateControllerUnpublishVolumeRequest(ctx context.Context, req *csi.ControllerUnpublishVolumeRequest) (string, *meta.Key, error) {
@@ -622,15 +603,9 @@ func (gceCS *GCEControllerServer) executeControllerUnpublishVolume(ctx context.C

     err = gceCS.CloudProvider.DetachDisk(ctx, project, deviceName, instanceZone, instanceName)
     if err != nil {
-        // Mark the node and rate limit all the following attach/detach
-        // operations for this node
-        gceCS.publishErrorsSeenOnNode[nodeID] = true
         return nil, status.Error(codes.Internal, fmt.Sprintf("unknown detach error: %v", err))
     }

-    // Detach succeeds so unmark the node
-    delete(gceCS.publishErrorsSeenOnNode, nodeID)
-
     klog.V(4).Infof("ControllerUnpublishVolume succeeded for disk %v from node %v", volKey, nodeID)
     return &csi.ControllerUnpublishVolumeResponse{}, nil
 }
@@ -1587,3 +1562,24 @@ func pickRandAndConsecutive(slice []string, n int) ([]string, error) {
     }
     return ret, nil
 }
+
+func newCsiErrorBackoff() *csiErrorBackoff {
+    return &csiErrorBackoff{flowcontrol.NewBackOff(errorBackoffInitialDuration, errorBackoffMaxDuration)}
+}
+
+func (_ *csiErrorBackoff) backoffId(nodeId, volumeId string) csiErrorBackoffId {
+    return csiErrorBackoffId(fmt.Sprintf("%s:%s", nodeId, volumeId))
+}
+
+func (b *csiErrorBackoff) blocking(id csiErrorBackoffId) bool {
+    blk := b.backoff.IsInBackOffSinceUpdate(string(id), b.backoff.Clock.Now())
+    return blk
+}
+
+func (b *csiErrorBackoff) next(id csiErrorBackoffId) {
+    b.backoff.Next(string(id), b.backoff.Clock.Now())
+}
+
+func (b *csiErrorBackoff) reset(id csiErrorBackoffId) {
+    b.backoff.Reset(string(id))
+}
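
For reference, below is a minimal standalone sketch (not part of the patch) of how the client-go flowcontrol.Backoff behind csiErrorBackoff behaves when keyed on a node and disk pair, which is the fairness property described in the GCEControllerServer comment above. The node name, disk handles, and literal durations are illustrative assumptions, not values taken from the driver.

package main

import (
    "fmt"
    "time"

    "k8s.io/client-go/util/flowcontrol"
)

func main() {
    // Same window as the driver's errorBackoffInitialDuration / errorBackoffMaxDuration.
    backoff := flowcontrol.NewBackOff(200*time.Millisecond, 5*time.Minute)

    // Backoff state is keyed on a "<node>:<volume>" pair (hypothetical values
    // here), so a failing volume does not block other volumes on the same node.
    failing := "node-a:projects/p/zones/z/disks/disk-1"
    healthy := "node-a:projects/p/zones/z/disks/disk-2"

    // A failed publish/unpublish records the next permissible retry time; each
    // further failure roughly doubles the delay, capped at the 5 minute maximum.
    backoff.Next(failing, backoff.Clock.Now())
    backoff.Next(failing, backoff.Clock.Now())

    fmt.Println(backoff.IsInBackOffSinceUpdate(failing, backoff.Clock.Now())) // true: the driver would reject with codes.Unavailable
    fmt.Println(backoff.IsInBackOffSinceUpdate(healthy, backoff.Clock.Now())) // false: an untouched pair proceeds immediately

    // A successful call clears the entry, so the next request is attempted at once.
    backoff.Reset(failing)
    fmt.Println(backoff.IsInBackOffSinceUpdate(failing, backoff.Clock.Now())) // false
}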