
Commit e04a8e7

mattcary authored and jsafrane committed
UPSTREAM: 1028: backoff per {node,disk} pair instead of just node
1 parent 63587ff commit e04a8e7

3 files changed: +154 −76 lines changed


pkg/gce-pd-csi-driver/controller.go (+70 −12)
@@ -39,8 +39,8 @@ import (
 )
 
 const (
-	nodeBackoffInitialDuration = 200 * time.Millisecond
-	nodeBackoffMaxDuration     = 5 * time.Minute
+	errorBackoffInitialDuration = 200 * time.Millisecond
+	errorBackoffMaxDuration     = 5 * time.Minute
 )
 
 type GCEControllerServer struct {
@@ -58,11 +58,46 @@ type GCEControllerServer struct {
 	// Aborted error
 	volumeLocks *common.VolumeLocks
 
-	// When the attacher sidecar issues controller publish/unpublish for multiple disks for a given node, the per-instance operation queue in GCE fills up causing attach/detach disk requests to immediately return with an error until the queue drains. nodeBackoff keeps track of any active backoff condition on a given node, and the time when retry of controller publish/unpublish is permissible. A node is marked with backoff when any error is encountered by the driver during controller publish/unpublish calls.
-	// If the controller eventually allows controller publish/publish requests for volumes (because the backoff time expired), and those requests fail, the next backoff retry time will be updated on every failure and capped at 'nodeBackoffMaxDuration'. Also, any successful controller publish/unpublish call will clear the backoff condition for the node.
-	nodeBackoff *flowcontrol.Backoff
+	// There are several kinds of errors that are immediately retried by either
+	// the CSI sidecars or the k8s control plane. The retries consume GCP API
+	// quota, e.g. by doing ListVolumes, and so backoff needs to be used to
+	// prevent quota exhaustion.
+	//
+	// Examples of these errors are the per-instance GCE operation queue getting
+	// full (typically only 32 operations in flight at a time are allowed), and
+	// disks being deleted out from under a PV causing unpublish errors.
+	//
+	// While we need to backoff, we also need some semblance of fairness. In
+	// particular, volume unpublish retries happen very quickly, and with
+	// a single backoff per node these retries can prevent any other operation
+	// from making progress, even if it would succeed. Hence we track errors on
+	// node and disk pairs, backing off only for calls matching such a
+	// pair.
+	//
+	// An implication is that in the full operation queue situation, requests
+	// for new disks will not backoff the first time. This is acceptable as a
+	// single spurious call will not cause problems for quota exhaustion or make
+	// the operation queue problem worse. This is well compensated by giving
+	// disks where no problems are occurring a chance to be processed.
+	//
+	// errorBackoff keeps track of any active backoff condition on a given node,
+	// and the time when retry of controller publish/unpublish is permissible. A
+	// node and disk pair is marked with backoff when any error is encountered
+	// by the driver during controller publish/unpublish calls. If the
+	// controller eventually allows controller publish/unpublish requests for
+	// volumes (because the backoff time expired), and those requests fail, the
+	// next backoff retry time will be updated on every failure and capped at
+	// 'errorBackoffMaxDuration'. Also, any successful controller
+	// publish/unpublish call will clear the backoff condition for a node and
+	// disk.
+	errorBackoff *csiErrorBackoff
 }
 
+type csiErrorBackoff struct {
+	backoff *flowcontrol.Backoff
+}
+type csiErrorBackoffId string
+
 type workItem struct {
 	ctx        context.Context
 	publishReq *csi.ControllerPublishVolumeRequest
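
The new struct comment is the heart of the change: backoff state is now keyed by a (node, disk) pair rather than by node alone, so a disk that keeps failing cannot starve other disks on the same node. Below is a minimal sketch of that behavior, using the same k8s.io/client-go/util/flowcontrol package and the "nodeId:volumeId" key format introduced further down in this diff. The node and disk names are made up, and this program is not part of the commit.

package main

import (
	"fmt"
	"time"

	"k8s.io/client-go/util/flowcontrol"
)

func main() {
	// Same knobs as the renamed constants above.
	bo := flowcontrol.NewBackOff(200*time.Millisecond, 5*time.Minute)

	failing := "node-1:disk-a" // repeated publish/unpublish errors for this pair
	healthy := "node-1:disk-b" // another disk on the same node, no errors

	for i := 0; i < 3; i++ {
		bo.Next(failing, bo.Clock.Now())
		fmt.Println(bo.Get(failing)) // 200ms, 400ms, 800ms: doubles per failure, capped at the max
	}

	// Only the failing pair is blocked; the healthy pair has no entry at all.
	fmt.Println(bo.IsInBackOffSinceUpdate(failing, bo.Clock.Now())) // true
	fmt.Println(bo.IsInBackOffSinceUpdate(healthy, bo.Clock.Now())) // false
}

With a single per-node key, the first Next call above would have blocked disk-b as well, which is exactly the starvation the comment describes.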
@@ -341,17 +376,18 @@ func (gceCS *GCEControllerServer) ControllerPublishVolume(ctx context.Context, r
 		return nil, err
 	}
 
-	if gceCS.nodeBackoff.IsInBackOffSinceUpdate(req.NodeId, gceCS.nodeBackoff.Clock.Now()) {
+	backoffId := gceCS.errorBackoff.backoffId(req.NodeId, req.VolumeId)
+	if gceCS.errorBackoff.blocking(backoffId) {
 		return nil, status.Errorf(codes.Unavailable, "ControllerPublish not permitted on node %q due to backoff condition", req.NodeId)
 	}
 
 	resp, err := gceCS.executeControllerPublishVolume(ctx, req)
 	if err != nil {
-		klog.Infof("For node %s adding backoff due to error for volume %s", req.NodeId, req.VolumeId)
-		gceCS.nodeBackoff.Next(req.NodeId, gceCS.nodeBackoff.Clock.Now())
+		klog.Infof("For node %s adding backoff due to error for volume %s: %v", req.NodeId, req.VolumeId, err)
+		gceCS.errorBackoff.next(backoffId)
 	} else {
 		klog.Infof("For node %s clear backoff due to successful publish of volume %v", req.NodeId, req.VolumeId)
-		gceCS.nodeBackoff.Reset(req.NodeId)
+		gceCS.errorBackoff.reset(backoffId)
 	}
 	return resp, err
 }
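
ControllerPublishVolume now follows a three-step flow: derive the per-(node, volume) backoff id, fail fast with codes.Unavailable while that id is blocking, then extend the backoff on error or clear it on success. A hypothetical refactor of that flow, not part of this commit, could factor it into one helper; the name runWithErrorBackoff is made up, and the sketch assumes the csiErrorBackoff type added in this diff plus the status, codes, and klog imports already used by controller.go.

// runWithErrorBackoff wraps an operation with the publish/unpublish backoff
// bookkeeping: refuse while the (node, volume) pair is in backoff, run the
// operation, and then grow or clear the backoff based on the result.
func runWithErrorBackoff(eb *csiErrorBackoff, nodeId, volumeId, opName string, op func() error) error {
	id := eb.backoffId(nodeId, volumeId)
	if eb.blocking(id) {
		return status.Errorf(codes.Unavailable, "%s not permitted on node %q due to backoff condition", opName, nodeId)
	}
	if err := op(); err != nil {
		klog.Infof("For node %s adding backoff due to error for volume %s: %v", nodeId, volumeId, err)
		eb.next(id)
		return err
	}
	klog.Infof("For node %s clear backoff due to successful %s of volume %s", nodeId, opName, volumeId)
	eb.reset(id)
	return nil
}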
@@ -478,17 +514,18 @@ func (gceCS *GCEControllerServer) ControllerUnpublishVolume(ctx context.Context,
 		return nil, err
 	}
 
-	if gceCS.nodeBackoff.IsInBackOffSinceUpdate(req.NodeId, gceCS.nodeBackoff.Clock.Now()) {
+	backoffId := gceCS.errorBackoff.backoffId(req.NodeId, req.VolumeId)
+	if gceCS.errorBackoff.blocking(backoffId) {
 		return nil, status.Errorf(codes.Unavailable, "ControllerUnpublish not permitted on node %q due to backoff condition", req.NodeId)
 	}
 
 	resp, err := gceCS.executeControllerUnpublishVolume(ctx, req)
 	if err != nil {
 		klog.Infof("For node %s adding backoff due to error for volume %s", req.NodeId, req.VolumeId)
-		gceCS.nodeBackoff.Next(req.NodeId, gceCS.nodeBackoff.Clock.Now())
+		gceCS.errorBackoff.next(backoffId)
 	} else {
 		klog.Infof("For node %s clear backoff due to successful unpublish of volume %v", req.NodeId, req.VolumeId)
-		gceCS.nodeBackoff.Reset(req.NodeId)
+		gceCS.errorBackoff.reset(backoffId)
 	}
 	return resp, err
 }
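
ControllerUnpublishVolume follows the identical flow; only the operation name in the messages and the execute call differ. It could therefore share the hypothetical helper sketched above, with the response captured in a closure. This caller fragment is likewise not part of the commit and only assumes the signatures visible in this diff.

	// Hypothetical body for the unpublish path, using the runWithErrorBackoff sketch above.
	var resp *csi.ControllerUnpublishVolumeResponse
	err := runWithErrorBackoff(gceCS.errorBackoff, req.NodeId, req.VolumeId, "ControllerUnpublish", func() error {
		var opErr error
		resp, opErr = gceCS.executeControllerUnpublishVolume(ctx, req)
		return opErr
	})
	return resp, err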
@@ -1525,3 +1562,24 @@ func pickRandAndConsecutive(slice []string, n int) ([]string, error) {
 	}
 	return ret, nil
 }
+
+func newCsiErrorBackoff() *csiErrorBackoff {
+	return &csiErrorBackoff{flowcontrol.NewBackOff(errorBackoffInitialDuration, errorBackoffMaxDuration)}
+}
+
+func (_ *csiErrorBackoff) backoffId(nodeId, volumeId string) csiErrorBackoffId {
+	return csiErrorBackoffId(fmt.Sprintf("%s:%s", nodeId, volumeId))
+}
+
+func (b *csiErrorBackoff) blocking(id csiErrorBackoffId) bool {
+	blk := b.backoff.IsInBackOffSinceUpdate(string(id), b.backoff.Clock.Now())
+	return blk
+}
+
+func (b *csiErrorBackoff) next(id csiErrorBackoffId) {
+	b.backoff.Next(string(id), b.backoff.Clock.Now())
+}
+
+func (b *csiErrorBackoff) reset(id csiErrorBackoffId) {
+	b.backoff.Reset(string(id))
+}
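
These helpers carry the whole per-pair policy, so they are easy to exercise in isolation. Here is a hypothetical test-style sketch, not one of the three files changed by this commit; it assumes it sits in the same package as controller.go so that newCsiErrorBackoff and friends are visible, and the node and volume names are made up.

// Assumes the same package as controller.go (package clause omitted) and import "testing".
func TestCsiErrorBackoffPerPair(t *testing.T) {
	eb := newCsiErrorBackoff()

	a := eb.backoffId("node-1", "vol-a") // key is "nodeId:volumeId"
	b := eb.backoffId("node-1", "vol-b") // different disk, same node

	// An error on pair a starts its backoff window.
	eb.next(a)
	if !eb.blocking(a) {
		t.Error("expected pair a to be in backoff after an error")
	}
	// Pair b on the same node has no entry, so its calls are not blocked.
	if eb.blocking(b) {
		t.Error("expected pair b to be unaffected by pair a's errors")
	}
	// A successful publish/unpublish clears the pair.
	eb.reset(a)
	if eb.blocking(a) {
		t.Error("expected reset to clear the backoff for pair a")
	}
}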

0 commit comments
