Skip to content

Commit e3344ef

Browse files
authored
Merge pull request kubernetes-sigs#956 from sagor999/backoff
allow to specify wait time for attach disk operation
2 parents 222891a + ca65f3c commit e3344ef

File tree

2 files changed

+52
-12
lines changed

2 files changed

+52
-12
lines changed

cmd/gce-pd-csi-driver/main.go

Lines changed: 30 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -33,14 +33,24 @@ import (
3333
)
3434

3535
var (
36-
cloudConfigFilePath = flag.String("cloud-config", "", "Path to GCE cloud provider config")
37-
endpoint = flag.String("endpoint", "unix:/tmp/csi.sock", "CSI endpoint")
38-
runControllerService = flag.Bool("run-controller-service", true, "If set to false then the CSI driver does not activate its controller service (default: true)")
39-
runNodeService = flag.Bool("run-node-service", true, "If set to false then the CSI driver does not activate its node service (default: true)")
40-
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled.")
41-
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
42-
extraVolumeLabelsStr = flag.String("extra-labels", "", "Extra labels to attach to each PD created. It is a comma separated list of key value pairs like '<key1>=<value1>,<key2>=<value2>'. See https://cloud.google.com/compute/docs/labeling-resources for details")
43-
version string
36+
cloudConfigFilePath = flag.String("cloud-config", "", "Path to GCE cloud provider config")
37+
endpoint = flag.String("endpoint", "unix:/tmp/csi.sock", "CSI endpoint")
38+
runControllerService = flag.Bool("run-controller-service", true, "If set to false then the CSI driver does not activate its controller service (default: true)")
39+
runNodeService = flag.Bool("run-node-service", true, "If set to false then the CSI driver does not activate its node service (default: true)")
40+
httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled.")
41+
metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.")
42+
extraVolumeLabelsStr = flag.String("extra-labels", "", "Extra labels to attach to each PD created. It is a comma separated list of key value pairs like '<key1>=<value1>,<key2>=<value2>'. See https://cloud.google.com/compute/docs/labeling-resources for details")
43+
attachDiskBackoffDuration = flag.Duration("attach-disk-backoff-duration", 5*time.Second, "Duration for attachDisk backoff")
44+
attachDiskBackoffFactor = flag.Float64("attach-disk-backoff-factor", 0.0, "Factor for attachDisk backoff")
45+
attachDiskBackoffJitter = flag.Float64("attach-disk-backoff-jitter", 0.0, "Jitter for attachDisk backoff")
46+
attachDiskBackoffSteps = flag.Int("attach-disk-backoff-steps", 24, "Steps for attachDisk backoff")
47+
attachDiskBackoffCap = flag.Duration("attach-disk-backoff-cap", 0, "Cap for attachDisk backoff")
48+
waitForOpBackoffDuration = flag.Duration("wait-op-backoff-duration", 3*time.Second, "Duration for wait for operation backoff")
49+
waitForOpBackoffFactor = flag.Float64("wait-op-backoff-factor", 0.0, "Factor for wait for operation backoff")
50+
waitForOpBackoffJitter = flag.Float64("wait-op-backoff-jitter", 0.0, "Jitter for wait for operation backoff")
51+
waitForOpBackoffSteps = flag.Int("wait-op-backoff-steps", 100, "Steps for wait for operation backoff")
52+
waitForOpBackoffCap = flag.Duration("wait-op-backoff-cap", 0, "Cap for wait for operation backoff")
53+
version string
4454
)
4555

4656
const (
@@ -128,5 +138,17 @@ func handle() {
128138
klog.Fatalf("Failed to initialize GCE CSI Driver: %v", err)
129139
}
130140

141+
gce.AttachDiskBackoff.Duration = *attachDiskBackoffDuration
142+
gce.AttachDiskBackoff.Factor = *attachDiskBackoffFactor
143+
gce.AttachDiskBackoff.Jitter = *attachDiskBackoffJitter
144+
gce.AttachDiskBackoff.Steps = *attachDiskBackoffSteps
145+
gce.AttachDiskBackoff.Cap = *attachDiskBackoffCap
146+
147+
gce.WaitForOpBackoff.Duration = *waitForOpBackoffDuration
148+
gce.WaitForOpBackoff.Factor = *waitForOpBackoffFactor
149+
gce.WaitForOpBackoff.Jitter = *waitForOpBackoffJitter
150+
gce.WaitForOpBackoff.Steps = *waitForOpBackoffSteps
151+
gce.WaitForOpBackoff.Cap = *waitForOpBackoffCap
152+
131153
gceDriver.Run(*endpoint)
132154
}

pkg/gce-cloud-provider/compute/gce-compute.go

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,24 @@ const (
4949
GCEAPIVersionBeta GCEAPIVersion = "beta"
5050
)
5151

52+
// AttachDiskBackoff is the backoff used to wait for AttachDisk to complete.
53+
// The default values approximate the previous wait.Poll behavior: poll every 5 seconds for 24 steps, i.e. roughly a 2 minute timeout.
54+
var AttachDiskBackoff = wait.Backoff{
55+
Duration: 5 * time.Second,
56+
Factor: 0.0,
57+
Jitter: 0.0,
58+
Steps: 24,
59+
Cap: 0}
60+
61+
// WaitForOpBackoff is the backoff used to wait for a Global, Regional or Zonal operation to complete.
62+
// The default values approximate the previous wait.Poll behavior: poll every 3 seconds for 100 steps, i.e. roughly a 5 minute timeout.
63+
var WaitForOpBackoff = wait.Backoff{
64+
Duration: 3 * time.Second,
65+
Factor: 0.0,
66+
Jitter: 0.0,
67+
Steps: 100,
68+
Cap: 0}
69+
5270
type GCECompute interface {
5371
// Metadata information
5472
GetDefaultProject() string
@@ -739,7 +757,7 @@ func (cloud *CloudProvider) getRegionalDiskTypeURI(project string, region, diskT
739757

740758
func (cloud *CloudProvider) waitForZonalOp(ctx context.Context, project, opName string, zone string) error {
741759
// The v1 API can query for v1, alpha, or beta operations.
742-
return wait.Poll(3*time.Second, 5*time.Minute, func() (bool, error) {
760+
return wait.ExponentialBackoff(WaitForOpBackoff, func() (bool, error) {
743761
pollOp, err := cloud.service.ZoneOperations.Get(project, zone, opName).Context(ctx).Do()
744762
if err != nil {
745763
klog.Errorf("WaitForOp(op: %s, zone: %#v) failed to poll the operation", opName, zone)
@@ -752,7 +770,7 @@ func (cloud *CloudProvider) waitForZonalOp(ctx context.Context, project, opName
752770

753771
func (cloud *CloudProvider) waitForRegionalOp(ctx context.Context, project, opName string, region string) error {
754772
// The v1 API can query for v1, alpha, or beta operations.
755-
return wait.Poll(3*time.Second, 5*time.Minute, func() (bool, error) {
773+
return wait.ExponentialBackoff(WaitForOpBackoff, func() (bool, error) {
756774
pollOp, err := cloud.service.RegionOperations.Get(project, region, opName).Context(ctx).Do()
757775
if err != nil {
758776
klog.Errorf("WaitForOp(op: %s, region: %#v) failed to poll the operation", opName, region)
@@ -764,7 +782,7 @@ func (cloud *CloudProvider) waitForRegionalOp(ctx context.Context, project, opNa
764782
}
765783

766784
func (cloud *CloudProvider) waitForGlobalOp(ctx context.Context, project, opName string) error {
767-
return wait.Poll(3*time.Second, 5*time.Minute, func() (bool, error) {
785+
return wait.ExponentialBackoff(WaitForOpBackoff, func() (bool, error) {
768786
pollOp, err := cloud.service.GlobalOperations.Get(project, opName).Context(ctx).Do()
769787
if err != nil {
770788
klog.Errorf("waitForGlobalOp(op: %s) failed to poll the operation", opName)
@@ -778,7 +796,7 @@ func (cloud *CloudProvider) waitForGlobalOp(ctx context.Context, project, opName
778796
func (cloud *CloudProvider) WaitForAttach(ctx context.Context, project string, volKey *meta.Key, instanceZone, instanceName string) error {
779797
klog.V(5).Infof("Waiting for attach of disk %v to instance %v to complete...", volKey.Name, instanceName)
780798
start := time.Now()
781-
return wait.Poll(5*time.Second, 2*time.Minute, func() (bool, error) {
799+
return wait.ExponentialBackoff(AttachDiskBackoff, func() (bool, error) {
782800
klog.V(6).Infof("Polling for attach of disk %v to instance %v to complete for %v", volKey.Name, instanceName, time.Since(start))
783801
disk, err := cloud.GetDisk(ctx, project, volKey, GCEAPIVersionV1)
784802
if err != nil {

0 commit comments

Comments
 (0)