@@ -46,6 +46,28 @@ const (
46
46
GCEAPIVersionV1 GCEAPIVersion = "v1"
47
47
// Alpha key type
48
48
GCEAPIVersionBeta GCEAPIVersion = "beta"
49
+ // OpPollInterval is the ineterval between polling an operation
50
+ OpPollInterval = 2 * time .Second
51
+ // OpTimeout is the default total timeout for polling on an operation
52
+ OpTimeout = 2 * time .Minute
53
+ // defaultCallTimeout is the ddefault context timeout for creating/getting on an operation
54
+ defaultCallTimeout = 3 * time .Second
55
+ // Time interval for checking on an operation
56
+ AttachDiskInitialDelay = 6 * time .Second
57
+
58
+ InsertDiskInitialDelay = 3 * time .Second
59
+
60
+ // volumeAttachmentConsecutiveErrorLimit is the number of consecutive errors we will ignore when waiting for a volume to attach/detach
61
+ //volumeAttachmentStatusConsecutiveErrorLimit = 10
62
+
63
+ // Attach typically takes 2-5 seconds (average is 2). Asking before 2 seconds is just waste of API quota.
64
+ volumeAttachmentStatusInitialDelay = 2 * time .Second
65
+ // Detach typically takes 5-10 seconds (average is 6). Asking before 5 seconds is just waste of API quota.
66
+ volumeDetachmentStatusInitialDelay = 5 * time .Second
67
+ // After the initial delay, poll attach/detach with exponential backoff (2046 seconds total)
68
+ volumeAttachmentStatusPollDelay = 2 * time .Second
69
+ volumeAttachmentStatusFactor = 2
70
+ volumeAttachmentStatusSteps = 11
49
71
)
50
72
51
73
type GCECompute interface {
@@ -306,6 +328,11 @@ func ValidateDiskParameters(disk *CloudDisk, params common.DiskParameters) error
306
328
return nil
307
329
}
308
330
331
+ // ContextWithCallTimeout returns a context with a default timeout, used for generated client calls.
332
+ func ContextWithCallTimeout () (context.Context , context.CancelFunc ) {
333
+ return context .WithTimeout (context .Background (), defaultCallTimeout )
334
+ }
335
+
309
336
func (cloud * CloudProvider ) InsertDisk (ctx context.Context , volKey * meta.Key , params common.DiskParameters , capBytes int64 , capacityRange * csi.CapacityRange , replicaZones []string , snapshotID string , multiWriter bool ) error {
310
337
klog .V (5 ).Infof ("Inserting disk %v" , volKey )
311
338
@@ -607,26 +634,35 @@ func (cloud *CloudProvider) AttachDisk(ctx context.Context, volKey *meta.Key, re
607
634
Type : diskType ,
608
635
}
609
636
610
- op , err := cloud .service .Instances .AttachDisk (cloud .project , instanceZone , instanceName , attachedDiskV1 ).Context (ctx ).Do ()
637
+ callCtx , cancel := ContextWithCallTimeout ()
638
+ defer cancel ()
639
+
640
+ op , err := cloud .service .Instances .AttachDisk (cloud .project , instanceZone , instanceName , attachedDiskV1 ).Context (callCtx ).Do ()
611
641
if err != nil {
612
642
return fmt .Errorf ("failed cloud service attach disk call: %v" , err )
613
643
}
644
+ klog .V (5 ).Infof ("Attaching disk %s operation id is %s" , volKey , op .Name )
614
645
err = cloud .waitForZonalOp (ctx , op .Name , instanceZone )
615
646
if err != nil {
616
- return fmt .Errorf ("failed when waiting for zonal op: %v" , err )
647
+ return fmt .Errorf ("failed when waiting for zonal op %s on attaching volume %v : %v" , op . Name , volKey , err )
617
648
}
618
649
return nil
619
650
}
620
651
621
652
func (cloud * CloudProvider ) DetachDisk (ctx context.Context , deviceName , instanceZone , instanceName string ) error {
622
- klog .V (5 ).Infof ("Detaching disk %v from %v" , deviceName , instanceName )
623
- op , err := cloud .service .Instances .DetachDisk (cloud .project , instanceZone , instanceName , deviceName ).Context (ctx ).Do ()
653
+
654
+ callCtx , cancel := ContextWithCallTimeout ()
655
+ defer cancel ()
656
+
657
+ klog .V (5 ).Infof ("Detaching disk %s from %v" , deviceName , instanceName )
658
+ op , err := cloud .service .Instances .DetachDisk (cloud .project , instanceZone , instanceName , deviceName ).Context (callCtx ).Do ()
624
659
if err != nil {
625
660
return err
626
661
}
662
+ klog .V (5 ).Infof ("Detaching disk %s operation id is %s" , deviceName , op .Name )
627
663
err = cloud .waitForZonalOp (ctx , op .Name , instanceZone )
628
664
if err != nil {
629
- return err
665
+ return fmt . Errorf ( "failed when waiting for zonal op %s on detaching volume %s: %v" , op . Name , deviceName , err )
630
666
}
631
667
return nil
632
668
}
@@ -677,17 +713,45 @@ func (cloud *CloudProvider) getRegionalDiskTypeURI(region, diskType string) stri
677
713
return cloud .service .BasePath + fmt .Sprintf (diskTypeURITemplateRegional , cloud .project , region , diskType )
678
714
}
679
715
716
+ // SleepWithContext will wait for the timer duration to expire, or the context
717
+ // is canceled. Which ever happens first. If the context is canceled the Context's
718
+ // error will be returned.
719
+ //
720
+ // Expects Context to always return a non-nil error if the Done channel is closed.
721
+ /*func SleepWithContext(ctx Context, dur time.Duration) error {
722
+ t := time.NewTimer(dur)
723
+ defer t.Stop()
724
+
725
+ select {
726
+ case <-t.C:
727
+ break
728
+ case <-ctx.Done():
729
+ return ctx.Err()
730
+ }
731
+
732
+ return nil
733
+ }*/
734
+
680
735
func (cloud * CloudProvider ) waitForZonalOp (ctx context.Context , opName string , zone string ) error {
681
736
// The v1 API can query for v1, alpha, or beta operations.
682
737
svc := cloud .service
683
738
project := cloud .project
684
- return wait .Poll (3 * time .Second , 5 * time .Minute , func () (bool , error ) {
685
- pollOp , err := svc .ZoneOperations .Get (project , zone , opName ).Context (ctx ).Do ()
739
+ timeout := OpTimeout
740
+ if deadline , ok := ctx .Deadline (); ok {
741
+ timeout = time .Until (deadline )
742
+ klog .Infof ("Getting timeout from passed context %v" , timeout )
743
+ }
744
+ callCtx , cancel := ContextWithCallTimeout ()
745
+ defer cancel ()
746
+
747
+ return wait .Poll (OpPollInterval , timeout , func () (bool , error ) {
748
+ pollOp , err := svc .ZoneOperations .Get (project , zone , opName ).Context (callCtx ).Do ()
686
749
if err != nil {
687
750
klog .Errorf ("WaitForOp(op: %s, zone: %#v) failed to poll the operation" , opName , zone )
688
751
return false , err
689
752
}
690
753
done , err := opIsDone (pollOp )
754
+ klog .V (4 ).Infof ("checking op %s is done %t" , opName , done )
691
755
return done , err
692
756
})
693
757
}
@@ -743,7 +807,10 @@ func (cloud *CloudProvider) WaitForAttach(ctx context.Context, volKey *meta.Key,
743
807
}
744
808
745
809
func opIsDone (op * computev1.Operation ) (bool , error ) {
746
- if op == nil || op .Status != operationStatusDone {
810
+ if op == nil {
811
+ return true , fmt .Errorf ("operation is nil" )
812
+ }
813
+ if op .Status != operationStatusDone {
747
814
return false , nil
748
815
}
749
816
if op .Error != nil && len (op .Error .Errors ) > 0 && op .Error .Errors [0 ] != nil {
0 commit comments