@@ -46,6 +46,28 @@ const (
46
46
GCEAPIVersionV1 GCEAPIVersion = "v1"
47
47
// Alpha key type
48
48
GCEAPIVersionBeta GCEAPIVersion = "beta"
49
+ // OpPollInterval is the ineterval between polling an operation
50
+ OpPollInterval = 1 * time .Second
51
+ // OpTimeout is the default total timeout for polling on an operation
52
+ OpTimeout = 2 * time .Minute
53
+ // defaultCallTimeout is the ddefault context timeout for creating/getting on an operation
54
+ defaultCallTimeout = 3 * time .Second
55
+ // Time interval for checking on an operation
56
+ AttachDiskInitialDelay = 6 * time .Second
57
+
58
+ InsertDiskInitialDelay = 3 * time .Second
59
+
60
+ // volumeAttachmentConsecutiveErrorLimit is the number of consecutive errors we will ignore when waiting for a volume to attach/detach
61
+ //volumeAttachmentStatusConsecutiveErrorLimit = 10
62
+
63
+ // Attach typically takes 2-5 seconds (average is 2). Asking before 2 seconds is just waste of API quota.
64
+ volumeAttachmentStatusInitialDelay = 2 * time .Second
65
+ // Detach typically takes 5-10 seconds (average is 6). Asking before 5 seconds is just waste of API quota.
66
+ volumeDetachmentStatusInitialDelay = 5 * time .Second
67
+ // After the initial delay, poll attach/detach with exponential backoff (2046 seconds total)
68
+ volumeAttachmentStatusPollDelay = 2 * time .Second
69
+ volumeAttachmentStatusFactor = 2
70
+ volumeAttachmentStatusSteps = 11
49
71
)
50
72
51
73
type GCECompute interface {
@@ -306,6 +328,11 @@ func ValidateDiskParameters(disk *CloudDisk, params common.DiskParameters) error
306
328
return nil
307
329
}
308
330
331
+ // ContextWithCallTimeout returns a context with a default timeout, used for generated client calls.
332
+ func ContextWithCallTimeout () (context.Context , context.CancelFunc ) {
333
+ return context .WithTimeout (context .Background (), defaultCallTimeout )
334
+ }
335
+
309
336
func (cloud * CloudProvider ) InsertDisk (ctx context.Context , volKey * meta.Key , params common.DiskParameters , capBytes int64 , capacityRange * csi.CapacityRange , replicaZones []string , snapshotID string , multiWriter bool ) error {
310
337
klog .V (5 ).Infof ("Inserting disk %v" , volKey )
311
338
@@ -489,17 +516,20 @@ func (cloud *CloudProvider) insertZonalDisk(
489
516
}
490
517
}
491
518
519
+ callCtx , cancel := ContextWithCallTimeout ()
520
+ defer cancel ()
521
+
492
522
if gceAPIVersion == GCEAPIVersionBeta {
493
523
var insertOp * computebeta.Operation
494
524
betaDiskToCreate := convertV1DiskToBetaDisk (diskToCreate )
495
525
betaDiskToCreate .MultiWriter = multiWriter
496
- insertOp , err = cloud .betaService .Disks .Insert (cloud .project , volKey .Zone , betaDiskToCreate ).Context (ctx ).Do ()
526
+ insertOp , err = cloud .betaService .Disks .Insert (cloud .project , volKey .Zone , betaDiskToCreate ).Context (callCtx ).Do ()
497
527
if insertOp != nil {
498
528
opName = insertOp .Name
499
529
}
500
530
} else {
501
531
var insertOp * computev1.Operation
502
- insertOp , err = cloud .service .Disks .Insert (cloud .project , volKey .Zone , diskToCreate ).Context (ctx ).Do ()
532
+ insertOp , err = cloud .service .Disks .Insert (cloud .project , volKey .Zone , diskToCreate ).Context (callCtx ).Do ()
503
533
if insertOp != nil {
504
534
opName = insertOp .Name
505
535
}
@@ -523,7 +553,7 @@ func (cloud *CloudProvider) insertZonalDisk(
523
553
}
524
554
return fmt .Errorf ("unknown Insert disk error: %v" , err )
525
555
}
526
-
556
+ klog . Infof ( "Insert disk %s op id %s" , volKey . Name , opName )
527
557
err = cloud .waitForZonalOp (ctx , opName , volKey .Zone )
528
558
529
559
if err != nil {
@@ -568,6 +598,7 @@ func (cloud *CloudProvider) deleteZonalDisk(ctx context.Context, zone, name stri
568
598
}
569
599
return err
570
600
}
601
+ klog .Infof ("delete disk %s op id %s" , name , op .Name )
571
602
err = cloud .waitForZonalOp (ctx , op .Name , zone )
572
603
if err != nil {
573
604
return err
@@ -607,26 +638,35 @@ func (cloud *CloudProvider) AttachDisk(ctx context.Context, volKey *meta.Key, re
607
638
Type : diskType ,
608
639
}
609
640
610
- op , err := cloud .service .Instances .AttachDisk (cloud .project , instanceZone , instanceName , attachedDiskV1 ).Context (ctx ).Do ()
641
+ callCtx , cancel := ContextWithCallTimeout ()
642
+ defer cancel ()
643
+
644
+ op , err := cloud .service .Instances .AttachDisk (cloud .project , instanceZone , instanceName , attachedDiskV1 ).Context (callCtx ).Do ()
611
645
if err != nil {
612
- return fmt .Errorf ("failed cloud service attach disk call: %v" , err )
646
+ return fmt .Errorf ("failed cloud service attach disk %v call: %v" , volKey , err )
613
647
}
648
+ klog .V (5 ).Infof ("Attaching disk %s operation id is %s" , volKey , op .Name )
614
649
err = cloud .waitForZonalOp (ctx , op .Name , instanceZone )
615
650
if err != nil {
616
- return fmt .Errorf ("failed when waiting for zonal op: %v" , err )
651
+ return fmt .Errorf ("failed when waiting for zonal op %s on attaching volume %v : %v" , op . Name , volKey , err )
617
652
}
618
653
return nil
619
654
}
620
655
621
656
func (cloud * CloudProvider ) DetachDisk (ctx context.Context , deviceName , instanceZone , instanceName string ) error {
622
- klog .V (5 ).Infof ("Detaching disk %v from %v" , deviceName , instanceName )
623
- op , err := cloud .service .Instances .DetachDisk (cloud .project , instanceZone , instanceName , deviceName ).Context (ctx ).Do ()
657
+
658
+ callCtx , cancel := ContextWithCallTimeout ()
659
+ defer cancel ()
660
+
661
+ klog .V (5 ).Infof ("Detaching disk %s from %v" , deviceName , instanceName )
662
+ op , err := cloud .service .Instances .DetachDisk (cloud .project , instanceZone , instanceName , deviceName ).Context (callCtx ).Do ()
624
663
if err != nil {
625
664
return err
626
665
}
666
+ klog .V (5 ).Infof ("Detaching disk %s operation id is %s" , deviceName , op .Name )
627
667
err = cloud .waitForZonalOp (ctx , op .Name , instanceZone )
628
668
if err != nil {
629
- return err
669
+ return fmt . Errorf ( "failed when waiting for zonal op %s on detaching volume %s: %v" , op . Name , deviceName , err )
630
670
}
631
671
return nil
632
672
}
@@ -677,17 +717,45 @@ func (cloud *CloudProvider) getRegionalDiskTypeURI(region, diskType string) stri
677
717
return cloud .service .BasePath + fmt .Sprintf (diskTypeURITemplateRegional , cloud .project , region , diskType )
678
718
}
679
719
720
+ // SleepWithContext will wait for the timer duration to expire, or the context
721
+ // is canceled. Which ever happens first. If the context is canceled the Context's
722
+ // error will be returned.
723
+ //
724
+ // Expects Context to always return a non-nil error if the Done channel is closed.
725
+ /*func SleepWithContext(ctx Context, dur time.Duration) error {
726
+ t := time.NewTimer(dur)
727
+ defer t.Stop()
728
+
729
+ select {
730
+ case <-t.C:
731
+ break
732
+ case <-ctx.Done():
733
+ return ctx.Err()
734
+ }
735
+
736
+ return nil
737
+ }*/
738
+
680
739
func (cloud * CloudProvider ) waitForZonalOp (ctx context.Context , opName string , zone string ) error {
681
740
// The v1 API can query for v1, alpha, or beta operations.
682
741
svc := cloud .service
683
742
project := cloud .project
684
- return wait .Poll (3 * time .Second , 5 * time .Minute , func () (bool , error ) {
743
+ timeout := OpTimeout
744
+ if deadline , ok := ctx .Deadline (); ok {
745
+ timeout = time .Until (deadline )
746
+ klog .Infof ("Getting timeout from passed context opname %s %v" , opName , timeout )
747
+ }
748
+ //callCtx, cancel := ContextWithCallTimeout()
749
+ //defer cancel()
750
+
751
+ return wait .Poll (OpPollInterval , timeout , func () (bool , error ) {
685
752
pollOp , err := svc .ZoneOperations .Get (project , zone , opName ).Context (ctx ).Do ()
686
753
if err != nil {
687
- klog .Errorf ("WaitForOp(op: %s, zone: %#v) failed to poll the operation" , opName , zone )
754
+ klog .Errorf ("WaitForOp(op: %s, zone: %#v) failed to poll the operation: %v " , opName , zone , err )
688
755
return false , err
689
756
}
690
757
done , err := opIsDone (pollOp )
758
+ klog .V (4 ).Infof ("checking op %s is done %t" , opName , done )
691
759
return done , err
692
760
})
693
761
}
@@ -743,7 +811,10 @@ func (cloud *CloudProvider) WaitForAttach(ctx context.Context, volKey *meta.Key,
743
811
}
744
812
745
813
func opIsDone (op * computev1.Operation ) (bool , error ) {
746
- if op == nil || op .Status != operationStatusDone {
814
+ if op == nil {
815
+ return true , fmt .Errorf ("operation is nil" )
816
+ }
817
+ if op .Status != operationStatusDone {
747
818
return false , nil
748
819
}
749
820
if op .Error != nil && len (op .Error .Errors ) > 0 && op .Error .Errors [0 ] != nil {
0 commit comments