Skip to content

Commit 52a23ee

Browse files
authored
Merge pull request #1630 from k8s-infra-cherrypick-robot/cherry-pick-1626-to-release-1.13
[release-1.13] Add flag to toggle instances.get over disks.get compute API on ControllerPublish WaitForAttach
2 parents 0c94a69 + 57738d6 commit 52a23ee

File tree

7 files changed

+84
-15
lines changed

7 files changed

+84
-15
lines changed

cmd/gce-pd-csi-driver/main.go

+9-1
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,8 @@ var (
7979
computeEndpoint *url.URL
8080
allowedComputeEnvironment = []gce.Environment{gce.EnvironmentStaging, gce.EnvironmentProduction}
8181

82+
useInstanceAPIOnWaitForAttachDiskTypesFlag = flag.String("use-instance-api-to-poll-attachment-disk-types", "", "Comma separated list of disk types that should use instances.get API when polling for disk attach during ControllerPublish")
83+
8284
version string
8385
)
8486

@@ -170,10 +172,16 @@ func handle() {
170172
DiskTypes: multiZoneVolumeHandleDiskTypes,
171173
}
172174

175+
// Initialize waitForAttach config
176+
useInstanceAPIOnWaitForAttachDiskTypes := strings.Split(*useInstanceAPIOnWaitForAttachDiskTypesFlag, ",")
177+
waitForAttachConfig := gce.WaitForAttachConfig{
178+
UseInstancesAPIForDiskTypes: useInstanceAPIOnWaitForAttachDiskTypes,
179+
}
180+
173181
// Initialize requirements for the controller service
174182
var controllerServer *driver.GCEControllerServer
175183
if *runControllerService {
176-
cloudProvider, err := gce.CreateCloudProvider(ctx, version, *cloudConfigFilePath, computeEndpoint, computeEnvironment)
184+
cloudProvider, err := gce.CreateCloudProvider(ctx, version, *cloudConfigFilePath, computeEndpoint, computeEnvironment, waitForAttachConfig)
177185
if err != nil {
178186
klog.Fatalf("Failed to get cloud provider: %v", err.Error())
179187
}

pkg/gce-cloud-provider/compute/fake-gce.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -318,7 +318,7 @@ func (cloud *FakeCloudProvider) getRegionalDiskTypeURI(project, region, diskType
318318
return fmt.Sprintf(diskTypeURITemplateRegional, project, region, diskType)
319319
}
320320

321-
func (cloud *FakeCloudProvider) WaitForAttach(ctx context.Context, project string, volKey *meta.Key, instanceZone, instanceName string) error {
321+
func (cloud *FakeCloudProvider) WaitForAttach(ctx context.Context, project string, volKey *meta.Key, diskType, instanceZone, instanceName string) error {
322322
return nil
323323
}
324324

pkg/gce-cloud-provider/compute/gce-compute.go

+40-4
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ type GCECompute interface {
9999
DetachDisk(ctx context.Context, project, deviceName, instanceZone, instanceName string) error
100100
GetDiskSourceURI(project string, volKey *meta.Key) string
101101
GetDiskTypeURI(project string, volKey *meta.Key, diskType string) string
102-
WaitForAttach(ctx context.Context, project string, volKey *meta.Key, instanceZone, instanceName string) error
102+
WaitForAttach(ctx context.Context, project string, volKey *meta.Key, diskType, instanceZone, instanceName string) error
103103
ResizeDisk(ctx context.Context, project string, volKey *meta.Key, requestBytes int64) (int64, error)
104104
ListDisks(ctx context.Context) ([]*computev1.Disk, string, error)
105105
// Regional Disk Methods
@@ -924,18 +924,46 @@ func (cloud *CloudProvider) waitForGlobalOp(ctx context.Context, project, opName
924924
})
925925
}
926926

927-
func (cloud *CloudProvider) WaitForAttach(ctx context.Context, project string, volKey *meta.Key, instanceZone, instanceName string) error {
927+
func (cloud *CloudProvider) waitForAttachOnInstance(ctx context.Context, project string, volKey *meta.Key, instanceZone, instanceName string) error {
928928
klog.V(5).Infof("Waiting for attach of disk %v to instance %v to complete...", volKey.Name, instanceName)
929929
start := time.Now()
930930
return wait.ExponentialBackoff(AttachDiskBackoff, func() (bool, error) {
931-
klog.V(6).Infof("Polling for attach of disk %v to instance %v to complete for %v", volKey.Name, instanceName, time.Since(start))
931+
klog.V(6).Infof("Polling instances.get for attach of disk %v to instance %v to complete for %v", volKey.Name, instanceName, time.Since(start))
932+
instance, err := cloud.GetInstanceOrError(ctx, instanceZone, instanceName)
933+
if err != nil {
934+
return false, fmt.Errorf("GetInstance failed to get instance: %w", err)
935+
}
936+
937+
if instance == nil {
938+
return false, fmt.Errorf("instance %v could not be found", instanceName)
939+
}
940+
941+
for _, disk := range instance.Disks {
942+
deviceName, err := common.GetDeviceName(volKey)
943+
if err != nil {
944+
return false, fmt.Errorf("failed to get disk device name for %s: %w", volKey, err)
945+
}
946+
947+
if deviceName == disk.DeviceName {
948+
return true, nil
949+
}
950+
}
951+
return false, nil
952+
})
953+
}
954+
955+
func (cloud *CloudProvider) waitForAttachOnDisk(ctx context.Context, project string, volKey *meta.Key, instanceZone, instanceName string) error {
956+
klog.V(5).Infof("Waiting for attach of disk %v to instance %v to complete...", volKey.Name, instanceName)
957+
start := time.Now()
958+
return wait.ExponentialBackoff(AttachDiskBackoff, func() (bool, error) {
959+
klog.V(6).Infof("Polling disks.get for attach of disk %v to instance %v to complete for %v", volKey.Name, instanceName, time.Since(start))
932960
disk, err := cloud.GetDisk(ctx, project, volKey, GCEAPIVersionV1)
933961
if err != nil {
934962
return false, fmt.Errorf("GetDisk failed to get disk: %w", err)
935963
}
936964

937965
if disk == nil {
938-
return false, fmt.Errorf("Disk %v could not be found", volKey.Name)
966+
return false, fmt.Errorf("disk %v could not be found", volKey.Name)
939967
}
940968

941969
for _, user := range disk.GetUsers() {
@@ -947,6 +975,14 @@ func (cloud *CloudProvider) WaitForAttach(ctx context.Context, project string, v
947975
})
948976
}
949977

978+
func (cloud *CloudProvider) WaitForAttach(ctx context.Context, project string, volKey *meta.Key, diskType, instanceZone, instanceName string) error {
979+
if cloud.waitForAttachConfig.ShouldUseGetInstanceAPI(diskType) {
980+
return cloud.waitForAttachOnInstance(ctx, project, volKey, instanceZone, instanceName)
981+
} else {
982+
return cloud.waitForAttachOnDisk(ctx, project, volKey, instanceZone, instanceName)
983+
}
984+
}
985+
950986
func wrapOpErr(name string, opErr *computev1.OperationErrorErrors) error {
951987
if opErr == nil {
952988
return nil

pkg/gce-cloud-provider/compute/gce.go

+22-7
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ import (
3636
"google.golang.org/api/googleapi"
3737
"k8s.io/apimachinery/pkg/util/wait"
3838
"k8s.io/klog/v2"
39+
"k8s.io/utils/strings/slices"
3940
)
4041

4142
type Environment string
@@ -66,6 +67,8 @@ type CloudProvider struct {
6667
zone string
6768

6869
zonesCache map[string][]string
70+
71+
waitForAttachConfig WaitForAttachConfig
6972
}
7073

7174
var _ GCECompute = &CloudProvider{}
@@ -74,14 +77,25 @@ type ConfigFile struct {
7477
Global ConfigGlobal `gcfg:"global"`
7578
}
7679

80+
type WaitForAttachConfig struct {
81+
// A set of disk types that should use the compute instances.get API instead of the
82+
// disks.get API. For certain disk types, using the instances.get API is preferred
83+
// based on the response characteristics of the API.
84+
UseInstancesAPIForDiskTypes []string
85+
}
86+
87+
func (cfg WaitForAttachConfig) ShouldUseGetInstanceAPI(diskType string) bool {
88+
return slices.Contains(cfg.UseInstancesAPIForDiskTypes, diskType)
89+
}
90+
7791
type ConfigGlobal struct {
7892
TokenURL string `gcfg:"token-url"`
7993
TokenBody string `gcfg:"token-body"`
8094
ProjectId string `gcfg:"project-id"`
8195
Zone string `gcfg:"zone"`
8296
}
8397

84-
func CreateCloudProvider(ctx context.Context, vendorVersion string, configPath string, computeEndpoint *url.URL, computeEnvironment Environment) (*CloudProvider, error) {
98+
func CreateCloudProvider(ctx context.Context, vendorVersion string, configPath string, computeEndpoint *url.URL, computeEnvironment Environment, waitForAttachConfig WaitForAttachConfig) (*CloudProvider, error) {
8599
configFile, err := readConfig(configPath)
86100
if err != nil {
87101
return nil, err
@@ -120,12 +134,13 @@ func CreateCloudProvider(ctx context.Context, vendorVersion string, configPath s
120134
}
121135

122136
return &CloudProvider{
123-
service: svc,
124-
betaService: betasvc,
125-
alphaService: alphasvc,
126-
project: project,
127-
zone: zone,
128-
zonesCache: make(map[string]([]string)),
137+
service: svc,
138+
betaService: betasvc,
139+
alphaService: alphasvc,
140+
project: project,
141+
zone: zone,
142+
zonesCache: make(map[string]([]string)),
143+
waitForAttachConfig: waitForAttachConfig,
129144
}, nil
130145

131146
}

pkg/gce-pd-csi-driver/controller.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -727,7 +727,7 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con
727727
return nil, common.LoggedError("Failed to Attach: ", err), disk
728728
}
729729

730-
err = gceCS.CloudProvider.WaitForAttach(ctx, project, volKey, instanceZone, instanceName)
730+
err = gceCS.CloudProvider.WaitForAttach(ctx, project, volKey, disk.GetPDType(), instanceZone, instanceName)
731731
if err != nil {
732732
return nil, common.LoggedError("Errored during WaitForAttach: ", err), disk
733733
}

test/e2e/tests/single_zone_e2e_test.go

+10
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ const (
5252
defaultVolumeLimit int64 = 127
5353
readyState = "READY"
5454
standardDiskType = "pd-standard"
55+
ssdDiskType = "pd-ssd"
5556
extremeDiskType = "pd-extreme"
5657
hdtDiskType = "hyperdisk-throughput"
5758
provisionedIOPSOnCreate = "12345"
@@ -297,6 +298,7 @@ var _ = Describe("GCE PD CSI Driver", func() {
297298
Entry("on pd-standard", standardDiskType),
298299
Entry("on pd-extreme", extremeDiskType),
299300
Entry("on hyperdisk-throughput", hdtDiskType),
301+
Entry("on pd-ssd", ssdDiskType),
300302
)
301303

302304
DescribeTable("Should complete publish/unpublish lifecycle with underspecified volume ID and missing volume",
@@ -1559,6 +1561,14 @@ var typeToDisk = map[string]*disk{
15591561
Expect(disk.ProvisionedThroughput).To(Equal(provisionedThroughputOnCreateInt))
15601562
},
15611563
},
1564+
ssdDiskType: {
1565+
params: map[string]string{
1566+
common.ParameterKeyType: ssdDiskType,
1567+
},
1568+
validate: func(disk *compute.Disk) {
1569+
Expect(disk.Type).To(ContainSubstring(ssdDiskType))
1570+
},
1571+
},
15621572
}
15631573

15641574
func merge(a, b map[string]string) map[string]string {

test/e2e/utils/utils.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ func GCEClientAndDriverSetup(instance *remote.InstanceInfo, computeEndpoint stri
6262
workspace := remote.NewWorkspaceDir("gce-pd-e2e-")
6363
// Log at V(6) as the compute API calls are emitted at that level and it's
6464
// useful to see what's happening when debugging tests.
65-
driverRunCmd := fmt.Sprintf("sh -c '/usr/bin/nohup %s/gce-pd-csi-driver -v=6 --endpoint=%s --multi-zone-volume-handle-enable --multi-zone-volume-handle-disk-types=pd-standard %s 2> %s/prog.out < /dev/null > /dev/null &'",
65+
driverRunCmd := fmt.Sprintf("sh -c '/usr/bin/nohup %s/gce-pd-csi-driver -v=6 --endpoint=%s --multi-zone-volume-handle-enable --multi-zone-volume-handle-disk-types=pd-standard --use-instance-api-to-poll-attachment-disk-types=pd-ssd %s 2> %s/prog.out < /dev/null > /dev/null &'",
6666
workspace, endpoint, strings.Join(extra_flags, " "), workspace)
6767

6868
config := &remote.ClientConfig{

0 commit comments

Comments
 (0)