diff --git a/pkg/gce-cloud-provider/compute/gce-compute.go b/pkg/gce-cloud-provider/compute/gce-compute.go index e7962cffc..eaa306cb5 100644 --- a/pkg/gce-cloud-provider/compute/gce-compute.go +++ b/pkg/gce-cloud-provider/compute/gce-compute.go @@ -515,7 +515,9 @@ func (cloud *CloudProvider) insertRegionalDisk( if IsGCEError(err, "alreadyExists") { disk, err := cloud.GetDisk(ctx, project, volKey, gceAPIVersion) if err != nil { - return err + // failed to GetDisk, however the Disk may already exist + // the error code should be non-Final + return status.Error(codes.Unavailable, err.Error()) } err = cloud.ValidateExistingDisk(ctx, disk, params, int64(capacityRange.GetRequiredBytes()), @@ -527,16 +529,19 @@ func (cloud *CloudProvider) insertRegionalDisk( klog.Warningf("GCE PD %s already exists, reusing", volKey.Name) return nil } - return status.Error(codes.Internal, fmt.Sprintf("unknown Insert disk error: %v", err.Error())) + // if the error code is considered "final", RegionDisks.Insert might not be retried + return fmt.Errorf("unknown Insert Regional disk error: %w", err) } klog.V(5).Infof("InsertDisk operation %s for disk %s", opName, diskToCreate.Name) err = cloud.waitForRegionalOp(ctx, project, opName, volKey.Region) + // failed to wait for Op to finish, however, the Op possibly is still running as expected + // the error code returned should be non-final if err != nil { if IsGCEError(err, "alreadyExists") { disk, err := cloud.GetDisk(ctx, project, volKey, gceAPIVersion) if err != nil { - return err + return status.Errorf(codes.Unavailable, "error when getting disk: %v", err.Error()) } err = cloud.ValidateExistingDisk(ctx, disk, params, int64(capacityRange.GetRequiredBytes()), @@ -548,7 +553,7 @@ func (cloud *CloudProvider) insertRegionalDisk( klog.Warningf("GCE PD %s already exists after wait, reusing", volKey.Name) return nil } - return fmt.Errorf("unknown Insert disk operation error: %w", err) + return status.Errorf(codes.Unavailable, "unknown error when polling the operation: %v", err.Error()) } return nil } @@ -630,7 +635,9 @@ func (cloud *CloudProvider) insertZonalDisk( if IsGCEError(err, "alreadyExists") { disk, err := cloud.GetDisk(ctx, project, volKey, gceAPIVersion) if err != nil { - return err + // failed to GetDisk, however the Disk may already exist + // the error code should be non-Final + return status.Error(codes.Unavailable, err.Error()) } err = cloud.ValidateExistingDisk(ctx, disk, params, int64(capacityRange.GetRequiredBytes()), @@ -642,6 +649,7 @@ func (cloud *CloudProvider) insertZonalDisk( klog.Warningf("GCE PD %s already exists, reusing", volKey.Name) return nil } + // if the error code is considered "final", Disks.Insert might not be retried return fmt.Errorf("unknown Insert disk error: %w", err) } klog.V(5).Infof("InsertDisk operation %s for disk %s", opName, diskToCreate.Name) @@ -649,10 +657,12 @@ func (cloud *CloudProvider) insertZonalDisk( err = cloud.waitForZonalOp(ctx, project, opName, volKey.Zone) if err != nil { + // failed to wait for Op to finish, however, the Op possibly is still running as expected + // the error code returned should be non-final if IsGCEError(err, "alreadyExists") { disk, err := cloud.GetDisk(ctx, project, volKey, gceAPIVersion) if err != nil { - return err + return status.Errorf(codes.Unavailable, "error when getting disk: %v", err.Error()) } err = cloud.ValidateExistingDisk(ctx, disk, params, int64(capacityRange.GetRequiredBytes()), @@ -664,7 +674,7 @@ func (cloud *CloudProvider) insertZonalDisk( klog.Warningf("GCE PD %s already exists after wait, reusing", volKey.Name) return nil } - return fmt.Errorf("unknown Insert disk operation error: %w", err) + return status.Errorf(codes.Unavailable, "unknown error when polling the operation: %v", err.Error()) } return nil } diff --git a/pkg/gce-pd-csi-driver/controller.go b/pkg/gce-pd-csi-driver/controller.go index 26537adc9..d570cf372 100644 --- a/pkg/gce-pd-csi-driver/controller.go +++ b/pkg/gce-pd-csi-driver/controller.go @@ -317,7 +317,8 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre existingDisk, err := gceCS.CloudProvider.GetDisk(ctx, gceCS.CloudProvider.GetDefaultProject(), volKey, gceAPIVersion) if err != nil { if !gce.IsGCEError(err, "notFound") { - return nil, common.LoggedError("CreateVolume, failed to getDisk when validating: ", err) + // failed to GetDisk, however the Disk may already be created, the error code should be non-Final + return nil, common.LoggedError("CreateVolume, failed to getDisk when validating: ", status.Error(codes.Unavailable, err.Error())) } } if err == nil { @@ -332,10 +333,10 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre ready, err := isDiskReady(existingDisk) if err != nil { - return nil, common.LoggedError("CreateVolume disk "+volKey.String()+" had error checking ready status: ", err) + return nil, status.Errorf(codes.Aborted, "CreateVolume disk %q had error checking ready status: %v", volKey.String(), err.Error()) } if !ready { - return nil, status.Errorf(codes.Internal, "CreateVolume existing disk %v is not ready", volKey) + return nil, status.Errorf(codes.Aborted, "CreateVolume existing disk %v is not ready", volKey) } // If there is no validation error, immediately return success @@ -410,10 +411,10 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre // Verify the source disk is ready. ready, err := isDiskReady(diskFromSourceVolume) if err != nil { - return nil, common.LoggedError("CreateVolume disk from source volume "+sourceVolKey.String()+" had error checking ready status: ", err) + return nil, status.Errorf(codes.Aborted, "CreateVolume disk from source volume %q had error checking ready status: %v", sourceVolKey.String(), err.Error()) } if !ready { - return nil, status.Errorf(codes.Internal, "CreateVolume disk from source volume %v is not ready", sourceVolKey) + return nil, status.Errorf(codes.Aborted, "CreateVolume disk from source volume %v is not ready", sourceVolKey) } } } else { // if VolumeContentSource is nil, validate access mode is not read only @@ -1861,9 +1862,10 @@ func createRegionalDisk(ctx context.Context, cloudProvider gce.GCECompute, name if multiWriter { gceAPIVersion = gce.GCEAPIVersionBeta } + // failed to GetDisk, however the Disk may already be created, the error code should be non-Final disk, err := cloudProvider.GetDisk(ctx, project, meta.RegionalKey(name, region), gceAPIVersion) if err != nil { - return nil, fmt.Errorf("failed to get disk after creating regional disk: %w", err) + return nil, status.Errorf(codes.Unavailable, "failed to get disk after creating regional disk: %v", err.Error()) } return disk, nil } @@ -1883,9 +1885,10 @@ func createSingleZoneDisk(ctx context.Context, cloudProvider gce.GCECompute, nam if multiWriter { gceAPIVersion = gce.GCEAPIVersionBeta } + // failed to GetDisk, however the Disk may already be created, the error code should be non-Final disk, err := cloudProvider.GetDisk(ctx, project, meta.ZonalKey(name, diskZone), gceAPIVersion) if err != nil { - return nil, err + return nil, status.Errorf(codes.Unavailable, "failed to get disk after creating zonal disk: %v", err.Error()) } return disk, nil }