Commit fb2f27d

amacaskill authored and sunnylovestiramisu committed

fix bug where volume cloning topology requirements are ignored when choosing the location of the volume
1 parent 8075e3d commit fb2f27d

File tree: 3 files changed (+1112 −55 lines)

pkg/gce-pd-csi-driver/controller.go

+126 −6
@@ -36,6 +36,7 @@ import (
     "k8s.io/apimachinery/pkg/util/uuid"
     "k8s.io/client-go/util/flowcontrol"
     "k8s.io/klog/v2"
+    "k8s.io/utils/strings/slices"

     "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
     gce "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/compute"
@@ -107,6 +108,14 @@ type workItem struct {
     unpublishReq *csi.ControllerUnpublishVolumeRequest
 }

+// locationRequirements are additional location topology requirements that must be respected when creating a volume.
+type locationRequirements struct {
+    srcVolRegion         string
+    srcVolZone           string
+    srcReplicationType   string
+    cloneReplicationType string
+}
+
 var _ csi.ControllerServer = &GCEControllerServer{}

 const (
@@ -151,6 +160,44 @@ func isDiskReady(disk *gce.CloudDisk) (bool, error) {
     return false, nil
 }

+// cloningLocationRequirements returns additional location requirements to be applied to the given create volume requests topology.
+// If the CreateVolumeRequest will use volume cloning, location requirements in compliance with the volume cloning limitations
+// will be returned: https://cloud.google.com/kubernetes-engine/docs/how-to/persistent-volumes/volume-cloning#limitations.
+func cloningLocationRequirements(req *csi.CreateVolumeRequest, cloneReplicationType string) (*locationRequirements, error) {
+    if !useVolumeCloning(req) {
+        return nil, nil
+    }
+    // If we are using volume cloning, this will be set.
+    volSrc := req.VolumeContentSource.GetVolume()
+    volSrcVolID := volSrc.GetVolumeId()
+
+    _, sourceVolKey, err := common.VolumeIDToKey(volSrcVolID)
+    if err != nil {
+        return nil, fmt.Errorf("volume ID is invalid: %w", err)
+    }
+
+    isZonalSrcVol := sourceVolKey.Type() == meta.Zonal
+    if isZonalSrcVol {
+        region, err := common.GetRegionFromZones([]string{sourceVolKey.Zone})
+        if err != nil {
+            return nil, fmt.Errorf("failed to get region from zones: %w", err)
+        }
+        sourceVolKey.Region = region
+    }
+
+    srcReplicationType := replicationTypeNone
+    if !isZonalSrcVol {
+        srcReplicationType = replicationTypeRegionalPD
+    }
+
+    return &locationRequirements{srcVolZone: sourceVolKey.Zone, srcVolRegion: sourceVolKey.Region, srcReplicationType: srcReplicationType, cloneReplicationType: cloneReplicationType}, nil
+}
+
+// useVolumeCloning returns true if the create volume request should be created with volume cloning.
+func useVolumeCloning(req *csi.CreateVolumeRequest) bool {
+    return req.VolumeContentSource != nil && req.VolumeContentSource.GetVolume() != nil
+}
+
 func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error) {
     var err error
     // Validate arguments
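For orientation, here is a minimal standalone sketch of the new cloning check: useVolumeCloning is copied from the hunk above, and the CreateVolumeRequest it is fed is a hypothetical clone request (the project, zone, and disk name in the volume ID are made up). This is not driver code, only an illustration of when the cloning path is taken.

package main

import (
    "fmt"

    "github.com/container-storage-interface/spec/lib/go/csi"
)

// Copied from the diff above: cloning is in play whenever the request's
// content source is an existing volume rather than a snapshot.
func useVolumeCloning(req *csi.CreateVolumeRequest) bool {
    return req.VolumeContentSource != nil && req.VolumeContentSource.GetVolume() != nil
}

func main() {
    // Hypothetical clone request; the source volume ID is illustrative only.
    req := &csi.CreateVolumeRequest{
        Name: "clone-of-my-disk",
        VolumeContentSource: &csi.VolumeContentSource{
            Type: &csi.VolumeContentSource_Volume{
                Volume: &csi.VolumeContentSource_VolumeSource{
                    VolumeId: "projects/my-project/zones/us-central1-a/disks/my-disk",
                },
            },
        },
    }
    fmt.Println(useVolumeCloning(req))                        // true
    fmt.Println(useVolumeCloning(&csi.CreateVolumeRequest{})) // false
}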
@@ -186,12 +233,21 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
     if multiWriter {
         gceAPIVersion = gce.GCEAPIVersionBeta
     }
+
+    var locationTopReq *locationRequirements
+    if useVolumeCloning(req) {
+        locationTopReq, err = cloningLocationRequirements(req, params.ReplicationType)
+        if err != nil {
+            return nil, status.Errorf(codes.InvalidArgument, "failed to get location requirements: %v", err.Error())
+        }
+    }
+
     // Determine the zone or zones+region of the disk
     var zones []string
     var volKey *meta.Key
     switch params.ReplicationType {
     case replicationTypeNone:
-        zones, err = pickZones(ctx, gceCS, req.GetAccessibilityRequirements(), 1)
+        zones, err = pickZones(ctx, gceCS, req.GetAccessibilityRequirements(), 1, locationTopReq)
         if err != nil {
             return nil, status.Errorf(codes.InvalidArgument, "CreateVolume failed to pick zones for disk: %v", err.Error())
         }
@@ -201,7 +257,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
         volKey = meta.ZonalKey(name, zones[0])

     case replicationTypeRegionalPD:
-        zones, err = pickZones(ctx, gceCS, req.GetAccessibilityRequirements(), 2)
+        zones, err = pickZones(ctx, gceCS, req.GetAccessibilityRequirements(), 2, locationTopReq)
         if err != nil {
             return nil, status.Errorf(codes.InvalidArgument, "CreateVolume failed to pick zones for disk: %v", err.Error())
         }
@@ -1382,7 +1438,29 @@ func diskIsAttachedAndCompatible(deviceName string, instance *compute.Instance,
     return false, nil
 }

-func pickZonesFromTopology(top *csi.TopologyRequirement, numZones int) ([]string, error) {
+// pickZonesInRegion will remove any zones that are not in the given region.
+func pickZonesInRegion(region string, zones []string) []string {
+    refinedZones := []string{}
+    for _, zone := range zones {
+        if strings.Contains(zone, region) {
+            refinedZones = append(refinedZones, zone)
+        }
+    }
+    return refinedZones
+}
+
+func prependZone(zone string, zones []string) []string {
+    newZones := []string{zone}
+    for i := 0; i < len(zones); i++ {
+        // Do not add a zone if it is equal to the zone that is already prepended to newZones.
+        if zones[i] != zone {
+            newZones = append(newZones, zones[i])
+        }
+    }
+    return newZones
+}
+
+func pickZonesFromTopology(top *csi.TopologyRequirement, numZones int, locationTopReq *locationRequirements) ([]string, error) {
     reqZones, err := getZonesFromTopology(top.GetRequisite())
     if err != nil {
         return nil, fmt.Errorf("could not get zones from requisite topology: %w", err)
@@ -1392,6 +1470,39 @@ func pickZonesFromTopology(top *csi.TopologyRequirement, numZones int) ([]string
         return nil, fmt.Errorf("could not get zones from preferred topology: %w", err)
     }

+    if locationTopReq != nil {
+        srcVolZone := locationTopReq.srcVolZone
+        switch locationTopReq.cloneReplicationType {
+        // For zonal -> zonal cloning, the source disk zone must match the destination disk zone.
+        case replicationTypeNone:
+            // If the source volume zone is not in the topology requirement, we return an error.
+            if !slices.Contains(prefZones, srcVolZone) && !slices.Contains(reqZones, srcVolZone) {
+                volumeCloningReq := fmt.Sprintf("clone zone must match source disk zone: %s", srcVolZone)
+                return nil, fmt.Errorf("failed to find zone from topology %v: %s", top, volumeCloningReq)
+            }
+            return []string{srcVolZone}, nil
+        // For zonal or regional -> regional disk cloning, the source disk region must match the destination disk region.
+        case replicationTypeRegionalPD:
+            srcVolRegion := locationTopReq.srcVolRegion
+            prefZones = pickZonesInRegion(srcVolRegion, prefZones)
+            reqZones = pickZonesInRegion(srcVolRegion, reqZones)
+
+            if len(prefZones) == 0 && len(reqZones) == 0 {
+                volumeCloningReq := fmt.Sprintf("clone zone must reside in source disk region %s", srcVolRegion)
+                return nil, fmt.Errorf("failed to find zone from topology %v: %s", top, volumeCloningReq)
+            }
+
+            // For zonal -> regional disk cloning, one of the replicated zones must match the source zone.
+            if locationTopReq.srcReplicationType == replicationTypeNone {
+                if !slices.Contains(prefZones, srcVolZone) && !slices.Contains(reqZones, srcVolZone) {
+                    volumeCloningReq := fmt.Sprintf("one of the replica zones of the clone must match the source disk zone: %s", srcVolZone)
+                    return nil, fmt.Errorf("failed to find zone from topology %v: %s", top, volumeCloningReq)
+                }
+                prefZones = prependZone(srcVolZone, prefZones)
+            }
+        }
+    }
+
     if numZones <= len(prefZones) {
         return prefZones[0:numZones], nil
     } else {
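The following sketch mirrors the cloning branch above on plain string slices, considering only the preferred zones, to show how the three source/clone combinations narrow the candidate zones. It is a simplified illustration rather than the driver code, and the replication-type string values are assumptions.

package main

import (
    "fmt"
    "strings"
)

// Assumed values of the driver's replication-type constants.
const (
    replicationTypeNone       = "none"
    replicationTypeRegionalPD = "regional-pd"
)

func pickZonesInRegion(region string, zones []string) []string {
    out := []string{}
    for _, z := range zones {
        if strings.Contains(z, region) {
            out = append(out, z)
        }
    }
    return out
}

func contains(ss []string, s string) bool {
    for _, v := range ss {
        if v == s {
            return true
        }
    }
    return false
}

// narrow mirrors the cloning switch on the preferred zones only: a zonal clone
// must land in the source zone, a regional clone must stay in the source region,
// and a zonal source must be one of the regional clone's replica zones.
func narrow(prefZones []string, srcZone, srcRegion, srcRepl, cloneRepl string) ([]string, error) {
    switch cloneRepl {
    case replicationTypeNone:
        if !contains(prefZones, srcZone) {
            return nil, fmt.Errorf("clone zone must match source disk zone: %s", srcZone)
        }
        return []string{srcZone}, nil
    case replicationTypeRegionalPD:
        prefZones = pickZonesInRegion(srcRegion, prefZones)
        if len(prefZones) == 0 {
            return nil, fmt.Errorf("clone zone must reside in source disk region %s", srcRegion)
        }
        if srcRepl == replicationTypeNone && !contains(prefZones, srcZone) {
            return nil, fmt.Errorf("one of the replica zones of the clone must match the source disk zone: %s", srcZone)
        }
        return prefZones, nil
    }
    return prefZones, nil
}

func main() {
    pref := []string{"us-central1-a", "us-central1-b", "us-east1-c"}

    // zonal -> zonal: only the source zone survives.
    fmt.Println(narrow(pref, "us-central1-b", "us-central1", replicationTypeNone, replicationTypeNone))

    // zonal -> regional: zones outside us-central1 are dropped, and the source zone must remain.
    fmt.Println(narrow(pref, "us-central1-a", "us-central1", replicationTypeNone, replicationTypeRegionalPD))

    // regional -> regional: only the region has to match.
    fmt.Println(narrow(pref, "", "us-central1", replicationTypeRegionalPD, replicationTypeRegionalPD))
}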
@@ -1450,16 +1561,25 @@ func getZoneFromSegment(seg map[string]string) (string, error) {
     return zone, nil
 }

-func pickZones(ctx context.Context, gceCS *GCEControllerServer, top *csi.TopologyRequirement, numZones int) ([]string, error) {
+func pickZones(ctx context.Context, gceCS *GCEControllerServer, top *csi.TopologyRequirement, numZones int, locationTopReq *locationRequirements) ([]string, error) {
     var zones []string
     var err error
     if top != nil {
-        zones, err = pickZonesFromTopology(top, numZones)
+        zones, err = pickZonesFromTopology(top, numZones, locationTopReq)
         if err != nil {
             return nil, fmt.Errorf("failed to pick zones from topology: %w", err)
         }
     } else {
-        zones, err = getDefaultZonesInRegion(ctx, gceCS, []string{gceCS.CloudProvider.GetDefaultZone()}, numZones)
+        existingZones := []string{gceCS.CloudProvider.GetDefaultZone()}
+        // We set existingZones to the source volume zone so that for zonal -> zonal cloning, the clone is provisioned
+        // in the same zone as the source volume, and for zonal -> regional, one of the replicated zones will always
+        // be the zone of the source volume. For regional -> regional cloning, the srcVolZone will not be set, so we
+        // just use the default zone.
+        if locationTopReq != nil && locationTopReq.srcReplicationType == replicationTypeNone {
+            existingZones = []string{locationTopReq.srcVolZone}
+        }
+        // If topology is nil, then the Immediate binding mode was used without setting allowedTopologies in the storageclass.
+        zones, err = getDefaultZonesInRegion(ctx, gceCS, existingZones, numZones)
         if err != nil {
             return nil, fmt.Errorf("failed to get default %v zones in region: %w", numZones, err)
         }
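Finally, a small sketch of the fallback seeding when no topology is supplied (Immediate binding without allowedTopologies): for a zonal clone source the seed zone is the source volume's zone, otherwise the driver's default zone. The seedZones helper and the constant value are assumptions for illustration; getDefaultZonesInRegion is not reproduced.

package main

import "fmt"

const replicationTypeNone = "none" // assumed value of the driver constant

type locationRequirements struct {
    srcVolZone         string
    srcReplicationType string
}

// seedZones mirrors the new existingZones selection: start from the source
// volume's zone for zonal sources so zonal -> zonal and zonal -> regional
// clones stay co-located with the source; otherwise use the default zone.
func seedZones(defaultZone string, locationTopReq *locationRequirements) []string {
    if locationTopReq != nil && locationTopReq.srcReplicationType == replicationTypeNone {
        return []string{locationTopReq.srcVolZone}
    }
    return []string{defaultZone}
}

func main() {
    fmt.Println(seedZones("us-central1-a", nil)) // [us-central1-a]
    fmt.Println(seedZones("us-central1-a", &locationRequirements{
        srcVolZone:         "us-central1-f",
        srcReplicationType: replicationTypeNone,
    })) // [us-central1-f]
}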
