Skip to content

Commit 3472a21

Browse files
committed
fix bug where volume cloning topology requirements are ignored when choosing the location of the volume
1 parent 0e7a44f commit 3472a21

File tree

2 files changed

+290
-9
lines changed

2 files changed

+290
-9
lines changed

pkg/gce-pd-csi-driver/controller.go

+104-7
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ import (
2020
"math/rand"
2121
"regexp"
2222
"sort"
23+
"strings"
2324
"time"
2425

2526
"github.com/GoogleCloudPlatform/k8s-cloud-provider/pkg/cloud/meta"
@@ -32,6 +33,7 @@ import (
3233
"k8s.io/apimachinery/pkg/util/uuid"
3334
"k8s.io/client-go/util/flowcontrol"
3435
"k8s.io/klog/v2"
36+
"k8s.io/utils/strings/slices"
3537

3638
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
3739
gce "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/compute"
@@ -98,6 +100,13 @@ type workItem struct {
98100
unpublishReq *csi.ControllerUnpublishVolumeRequest
99101
}
100102

103+
// locationRequirements captures extra location constraints that a CreateVolume
// call must honor when choosing where a volume lives — for volume cloning, the
// source disk's region/zone and its replication type.
type locationRequirements struct {
	region          string
	zone            string
	replicationType string
}
109+
101110
var _ csi.ControllerServer = &GCEControllerServer{}
102111

103112
const (
@@ -142,6 +151,40 @@ func isDiskReady(disk *gce.CloudDisk) (bool, error) {
142151
return false, nil
143152
}
144153

154+
// cloningLocationRequirements returns additional location requirements to be applied to the given create volume requests topology.
155+
// If the CreateVolumeRequest will use volume cloning, location requirements in compliance with the volume cloning limitations
156+
// will be returned: https://cloud.google.com/kubernetes-engine/docs/how-to/persistent-volumes/volume-cloning#limitations.
157+
func cloningLocationRequirements(req *csi.CreateVolumeRequest, replicationType string) (*locationRequirements, error) {
158+
if !useVolumeCloning(req) {
159+
return nil, nil
160+
}
161+
// If we are using volume cloning, this will be set.
162+
volSrc := req.VolumeContentSource.GetVolume()
163+
volSrcVolID := volSrc.GetVolumeId()
164+
165+
_, sourceVolKey, err := common.VolumeIDToKey(volSrcVolID)
166+
if err != nil {
167+
return nil, err
168+
}
169+
if isZonalKey(sourceVolKey) {
170+
region, err := common.GetRegionFromZones([]string{sourceVolKey.Zone})
171+
if err != nil {
172+
return nil, err
173+
}
174+
sourceVolKey.Region = region
175+
}
176+
return &locationRequirements{zone: sourceVolKey.Zone, region: sourceVolKey.Region, replicationType: replicationType}, nil
177+
}
178+
179+
func isZonalKey(key *meta.Key) bool {
180+
return key.Zone != "" && key.Region == ""
181+
}
182+
183+
// useVolumeCloning returns true if the create volume request should be created with volume cloning.
184+
func useVolumeCloning(req *csi.CreateVolumeRequest) bool {
185+
return req.VolumeContentSource != nil && req.VolumeContentSource.GetVolume() != nil
186+
}
187+
145188
func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest) (*csi.CreateVolumeResponse, error) {
146189
var err error
147190
// Validate arguments
@@ -177,12 +220,21 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
177220
if multiWriter {
178221
gceAPIVersion = gce.GCEAPIVersionBeta
179222
}
223+
224+
var locationTopReq *locationRequirements
225+
if useVolumeCloning(req) {
226+
locationTopReq, err = cloningLocationRequirements(req, params.ReplicationType)
227+
if err != nil {
228+
return nil, status.Errorf(codes.InvalidArgument, "failed to get location requirements: %v", err.Error())
229+
}
230+
}
231+
180232
// Determine the zone or zones+region of the disk
181233
var zones []string
182234
var volKey *meta.Key
183235
switch params.ReplicationType {
184236
case replicationTypeNone:
185-
zones, err = pickZones(ctx, gceCS, req.GetAccessibilityRequirements(), 1)
237+
zones, err = pickZones(ctx, gceCS, req.GetAccessibilityRequirements(), 1, locationTopReq)
186238
if err != nil {
187239
return nil, status.Errorf(codes.InvalidArgument, "CreateVolume failed to pick zones for disk: %v", err.Error())
188240
}
@@ -192,7 +244,7 @@ func (gceCS *GCEControllerServer) CreateVolume(ctx context.Context, req *csi.Cre
192244
volKey = meta.ZonalKey(name, zones[0])
193245

194246
case replicationTypeRegionalPD:
195-
zones, err = pickZones(ctx, gceCS, req.GetAccessibilityRequirements(), 2)
247+
zones, err = pickZones(ctx, gceCS, req.GetAccessibilityRequirements(), 2, locationTopReq)
196248
if err != nil {
197249
return nil, status.Errorf(codes.InvalidArgument, "CreateVolume failed to pick zones for disk: %v", err.Error())
198250
}
@@ -1358,7 +1410,18 @@ func diskIsAttachedAndCompatible(deviceName string, instance *compute.Instance,
13581410
return false, nil
13591411
}
13601412

1361-
func pickZonesFromTopology(top *csi.TopologyRequirement, numZones int) ([]string, error) {
1413+
// pickZonesInRegion returns the subset of zones that belong to the given
// region, preserving input order. The result is always non-nil.
func pickZonesInRegion(region string, zones []string) []string {
	refinedZones := []string{}
	prefix := region + "-"
	for _, zone := range zones {
		// GCE zone names are "<region>-<letter>". Match on that prefix rather
		// than a bare substring (strings.Contains): a substring match would
		// wrongly capture zones of any region whose name merely contains this
		// region's name (e.g. region "us-east1" matching zone "us-east12-a").
		if strings.HasPrefix(zone, prefix) {
			refinedZones = append(refinedZones, zone)
		}
	}
	return refinedZones
}
1423+
1424+
// pickZonesFromTopology picks numZones zones for the disk from the request's
// topology requirement, preferring the preferred zones. When locationTopReq is
// non-nil (volume cloning), candidates are first constrained to the source
// disk's zone (zonal clones) or region (regional clones).
func pickZonesFromTopology(top *csi.TopologyRequirement, numZones int, locationTopReq *locationRequirements) ([]string, error) {
13621425
reqZones, err := getZonesFromTopology(top.GetRequisite())
13631426
if err != nil {
13641427
return nil, fmt.Errorf("could not get zones from requisite topology: %w", err)
@@ -1368,6 +1431,21 @@ func pickZonesFromTopology(top *csi.TopologyRequirement, numZones int) ([]string
13681431
return nil, fmt.Errorf("could not get zones from preferred topology: %w", err)
13691432
}
13701433

1434+
if locationTopReq != nil {
1435+
switch locationTopReq.replicationType {
1436+
// For zonal -> zonal cloning, the source disk zone must match the destination disk zone.
1437+
case replicationTypeNone:
1438+
if slices.Contains(prefZones, locationTopReq.zone) || slices.Contains(reqZones, locationTopReq.zone) {
1439+
return []string{locationTopReq.zone}, nil
1440+
}
1441+
return nil, fmt.Errorf("failed to find zone in preferred zones %v or requisite zones %v that satisfies location requirements %v", prefZones, reqZones, locationTopReq)
1442+
// For zonal or regional -> regional disk cloning, the source disk region must match the destination disk region.
1443+
case replicationTypeRegionalPD:
1444+
prefZones = pickZonesInRegion(locationTopReq.region, prefZones)
1445+
reqZones = pickZonesInRegion(locationTopReq.region, reqZones)
1446+
// NOTE(review): an unrecognized replicationType falls through this switch and
// silently leaves the zones unconstrained — confirm callers only ever pass
// the two known replication types.
}
1447+
}
1448+
13711449
if numZones <= len(prefZones) {
13721450
return prefZones[0:numZones], nil
13731451
} else {
@@ -1426,16 +1504,16 @@ func getZoneFromSegment(seg map[string]string) (string, error) {
14261504
return zone, nil
14271505
}
14281506

// pickZones picks numZones zones for the disk: from the request's topology
// when one is given, otherwise from the region of the driver's default zone.
// Cloning location requirements (locationTopReq, may be nil) are threaded
// through to both paths.
1429-
func pickZones(ctx context.Context, gceCS *GCEControllerServer, top *csi.TopologyRequirement, numZones int) ([]string, error) {
1507+
func pickZones(ctx context.Context, gceCS *GCEControllerServer, top *csi.TopologyRequirement, numZones int, locationTopReq *locationRequirements) ([]string, error) {
14301508
var zones []string
14311509
var err error
14321510
if top != nil {
1433-
zones, err = pickZonesFromTopology(top, numZones)
1511+
zones, err = pickZonesFromTopology(top, numZones, locationTopReq)
14341512
if err != nil {
14351513
return nil, fmt.Errorf("failed to pick zones from topology: %w", err)
14361514
}
14371515
} else {
1438-
zones, err = getDefaultZonesInRegion(ctx, gceCS, []string{gceCS.CloudProvider.GetDefaultZone()}, numZones)
1516+
zones, err = getDefaultZonesInRegion(ctx, gceCS, []string{gceCS.CloudProvider.GetDefaultZone()}, numZones, locationTopReq)
14391517
if err != nil {
14401518
return nil, fmt.Errorf("failed to get default %v zones in region: %w", numZones, err)
14411519
}
@@ -1445,16 +1523,35 @@ func pickZones(ctx context.Context, gceCS *GCEControllerServer, top *csi.Topolog
14451523
return zones, nil
14461524
}
14471525

// getDefaultZonesInRegion returns numZones zones drawn from the region of
// existingZones: it keeps existingZones and tops the list up from the cloud
// provider's zone listing. When locationTopReq is non-nil (volume cloning),
// the result must additionally satisfy the source disk's location.
1448-
func getDefaultZonesInRegion(ctx context.Context, gceCS *GCEControllerServer, existingZones []string, numZones int) ([]string, error) {
1526+
func getDefaultZonesInRegion(ctx context.Context, gceCS *GCEControllerServer, existingZones []string, numZones int, locationTopReq *locationRequirements) ([]string, error) {
14491527
region, err := common.GetRegionFromZones(existingZones)
14501528
if err != nil {
14511529
return nil, fmt.Errorf("failed to get region from zones: %w", err)
14521530
}
14531531
needToGet := numZones - len(existingZones)
1532+
14541533
totZones, err := gceCS.CloudProvider.ListZones(ctx, region)
14551534
if err != nil {
14561535
return nil, fmt.Errorf("failed to list zones from cloud provider: %w", err)
14571536
}
1537+
1538+
// Cloning constraint: the default region must be the source volume's region.
if locationTopReq != nil {
1539+
if region != locationTopReq.region {
1540+
return nil, fmt.Errorf("default region %s does not match source volume region %s", region, locationTopReq.region)
1541+
}
1542+
switch locationTopReq.replicationType {
1543+
// For zonal -> zonal cloning, the source disk zone must match the destination disk zone.
1544+
case replicationTypeNone:
1545+
if slices.Contains(totZones, locationTopReq.zone) {
1546+
return []string{locationTopReq.zone}, nil
1547+
}
1548+
return nil, fmt.Errorf("failed to find zone in %v that satisfies location requirements %v", totZones, locationTopReq)
1549+
// For zonal or regional -> regional disk cloning, the source disk region must match the destination disk region.
1550+
case replicationTypeRegionalPD:
1551+
// NOTE(review): the return value of pickZonesInRegion is discarded here, so
// this regional filtering has no effect — presumably this should read
// totZones = pickZonesInRegion(locationTopReq.region, totZones).
// Confirm against tests and fix.
pickZonesInRegion(locationTopReq.region, totZones)
1552+
}
1553+
}
1554+
14581555
remainingZones := sets.NewString(totZones...).Difference(sets.NewString(existingZones...))
14591556
l := remainingZones.List()
14601557
if len(l) < needToGet {

0 commit comments

Comments
 (0)