Skip to content

Resolve disk name collisions for attached regional disks #111

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions pkg/common/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ const (
nodeIDZoneValue = 0
nodeIDNameValue = 1
nodeIDTotalElements = 2

regionalDeviceNameSuffix = "_regional"
)

func BytesToGb(bytes int64) int64 {
Expand Down Expand Up @@ -90,3 +92,14 @@ func GetRegionFromZones(zones []string) (string, error) {
}
return regions.UnsortedList()[0], nil
}

func GetDeviceName(volKey *meta.Key) (string, error) {
switch volKey.Type() {
case meta.Zonal:
return volKey.Name, nil
case meta.Regional:
return volKey.Name + regionalDeviceNameSuffix, nil
default:
return "", fmt.Errorf("volume key %v neither zonal nor regional", volKey.Name)
}
}
13 changes: 11 additions & 2 deletions pkg/gce-cloud-provider/compute/gce-compute.go
Original file line number Diff line number Diff line change
Expand Up @@ -300,8 +300,12 @@ func (cloud *CloudProvider) deleteRegionalDisk(ctx context.Context, region, name
func (cloud *CloudProvider) AttachDisk(ctx context.Context, disk *CloudDisk, volKey *meta.Key, readWrite, diskType, instanceZone, instanceName string) error {
source := cloud.GetDiskSourceURI(disk, volKey)

deviceName, err := common.GetDeviceName(volKey)
if err != nil {
return fmt.Errorf("failed to get device name: %v", err)
}
attachedDiskV1 := &compute.AttachedDisk{
DeviceName: disk.GetName(),
DeviceName: deviceName,
Kind: disk.GetKind(),
Mode: readWrite,
Source: source,
Expand All @@ -320,7 +324,12 @@ func (cloud *CloudProvider) AttachDisk(ctx context.Context, disk *CloudDisk, vol
}

func (cloud *CloudProvider) DetachDisk(ctx context.Context, volKey *meta.Key, instanceZone, instanceName string) error {
op, err := cloud.service.Instances.DetachDisk(cloud.project, instanceZone, instanceName, volKey.Name).Context(ctx).Do()
deviceName, err := common.GetDeviceName(volKey)
if err != nil {
return err
}

op, err := cloud.service.Instances.DetachDisk(cloud.project, instanceZone, instanceName, deviceName).Context(ctx).Do()
if err != nil {
return err
}
Expand Down
26 changes: 16 additions & 10 deletions pkg/gce-pd-csi-driver/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,12 @@ func (gceCS *GCEControllerServer) ControllerPublishVolume(ctx context.Context, r
readWrite = "READ_ONLY"
}

attached, err := diskIsAttachedAndCompatible(disk, instance, volumeCapability, readWrite)
deviceName, err := common.GetDeviceName(volKey)
if err != nil {
return nil, status.Error(codes.Internal, fmt.Sprintf("error getting device name: %v", err))
}

attached, err := diskIsAttachedAndCompatible(deviceName, instance, volumeCapability, readWrite)
if err != nil {
return nil, status.Error(codes.AlreadyExists, fmt.Sprintf("Disk %v already published to node %v but incompatbile: %v", volKey.Name, nodeID, err))
}
Expand Down Expand Up @@ -298,10 +303,6 @@ func (gceCS *GCEControllerServer) ControllerUnpublishVolume(ctx context.Context,
return nil, err
}

disk, err := gceCS.CloudProvider.GetDisk(ctx, volKey)
if err != nil {
return nil, err
}
instanceZone, instanceName, err := common.NodeIDToZoneAndName(nodeID)
if err != nil {
return nil, status.Error(codes.InvalidArgument, fmt.Sprintf("could not split nodeID: %v", err))
Expand All @@ -311,7 +312,12 @@ func (gceCS *GCEControllerServer) ControllerUnpublishVolume(ctx context.Context,
return nil, err
}

attached := diskIsAttached(disk, instance)
deviceName, err := common.GetDeviceName(volKey)
if err != nil {
return nil, status.Error(codes.Internal, fmt.Sprintf("error getting device name: %v", err))
}

attached := diskIsAttached(deviceName, instance)

if !attached {
// Volume is not attached to node. Success!
Expand Down Expand Up @@ -473,19 +479,19 @@ func getRequestCapacity(capRange *csi.CapacityRange) (int64, error) {
return capBytes, nil
}

func diskIsAttached(volume *gce.CloudDisk, instance *compute.Instance) bool {
func diskIsAttached(deviceName string, instance *compute.Instance) bool {
for _, disk := range instance.Disks {
if disk.DeviceName == volume.GetName() {
if disk.DeviceName == deviceName {
// Disk is attached to node
return true
}
}
return false
}

func diskIsAttachedAndCompatible(volume *gce.CloudDisk, instance *compute.Instance, volumeCapability *csi.VolumeCapability, readWrite string) (bool, error) {
func diskIsAttachedAndCompatible(deviceName string, instance *compute.Instance, volumeCapability *csi.VolumeCapability, readWrite string) (bool, error) {
for _, disk := range instance.Disks {
if disk.DeviceName == volume.GetName() {
if disk.DeviceName == deviceName {
// Disk is attached to node
if disk.Mode != readWrite {
return true, fmt.Errorf("disk mode does not match. Got %v. Want %v", disk.Mode, readWrite)
Expand Down
38 changes: 14 additions & 24 deletions pkg/gce-pd-csi-driver/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -604,15 +604,13 @@ func TestGetRequestCapacity(t *testing.T) {
func TestDiskIsAttached(t *testing.T) {
testCases := []struct {
name string
disk *compute.Disk
deviceName string
instance *compute.Instance
expAttached bool
}{
{
name: "normal-attached",
disk: &compute.Disk{
Name: "test-disk",
},
name: "normal-attached",
deviceName: "test-disk",
instance: &compute.Instance{
Disks: []*compute.AttachedDisk{
{
Expand All @@ -623,10 +621,8 @@ func TestDiskIsAttached(t *testing.T) {
expAttached: true,
},
{
name: "normal-not-attached",
disk: &compute.Disk{
Name: "test-disk",
},
name: "normal-not-attached",
deviceName: "test-disk",
instance: &compute.Instance{
Disks: []*compute.AttachedDisk{
{
Expand All @@ -639,7 +635,7 @@ func TestDiskIsAttached(t *testing.T) {
}
for _, tc := range testCases {
t.Logf("test case: %s", tc.name)
if attached := diskIsAttached(gce.ZonalCloudDisk(tc.disk), tc.instance); attached != tc.expAttached {
if attached := diskIsAttached(tc.deviceName, tc.instance); attached != tc.expAttached {
t.Errorf("Expected disk attached to be %v, but got %v", tc.expAttached, attached)
}
}
Expand All @@ -648,17 +644,15 @@ func TestDiskIsAttached(t *testing.T) {
func TestDiskIsAttachedAndCompatible(t *testing.T) {
testCases := []struct {
name string
disk *compute.Disk
deviceName string
instance *compute.Instance
mode string
expAttached bool
expErr bool
}{
{
name: "normal-attached",
disk: &compute.Disk{
Name: "test-disk",
},
name: "normal-attached",
deviceName: "test-disk",
instance: &compute.Instance{
Disks: []*compute.AttachedDisk{
{
Expand All @@ -671,10 +665,8 @@ func TestDiskIsAttachedAndCompatible(t *testing.T) {
expAttached: true,
},
{
name: "normal-not-attached",
disk: &compute.Disk{
Name: "test-disk",
},
name: "normal-not-attached",
deviceName: "test-disk",
instance: &compute.Instance{
Disks: []*compute.AttachedDisk{
{
Expand All @@ -687,10 +679,8 @@ func TestDiskIsAttachedAndCompatible(t *testing.T) {
expAttached: false,
},
{
name: "incompatible mode",
disk: &compute.Disk{
Name: "test-disk",
},
name: "incompatible mode",
deviceName: "test-disk",
instance: &compute.Instance{
Disks: []*compute.AttachedDisk{
{
Expand All @@ -706,7 +696,7 @@ func TestDiskIsAttachedAndCompatible(t *testing.T) {
}
for _, tc := range testCases {
t.Logf("test case: %s", tc.name)
attached, err := diskIsAttachedAndCompatible(gce.ZonalCloudDisk(tc.disk), tc.instance, nil, tc.mode)
attached, err := diskIsAttachedAndCompatible(tc.deviceName, tc.instance, nil, tc.mode)
if err != nil && !tc.expErr {
t.Errorf("Did not expect error but got: %v", err)
}
Expand Down
7 changes: 6 additions & 1 deletion pkg/gce-pd-csi-driver/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,12 @@ func (ns *GCENodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStage
// TODO(#83): Get real partitions
partition := ""

devicePaths := ns.DeviceUtils.GetDiskByIdPaths(volumeKey.Name, partition)
deviceName, err := common.GetDeviceName(volumeKey)
if err != nil {
status.Error(codes.Internal, fmt.Sprintf("error getting device name: %v", err))
}

devicePaths := ns.DeviceUtils.GetDiskByIdPaths(deviceName, partition)
devicePath, err := ns.DeviceUtils.VerifyDevicePath(devicePaths)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

did you merge that udev fix that used scsi_id and validated the device name? I wonder if that will have issues with this change.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm I'm not really sure what you're talking about. I have a vague memory of something like that but I can't find a PR about it...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

do you mean the change you made to k8s/k8s where you try to rerun udevadm trigger a bunch of times after doing some file parsing for validation? If so, then no I haven't merged that

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ok we may have to revisit this in that PR then


if err != nil {
Expand Down
12 changes: 6 additions & 6 deletions pkg/mount-manager/device-utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ const (
type DeviceUtils interface {
// GetDiskByIdPaths returns a list of all possible paths for a
// given Persistent Disk
GetDiskByIdPaths(pdName string, partition string) []string
GetDiskByIdPaths(deviceName string, partition string) []string

// VerifyDevicePath returns the first of the list of device paths that
// exists on the machine, or an empty string if none exists
Expand All @@ -71,12 +71,12 @@ func NewDeviceUtils() *deviceUtils {
}

// Returns list of all /dev/disk/by-id/* paths for given PD.
func (m *deviceUtils) GetDiskByIdPaths(pdName string, partition string) []string {
func (m *deviceUtils) GetDiskByIdPaths(deviceName string, partition string) []string {
devicePaths := []string{
path.Join(diskByIdPath, diskGooglePrefix+pdName),
path.Join(diskByIdPath, diskScsiGooglePrefix+pdName),
path.Join(diskByHostIdPath, diskScsiGooglePrefix+pdName),
path.Join(diskByHostIdPath, diskScsiGooglePrefix+pdName),
path.Join(diskByIdPath, diskGooglePrefix+deviceName),
path.Join(diskByIdPath, diskScsiGooglePrefix+deviceName),
path.Join(diskByHostIdPath, diskScsiGooglePrefix+deviceName),
path.Join(diskByHostIdPath, diskScsiGooglePrefix+deviceName),
}

if partition != "" {
Expand Down
3 changes: 1 addition & 2 deletions test/run-e2e-local.sh
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
#!/bin/bash

#set -o nounset
set -o nounset
set -o errexit

readonly PKGDIR=sigs.k8s.io/gcp-compute-persistent-disk-csi-driver


ginkgo -v "test/e2e/tests" --logtostderr -- --project ${PROJECT} --service-account ${IAM_NAME}