-
Notifications
You must be signed in to change notification settings - Fork 159
Update VerifyDevicePath logic to run udevadm --trigger to fix scenarios where disk is not found or is wrong device #459
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -20,10 +20,13 @@ import ( | |
"os/exec" | ||
"path" | ||
"path/filepath" | ||
"regexp" | ||
"strings" | ||
"time" | ||
|
||
"k8s.io/apimachinery/pkg/util/sets" | ||
"k8s.io/apimachinery/pkg/util/wait" | ||
"k8s.io/klog" | ||
pathutils "k8s.io/utils/path" | ||
) | ||
|
||
const ( | ||
|
@@ -46,6 +49,14 @@ const ( | |
// 'fsck' found errors but exited without correcting them | ||
fsckErrorsUncorrected = 4 | ||
defaultMountCommand = "mount" | ||
// scsi_id output should be in the form of: | ||
// 0Google PersistentDisk <disk name> | ||
scsiPattern = `^0Google\s+PersistentDisk\s+([\S]+)\s*$` | ||
) | ||
|
||
var ( | ||
// regex to parse scsi_id output and extract the serial | ||
scsiRegex = regexp.MustCompile(scsiPattern) | ||
) | ||
|
||
// DeviceUtils are a collection of methods that act on the devices attached | ||
|
@@ -57,7 +68,7 @@ type DeviceUtils interface { | |
|
||
// VerifyDevicePath returns the first of the list of device paths that | ||
// exists on the machine, or an empty string if none exists | ||
VerifyDevicePath(devicePaths []string) (string, error) | ||
VerifyDevicePath(devicePaths []string, diskName string) (string, error) | ||
} | ||
|
||
type deviceUtils struct { | ||
|
@@ -85,74 +96,163 @@ func (m *deviceUtils) GetDiskByIdPaths(deviceName string, partition string) []st | |
return devicePaths | ||
} | ||
|
||
// Returns the first path that exists, or empty string if none exist. | ||
func (m *deviceUtils) VerifyDevicePath(devicePaths []string) (string, error) { | ||
sdBefore, err := filepath.Glob(diskSDPattern) | ||
if err != nil { | ||
// Seeing this error means that the diskSDPattern is malformed. | ||
klog.Errorf("Error filepath.Glob(\"%s\"): %v\r\n", diskSDPattern, err) | ||
func existingDevicePath(devicePaths []string) (string, error) { | ||
for _, devicePath := range devicePaths { | ||
if pathExists, err := pathExists(devicePath); err != nil { | ||
return "", fmt.Errorf("error checking if path exists: %v", err) | ||
} else if pathExists { | ||
return devicePath, nil | ||
} | ||
} | ||
sdBeforeSet := sets.NewString(sdBefore...) | ||
// TODO(#69): Verify udevadm works as intended in driver | ||
if err := udevadmChangeToNewDrives(sdBeforeSet); err != nil { | ||
// udevadm errors should not block disk detachment, log and continue | ||
klog.Errorf("udevadmChangeToNewDrives failed with: %v", err) | ||
return "", nil | ||
} | ||
|
||
// getScsiSerial assumes that /lib/udev/scsi_id exists and will error if it | ||
davidz627 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// doesnt. It is the callers responsibility to verify the existence of this | ||
// tool. Calls scsi_id on the given devicePath to get the serial number reported | ||
// by that device. | ||
func getScsiSerial(devicePath string) (string, error) { | ||
out, err := exec.Command( | ||
"/lib/udev_containerized/scsi_id", | ||
"--page=0x83", | ||
"--whitelisted", | ||
fmt.Sprintf("--device=%v", devicePath)).CombinedOutput() | ||
if err != nil { | ||
return "", fmt.Errorf("scsi_id failed for device %q with output %s: %v", devicePath, string(out), err) | ||
} | ||
|
||
for _, path := range devicePaths { | ||
if pathExists, err := pathExists(path); err != nil { | ||
return "", fmt.Errorf("Error checking if path exists: %v", err) | ||
} else if pathExists { | ||
return path, nil | ||
} | ||
return parseScsiSerial(string(out)) | ||
} | ||
|
||
// Parse the output returned by scsi_id and extract the serial number | ||
func parseScsiSerial(output string) (string, error) { | ||
substrings := scsiRegex.FindStringSubmatch(output) | ||
if substrings == nil { | ||
return "", fmt.Errorf("scsi_id output cannot be parsed: %q", output) | ||
} | ||
|
||
return "", nil | ||
return substrings[1], nil | ||
} | ||
|
||
// Triggers the application of udev rules by calling "udevadm trigger | ||
// --action=change" for newly created "/dev/sd*" drives (exist only in | ||
// after set). This is workaround for Issue #7972. Once the underlying | ||
// issue has been resolved, this may be removed. | ||
func udevadmChangeToNewDrives(sdBeforeSet sets.String) error { | ||
sdAfter, err := filepath.Glob(diskSDPattern) | ||
// VerifyDevicePath returns the first devicePath that maps to a real disk in the | ||
// candidate devicePaths or an empty string if none is found. If | ||
// /lib/udev_containerized/scsi_id exists it will attempt to fix any issues | ||
// caused by missing paths or mismatched devices by running a udevadm --trigger. | ||
func (m *deviceUtils) VerifyDevicePath(devicePaths []string, diskName string) (string, error) { | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Do we need devicePaths as an argument, or can we generate it inside this function? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I guess we don't really have to, but then we would need to add partition as an argument. (I can follow up but probably out of scope for this PR) |
||
var devicePath string | ||
var err error | ||
const ( | ||
pollInterval = 500 * time.Millisecond | ||
pollTimeout = 3 * time.Second | ||
) | ||
|
||
scsiIDPath := "/lib/udev_containerized/scsi_id" | ||
exists, err := pathutils.Exists(pathutils.CheckFollowSymlink, scsiIDPath) | ||
if err != nil { | ||
return fmt.Errorf("Error filepath.Glob(\"%s\"): %v\r\n", diskSDPattern, err) | ||
return "", fmt.Errorf("failed to check scsi_id existence: %v", err) | ||
} | ||
if !exists { | ||
// No SCSI ID tool, the driver should be containerized with the tool so | ||
// maybe something is wrong with the build process | ||
return "", fmt.Errorf("could not find scsi_id tool at %s, unable to verify device paths", scsiIDPath) | ||
} | ||
|
||
for _, sd := range sdAfter { | ||
if !sdBeforeSet.Has(sd) { | ||
return udevadmChangeToDrive(sd) | ||
err = wait.Poll(pollInterval, pollTimeout, func() (bool, error) { | ||
var innerErr error | ||
|
||
devicePath, innerErr = existingDevicePath(devicePaths) | ||
davidz627 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if innerErr != nil { | ||
return false, fmt.Errorf("failed to check for existing device path: %v", innerErr) | ||
} | ||
|
||
if len(devicePath) == 0 { | ||
// Couldn't find the path so we need to find a /dev/sdx with the SCSI | ||
// serial that matches diskName. Then we run udevadm trigger on that | ||
// device to get the device to show up in /dev/by-id/ | ||
innerErr := udevadmTriggerForDiskIfExists(diskName) | ||
if innerErr != nil { | ||
return false, fmt.Errorf("failed to trigger udevadm fix for disk %s: %v", diskName, innerErr) | ||
} | ||
// Go to next retry loop to get the deviceName again after | ||
// potentially fixing it with the udev command | ||
return false, nil | ||
} | ||
|
||
// If there exists a devicePath we make sure disk at /dev/sdx matches the | ||
// expected disk at devicePath by matching SCSI Serial to the disk name | ||
devSDX, innerErr := filepath.EvalSymlinks(devicePath) | ||
if innerErr != nil { | ||
return false, fmt.Errorf("filepath.EvalSymlinks(%q) failed with %v", devicePath, innerErr) | ||
} | ||
// Check to make sure device path maps to the correct disk | ||
if strings.Contains(devSDX, diskSDPath) { | ||
scsiSerial, innerErr := getScsiSerial(devSDX) | ||
if innerErr != nil { | ||
return false, fmt.Errorf("couldn't get SCSI serial number for disk %s: %v", diskName, innerErr) | ||
} | ||
// SUCCESS! devicePath points to a /dev/sdx that has a SCSI serial | ||
// equivilant to our disk name | ||
if scsiSerial == diskName { | ||
return true, nil | ||
} | ||
} | ||
// The devicePath is not mapped to the correct disk | ||
innerErr = udevadmTriggerForDiskIfExists(diskName) | ||
if innerErr != nil { | ||
return false, fmt.Errorf("failed to trigger udevadm fix for disk %s: %v", diskName, innerErr) | ||
} | ||
// Go to next retry loop to get the deviceName again after | ||
// potentially fixing it with the udev command | ||
return false, nil | ||
}) | ||
|
||
if err != nil { | ||
return "", fmt.Errorf("failed to find and re-link disk %s with udevadm after retrying for %v: %v", diskName, pollTimeout, err) | ||
} | ||
|
||
return nil | ||
return devicePath, nil | ||
davidz627 marked this conversation as resolved.
Show resolved
Hide resolved
|
||
} | ||
|
||
// Calls "udevadm trigger --action=change" on the specified drive. | ||
// drivePath must be the block device path to trigger on, in the format "/dev/sd*", or a symlink to it. | ||
// This is workaround for Issue #7972. Once the underlying issue has been resolved, this may be removed. | ||
func udevadmChangeToDrive(drivePath string) error { | ||
klog.V(4).Infof("udevadmChangeToDrive: drive=%q", drivePath) | ||
|
||
// Evaluate symlink, if any | ||
drive, err := filepath.EvalSymlinks(drivePath) | ||
func udevadmTriggerForDiskIfExists(diskName string) error { | ||
sds, err := filepath.Glob(diskSDPattern) | ||
if err != nil { | ||
return fmt.Errorf("udevadmChangeToDrive: filepath.EvalSymlinks(%q) failed with %v.", drivePath, err) | ||
return fmt.Errorf("failed to filepath.Glob(\"%s\"): %v", diskSDPattern, err) | ||
} | ||
// Check to make sure input is "/dev/sd*" | ||
if !strings.Contains(drive, diskSDPath) { | ||
return fmt.Errorf("udevadmChangeToDrive: expected input in the form \"%s\" but drive is %q.", diskSDPattern, drive) | ||
for _, devSDX := range sds { | ||
scsiSerial, err := getScsiSerial(devSDX) | ||
if err != nil { | ||
return fmt.Errorf("failed to get SCSI Serial num for %s: %v", devSDX, err) | ||
} | ||
if scsiSerial == diskName { | ||
// Found the disk that we're looking for so run a trigger on it | ||
// to resolve its /dev/by-id/ path | ||
klog.Warningf("udevadm --trigger running to fix disk at path %s which has SCSI ID %s", devSDX, scsiSerial) | ||
err := udevadmChangeToDrive(devSDX) | ||
if err != nil { | ||
return fmt.Errorf("failed to fix disk at path %s which has SCSI ID %s: %v", devSDX, scsiSerial, err) | ||
} | ||
return nil | ||
} | ||
} | ||
return fmt.Errorf("udevadm --trigger requested to fix disk %s but no such disk was found in %v", diskName, sds) | ||
} | ||
|
||
// Calls "udevadm trigger --action=change" on the specified drive. drivePath | ||
// must be the block device path to trigger on, in the format "/dev/sd*", or a | ||
// symlink to it. This is workaround for Issue #7972. Once the underlying issue | ||
// has been resolved, this may be removed. | ||
// udevadm takes a little bit to work its magic in the background so any callers | ||
// should not expect the trigger to complete instantly and may need to poll for | ||
// the change | ||
func udevadmChangeToDrive(devSDX string) error { | ||
// Call "udevadm trigger --action=change --property-match=DEVNAME=/dev/sd..." | ||
_, err = exec.Command( | ||
out, err := exec.Command( | ||
"udevadm", | ||
"trigger", | ||
"--action=change", | ||
fmt.Sprintf("--property-match=DEVNAME=%s", drive)).CombinedOutput() | ||
fmt.Sprintf("--property-match=DEVNAME=%s", devSDX)).CombinedOutput() | ||
if err != nil { | ||
return fmt.Errorf("udevadmChangeToDrive: udevadm trigger failed for drive %q with %v.", drive, err) | ||
return fmt.Errorf("udevadmChangeToDrive: udevadm trigger failed for drive %q with output %s: %v.", devSDX, string(out), err) | ||
} | ||
return nil | ||
} | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
we had two init functions before, I don't know if that was causing issues but it doesn't hurt to fix