Skip to content

Automated cherry pick of #933: Add support for NVMe NodePublishVolume #941

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 10 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ RUN GOARCH=$(echo $TARGETPLATFORM | cut -f2 -d '/') GCE_PD_CSI_STAGING_VERSION=$
# Start from Kubernetes Debian base.
FROM k8s.gcr.io/build-image/debian-base:buster-v1.9.0 as debian
# Install necessary dependencies
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs
# google_nvme_id script depends on the following packages: nvme-cli, xxd, bash
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs nvme-cli xxd bash
# Since we're leveraging apt to pull in dependencies, we use `gcr.io/distroless/base` because it includes glibc.
FROM gcr.io/distroless/base-debian11
# Copy necessary dependencies into distroless base.
Expand All @@ -50,6 +51,14 @@ COPY --from=debian /sbin/xfs_repair /sbin/xfs_repair
COPY --from=debian /usr/include/xfs /usr/include/xfs
COPY --from=debian /usr/lib/xfsprogs/xfs* /usr/lib/xfsprogs/
COPY --from=debian /usr/sbin/xfs* /usr/sbin/
# Add dependencies for /lib/udev_containerized/google_nvme_id script
COPY --from=debian /usr/sbin/nvme /usr/sbin/nvme
COPY --from=debian /usr/bin/xxd /usr/bin/xxd
COPY --from=debian /bin/bash /bin/bash
COPY --from=debian /bin/date /bin/date
COPY --from=debian /bin/grep /bin/grep
COPY --from=debian /bin/sed /bin/sed
COPY --from=debian /bin/ln /bin/ln

# Copy x86 shared libraries into distroless base.
COPY --from=debian /lib/x86_64-linux-gnu/libblkid.so.1 /lib/x86_64-linux-gnu/libblkid.so.1
Expand Down
6 changes: 5 additions & 1 deletion Dockerfile.arm64
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,11 @@ RUN clean-install udev
FROM k8s.gcr.io/build-image/debian-base:buster-v1.9.0
COPY --from=builder /go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/bin/gce-pd-csi-driver /gce-pd-csi-driver
# Install necessary dependencies
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs
# google_nvme_id script depends on the following packages: nvme-cli, xxd, bash
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs nvme-cli xxd bash
COPY --from=mad-hack /lib/udev/scsi_id /lib/udev_containerized/scsi_id

# Copy google_nvme_id script, which is needed for NVMe support.
COPY deploy/kubernetes/udev/google_nvme_id /lib/udev_containerized/google_nvme_id

ENTRYPOINT ["/gce-pd-csi-driver"]
189 changes: 137 additions & 52 deletions pkg/mount-manager/device-utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ const (
diskPartitionSuffix = "-part"
diskSDPath = "/dev/sd"
diskSDPattern = "/dev/sd*"
diskNvmePath = "/dev/nvme"
diskNvmePattern = "/dev/nvme*"
// How many times to retry for a consistent read of /proc/mounts.
maxListTries = 3
// Number of fields per line in /proc/mounts as per the fstab man page.
Expand All @@ -52,11 +54,20 @@ const (
// scsi_id output should be in the form of:
// 0Google PersistentDisk <disk name>
scsiPattern = `^0Google\s+PersistentDisk\s+([\S]+)\s*$`
// google_nvme_id output should be in the form of:
// ID_SERIAL_SHORT=<disk name>
// Note: The google_nvme_id tool prints out multiple lines, hence we don't
// use '^' and '$' to wrap nvmePattern as is done in scsiPattern.
nvmePattern = `ID_SERIAL_SHORT=([\S]+)\s*`
scsiIdPath = "/lib/udev_containerized/scsi_id"
nvmeIdPath = "/lib/udev_containerized/google_nvme_id"
)

var (
// regex to parse scsi_id output and extract the serial
scsiRegex = regexp.MustCompile(scsiPattern)
// regex to parse google_nvme_id output and extract the serial
nvmeRegex = regexp.MustCompile(nvmePattern)
)

// DeviceUtils are a collection of methods that act on the devices attached
Expand Down Expand Up @@ -107,13 +118,13 @@ func existingDevicePath(devicePaths []string) (string, error) {
return "", nil
}

// getScsiSerial assumes that /lib/udev/scsi_id exists and will error if it
// getScsiSerial assumes that scsiIdPath exists and will error if it
// doesnt. It is the callers responsibility to verify the existence of this
// tool. Calls scsi_id on the given devicePath to get the serial number reported
// by that device.
func getScsiSerial(devicePath string) (string, error) {
out, err := exec.Command(
"/lib/udev_containerized/scsi_id",
scsiIdPath,
"--page=0x83",
"--whitelisted",
fmt.Sprintf("--device=%v", devicePath)).CombinedOutput()
Expand All @@ -134,9 +145,58 @@ func parseScsiSerial(output string) (string, error) {
return substrings[1], nil
}

// getNvmeSerial calls google_nvme_id on the given devicePath to get the serial
// number reported by that device.
// NOTE: getNvmeSerial assumes that nvmeIdPath exists and will error if it
// doesn't. It is the caller's responsibility to verify the existence of this
// tool.
func getNvmeSerial(devicePath string) (string, error) {
out, err := exec.Command(
nvmeIdPath,
fmt.Sprintf("-d%s", devicePath)).CombinedOutput()
if err != nil {
return "", fmt.Errorf("google_nvme_id failed for device %q with output %v: %v", devicePath, out, err)
}

return parseNvmeSerial(string(out))
}

// Parse the output returned by google_nvme_id and extract the serial number
func parseNvmeSerial(output string) (string, error) {
substrings := nvmeRegex.FindStringSubmatch(output)
if substrings == nil {
return "", fmt.Errorf("google_nvme_id output cannot be parsed: %q", output)
}

return substrings[1], nil
}

func ensureUdevToolExists(toolPath string) error {
exists, err := pathutils.Exists(pathutils.CheckFollowSymlink, toolPath)
if err != nil {
return fmt.Errorf("failed to check existence of %q: %v", toolPath, err)
}
if !exists {
// The driver should be containerized with the tool so maybe something is
// wrong with the build process
return fmt.Errorf("could not find tool at %q, unable to verify device paths", nvmeIdPath)
}
return nil
}

func ensureUdevToolsExist() error {
if err := ensureUdevToolExists(scsiIdPath); err != nil {
return err
}
if err := ensureUdevToolExists(nvmeIdPath); err != nil {
return err
}
return nil
}

// VerifyDevicePath returns the first devicePath that maps to a real disk in the
// candidate devicePaths or an empty string if none is found. If
// /lib/udev_containerized/scsi_id exists it will attempt to fix any issues
// candidate devicePaths or an empty string if none is found.
// If the device is not found, it will attempt to fix any issues
// caused by missing paths or mismatched devices by running a udevadm --trigger.
func (m *deviceUtils) VerifyDevicePath(devicePaths []string, deviceName string) (string, error) {
var devicePath string
Expand All @@ -146,15 +206,10 @@ func (m *deviceUtils) VerifyDevicePath(devicePaths []string, deviceName string)
pollTimeout = 3 * time.Second
)

scsiIDPath := "/lib/udev_containerized/scsi_id"
exists, err := pathutils.Exists(pathutils.CheckFollowSymlink, scsiIDPath)
// Ensure tools in /lib/udev_containerized directory exist
err = ensureUdevToolsExist()
if err != nil {
return "", fmt.Errorf("failed to check scsi_id existence: %v", err)
}
if !exists {
// No SCSI ID tool, the driver should be containerized with the tool so
// maybe something is wrong with the build process
return "", fmt.Errorf("could not find scsi_id tool at %s, unable to verify device paths", scsiIDPath)
return "", err
}

err = wait.Poll(pollInterval, pollTimeout, func() (bool, error) {
Expand All @@ -166,40 +221,41 @@ func (m *deviceUtils) VerifyDevicePath(devicePaths []string, deviceName string)
}

if len(devicePath) == 0 {
// Couldn't find the path so we need to find a /dev/sdx with the SCSI
// serial that matches deviceName. Then we run udevadm trigger on that
// device to get the device to show up in /dev/by-id/
// Couldn't find a /dev/disk/by-id path for this deviceName, so we need to
// find a /dev/* with a serial that matches deviceName. Then we attempt
// to repair the symlink.
innerErr := udevadmTriggerForDiskIfExists(deviceName)
if innerErr != nil {
return false, fmt.Errorf("failed to trigger udevadm fix: %v", innerErr)
return false, fmt.Errorf("failed to trigger udevadm fix of non existent disk for %q: %v", deviceName, innerErr)
}
// Go to next retry loop to get the deviceName again after
// potentially fixing it with the udev command
return false, nil
}

// If there exists a devicePath we make sure disk at /dev/sdx matches the
// expected disk at devicePath by matching SCSI Serial to the disk name
devSDX, innerErr := filepath.EvalSymlinks(devicePath)
// If there exists a devicePath we make sure disk at /dev/* matches the
// expected disk at devicePath by matching device Serial to the disk name
devFsPath, innerErr := filepath.EvalSymlinks(devicePath)
if innerErr != nil {
return false, fmt.Errorf("filepath.EvalSymlinks(%q) failed with %v", devicePath, innerErr)
}
// Check to make sure device path maps to the correct disk
if strings.Contains(devSDX, diskSDPath) {
scsiSerial, innerErr := getScsiSerial(devSDX)
if innerErr != nil {
return false, fmt.Errorf("couldn't get SCSI serial number for disk %s: %v", deviceName, innerErr)
}
// SUCCESS! devicePath points to a /dev/sdx that has a SCSI serial
// equivalent to our disk name
if scsiSerial == deviceName {
return true, nil
}

devFsSerial, innerErr := getDevFsSerial(devFsPath)
if innerErr != nil {
return false, fmt.Errorf("couldn't get serial number for disk %s at path %s: %v", deviceName, devFsPath, innerErr)
}
// SUCCESS! devicePath points to a /dev/* path that has a serial
// equivalent to our disk name
if len(devFsSerial) != 0 && devFsSerial == deviceName {
return true, nil
}
// The devicePath is not mapped to the correct disk

// A /dev/* path exists, but is either not a recognized /dev prefix type
// (/dev/nvme* or /dev/sd*) or devicePath is not mapped to the correct disk.
// Attempt a repair
innerErr = udevadmTriggerForDiskIfExists(deviceName)
if innerErr != nil {
return false, fmt.Errorf("failed to trigger udevadm fix: %v", innerErr)
return false, fmt.Errorf("failed to trigger udevadm fix of misconfigured disk for %q: %v", deviceName, innerErr)
}
// Go to next retry loop to get the deviceName again after
// potentially fixing it with the udev command
Expand All @@ -213,49 +269,78 @@ func (m *deviceUtils) VerifyDevicePath(devicePaths []string, deviceName string)
return devicePath, nil
}

// getDevFsSerial returns the serial number of the /dev/* path at devFsPath.
// If devFsPath does not start with a known prefix, returns the empty string.
func getDevFsSerial(devFsPath string) (string, error) {
switch {
case strings.HasPrefix(devFsPath, diskSDPath):
return getScsiSerial(devFsPath)
case strings.HasPrefix(devFsPath, diskNvmePath):
return getNvmeSerial(devFsPath)
default:
return "", nil
}
}

func findAvailableDevFsPaths() ([]string, error) {
diskSDPaths, err := filepath.Glob(diskSDPattern)
if err != nil {
return nil, fmt.Errorf("failed to filepath.Glob(\"%s\"): %v", diskSDPattern, err)
}
diskNvmePaths, err := filepath.Glob(diskNvmePattern)
if err != nil {
return nil, fmt.Errorf("failed to filepath.Glob(\"%s\"): %v", diskNvmePattern, err)
}
return append(diskSDPaths, diskNvmePaths...), nil
}

func udevadmTriggerForDiskIfExists(deviceName string) error {
devToSCSI := map[string]string{}
sds, err := filepath.Glob(diskSDPattern)
devFsPathToSerial := map[string]string{}
devFsPaths, err := findAvailableDevFsPaths()
if err != nil {
return fmt.Errorf("failed to filepath.Glob(\"%s\"): %v", diskSDPattern, err)
return err
}
for _, devSDX := range sds {
scsiSerial, err := getScsiSerial(devSDX)
if err != nil {
return fmt.Errorf("failed to get SCSI Serial num: %v", err)
for _, devFsPath := range devFsPaths {
devFsSerial, err := getDevFsSerial(devFsPath)
if err != nil || len(devFsSerial) == 0 {
// If we get an error, ignore. Either this isn't a block device, or it
// isn't something we can get a serial number from
klog.V(7).Infof("failed to get Serial num for disk %s at path %s: %v", deviceName, devFsPath, err)
continue
}
devToSCSI[devSDX] = scsiSerial
if scsiSerial == deviceName {
devFsPathToSerial[devFsPath] = devFsSerial
if devFsSerial == deviceName {
// Found the disk that we're looking for so run a trigger on it
// to resolve its /dev/by-id/ path
klog.Warningf("udevadm --trigger running to fix disk at path %s which has SCSI ID %s", devSDX, scsiSerial)
err := udevadmChangeToDrive(devSDX)
klog.Warningf("udevadm --trigger running to fix disk at path %s which has serial numberID %s", devFsPath, devFsSerial)
err := udevadmChangeToDrive(devFsPath)
if err != nil {
return fmt.Errorf("failed to fix disk which has SCSI ID %s: %v", scsiSerial, err)
return fmt.Errorf("failed to fix disk which has serial numberID %s: %v", devFsSerial, err)
}
return nil
}
}
klog.Warningf("udevadm --trigger requested to fix disk %s but no such disk was found in %v", deviceName, devToSCSI)
klog.Warningf("udevadm --trigger requested to fix disk %s but no such disk was found in %v", deviceName, devFsPathToSerial)
return fmt.Errorf("udevadm --trigger requested to fix disk %s but no such disk was found", deviceName)
}

// Calls "udevadm trigger --action=change" on the specified drive. drivePath
// must be the block device path to trigger on, in the format "/dev/sd*", or a
// symlink to it. This is workaround for Issue #7972. Once the underlying issue
// has been resolved, this may be removed.
// must be the block device path to trigger on, in the format "/dev/*", or a
// symlink to it. This is workaround for Issue #7972
// (https://github.com/kubernetes/kubernetes/issues/7972). Once the underlying
// issue has been resolved, this may be removed.
// udevadm takes a little bit to work its magic in the background so any callers
// should not expect the trigger to complete instantly and may need to poll for
// the change
func udevadmChangeToDrive(devSDX string) error {
// Call "udevadm trigger --action=change --property-match=DEVNAME=/dev/sd..."
func udevadmChangeToDrive(devFsPath string) error {
// Call "udevadm trigger --action=change --property-match=DEVNAME=/dev/..."
out, err := exec.Command(
"udevadm",
"trigger",
"--action=change",
fmt.Sprintf("--property-match=DEVNAME=%s", devSDX)).CombinedOutput()
fmt.Sprintf("--property-match=DEVNAME=%s", devFsPath)).CombinedOutput()
if err != nil {
return fmt.Errorf("udevadmChangeToDrive: udevadm trigger failed for drive %q with output %s: %v.", devSDX, string(out), err)
return fmt.Errorf("udevadmChangeToDrive: udevadm trigger failed for drive %q with output %s: %v.", devFsPath, string(out), err)
}
return nil
}
Expand Down
47 changes: 47 additions & 0 deletions pkg/mount-manager/device-utils_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package mountmanager

import (
"testing"
)

func TestParseNvmeSerial(t *testing.T) {
testCases := []struct {
name string
output string
serial string
expErr bool
}{
{
name: "valid google_nvme_id response",
output: "line 58: warning: command substitution: ignored null byte in input\nID_SERIAL_SHORT=pvc-8ee0cf44-6acd-456e-9f3b-95ccd65065b9\nID_SERIAL=Google_PersistentDisk_pvc-8ee0cf44-6acd-456e-9f3b-95ccd65065b9",
serial: "pvc-8ee0cf44-6acd-456e-9f3b-95ccd65065b9",
expErr: false,
},
{
name: "valid google_nvme_id boot disk response",
output: "line 58: warning: command substitution: ignored null byte in input\nID_SERIAL_SHORT=persistent-disk-0\nID_SERIAL=Google_PersistentDisk_persistent-disk-0",
serial: "persistent-disk-0",
expErr: false,
},
{
name: "invalid google_nvme_id response",
output: "Error: requesting namespace-id from non-block device\nNVMe Status:INVALID_NS: The namespace or the format of that namespace is invalid(b) NSID:0\nxxd: sorry cannot seek.\n[2022-03-12T04:17:17+0000]: NVMe Vendor Extension disk information not present",
serial: "",
expErr: true,
},
}

for _, tc := range testCases {
t.Logf("Running test: %v", tc.name)
actualSerial, err := parseNvmeSerial(tc.output)
if tc.expErr && err == nil {
t.Fatalf("Expected error but didn't get any")
}
if !tc.expErr && err != nil {
t.Fatalf("Got unexpected error: %s", err)
}
if actualSerial != tc.serial {
t.Fatalf("Expected '%s' but got '%s'", tc.serial, actualSerial)
}
}
}
5 changes: 5 additions & 0 deletions test/e2e/tests/setup_e2e_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,11 @@ var _ = BeforeSuite(func() {
klog.Fatalf("could not copy scsi_id to containerized directory: %v", err)
}

err = testutils.CopyFile(i, "/lib/udev/google_nvme_id", "/lib/udev_containerized/google_nvme_id")
if err != nil {
klog.Fatalf("could not copy google_nvme_id to containerized directory: %v", err)
}

klog.Infof("Creating new driver and client for node %s\n", i.GetName())
// Create new driver and client
testContext, err := testutils.GCEClientAndDriverSetup(i)
Expand Down
Loading