Skip to content

Commit b614d93

Browse files
committed
Add support for NVMe NodePublishVolume
1 parent b63bafb commit b614d93

File tree

8 files changed

+319
-78
lines changed

8 files changed

+319
-78
lines changed

Dockerfile

+10-1
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ RUN GOARCH=$(echo $TARGETPLATFORM | cut -f2 -d '/') GCE_PD_CSI_STAGING_VERSION=$
2626
# Start from Kubernetes Debian base.
2727
FROM k8s.gcr.io/build-image/debian-base:buster-v1.9.0 as debian
2828
# Install necessary dependencies
29-
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs
29+
# google_nvme_id script depends on the following packages: nvme-cli, xxd, bash
30+
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs nvme-cli xxd bash
3031
# Since we're leveraging apt to pull in dependencies, we use `gcr.io/distroless/base` because it includes glibc.
3132
FROM gcr.io/distroless/base-debian11
3233
# Copy necessary dependencies into distroless base.
@@ -50,6 +51,14 @@ COPY --from=debian /sbin/xfs_repair /sbin/xfs_repair
5051
COPY --from=debian /usr/include/xfs /usr/include/xfs
5152
COPY --from=debian /usr/lib/xfsprogs/xfs* /usr/lib/xfsprogs/
5253
COPY --from=debian /usr/sbin/xfs* /usr/sbin/
54+
# Add dependencies for /lib/udev_containerized/google_nvme_id script
55+
COPY --from=debian /usr/sbin/nvme /usr/sbin/nvme
56+
COPY --from=debian /usr/bin/xxd /usr/bin/xxd
57+
COPY --from=debian /bin/bash /bin/bash
58+
COPY --from=debian /bin/date /bin/date
59+
COPY --from=debian /bin/grep /bin/grep
60+
COPY --from=debian /bin/sed /bin/sed
61+
COPY --from=debian /bin/ln /bin/ln
5362

5463
# Copy x86 shared libraries into distroless base.
5564
COPY --from=debian /lib/x86_64-linux-gnu/libblkid.so.1 /lib/x86_64-linux-gnu/libblkid.so.1

Dockerfile.arm64

+5-1
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,11 @@ RUN clean-install udev
3232
FROM k8s.gcr.io/build-image/debian-base:buster-v1.9.0
3333
COPY --from=builder /go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/bin/gce-pd-csi-driver /gce-pd-csi-driver
3434
# Install necessary dependencies
35-
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs
35+
# google_nvme_id script depends on the following packages: nvme-cli, xxd, bash
36+
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs nvme-cli xxd bash
3637
COPY --from=mad-hack /lib/udev/scsi_id /lib/udev_containerized/scsi_id
3738

39+
# Copy google_nvme_id script, which is needed for NVMe support.
40+
COPY deploy/kubernetes/udev/google_nvme_id /lib/udev_containerized/google_nvme_id
41+
3842
ENTRYPOINT ["/gce-pd-csi-driver"]

pkg/mount-manager/device-utils.go

+137-52
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ const (
3636
diskPartitionSuffix = "-part"
3737
diskSDPath = "/dev/sd"
3838
diskSDPattern = "/dev/sd*"
39+
diskNvmePath = "/dev/nvme"
40+
diskNvmePattern = "/dev/nvme*"
3941
// How many times to retry for a consistent read of /proc/mounts.
4042
maxListTries = 3
4143
// Number of fields per line in /proc/mounts as per the fstab man page.
@@ -52,11 +54,20 @@ const (
5254
// scsi_id output should be in the form of:
5355
// 0Google PersistentDisk <disk name>
5456
scsiPattern = `^0Google\s+PersistentDisk\s+([\S]+)\s*$`
57+
// google_nvme_id output should be in the form of:
58+
// ID_SERIAL_SHORT=<disk name>
59+
// Note: The google_nvme_id tool prints out multiple lines, hence we don't
60+
// use '^' and '$' to wrap nvmePattern as is done in scsiPattern.
61+
nvmePattern = `ID_SERIAL_SHORT=([\S]+)\s*`
62+
scsiIdPath = "/lib/udev_containerized/scsi_id"
63+
nvmeIdPath = "/lib/udev_containerized/google_nvme_id"
5564
)
5665

5766
var (
5867
// regex to parse scsi_id output and extract the serial
5968
scsiRegex = regexp.MustCompile(scsiPattern)
69+
// regex to parse google_nvme_id output and extract the serial
70+
nvmeRegex = regexp.MustCompile(nvmePattern)
6071
)
6172

6273
// DeviceUtils are a collection of methods that act on the devices attached
@@ -107,13 +118,13 @@ func existingDevicePath(devicePaths []string) (string, error) {
107118
return "", nil
108119
}
109120

110-
// getScsiSerial assumes that /lib/udev/scsi_id exists and will error if it
121+
// getScsiSerial assumes that scsiIdPath exists and will error if it
111122
// doesnt. It is the callers responsibility to verify the existence of this
112123
// tool. Calls scsi_id on the given devicePath to get the serial number reported
113124
// by that device.
114125
func getScsiSerial(devicePath string) (string, error) {
115126
out, err := exec.Command(
116-
"/lib/udev_containerized/scsi_id",
127+
scsiIdPath,
117128
"--page=0x83",
118129
"--whitelisted",
119130
fmt.Sprintf("--device=%v", devicePath)).CombinedOutput()
@@ -134,9 +145,58 @@ func parseScsiSerial(output string) (string, error) {
134145
return substrings[1], nil
135146
}
136147

148+
// getNvmeSerial calls google_nvme_id on the given devicePath to get the serial
149+
// number reported by that device.
150+
// NOTE: getNvmeSerial assumes that nvmeIdPath exists and will error if it
151+
// doesn't. It is the caller's responsibility to verify the existence of this
152+
// tool.
153+
func getNvmeSerial(devicePath string) (string, error) {
154+
out, err := exec.Command(
155+
nvmeIdPath,
156+
fmt.Sprintf("-d%s", devicePath)).CombinedOutput()
157+
if err != nil {
158+
return "", fmt.Errorf("google_nvme_id failed for device %q with output %v: %v", devicePath, out, err)
159+
}
160+
161+
return parseNvmeSerial(string(out))
162+
}
163+
164+
// Parse the output returned by google_nvme_id and extract the serial number
165+
func parseNvmeSerial(output string) (string, error) {
166+
substrings := nvmeRegex.FindStringSubmatch(output)
167+
if substrings == nil {
168+
return "", fmt.Errorf("google_nvme_id output cannot be parsed: %q", output)
169+
}
170+
171+
return substrings[1], nil
172+
}
173+
174+
func ensureUdevToolExists(toolPath string) error {
175+
exists, err := pathutils.Exists(pathutils.CheckFollowSymlink, toolPath)
176+
if err != nil {
177+
return fmt.Errorf("failed to check existence of %q: %v", toolPath, err)
178+
}
179+
if !exists {
180+
// The driver should be containerized with the tool so maybe something is
181+
// wrong with the build process
182+
return fmt.Errorf("could not find tool at %q, unable to verify device paths", nvmeIdPath)
183+
}
184+
return nil
185+
}
186+
187+
func ensureUdevToolsExist() error {
188+
if err := ensureUdevToolExists(scsiIdPath); err != nil {
189+
return err
190+
}
191+
if err := ensureUdevToolExists(nvmeIdPath); err != nil {
192+
return err
193+
}
194+
return nil
195+
}
196+
137197
// VerifyDevicePath returns the first devicePath that maps to a real disk in the
138-
// candidate devicePaths or an empty string if none is found. If
139-
// /lib/udev_containerized/scsi_id exists it will attempt to fix any issues
198+
// candidate devicePaths or an empty string if none is found.
199+
// If the device is not found, it will attempt to fix any issues
140200
// caused by missing paths or mismatched devices by running a udevadm --trigger.
141201
func (m *deviceUtils) VerifyDevicePath(devicePaths []string, deviceName string) (string, error) {
142202
var devicePath string
@@ -146,15 +206,10 @@ func (m *deviceUtils) VerifyDevicePath(devicePaths []string, deviceName string)
146206
pollTimeout = 3 * time.Second
147207
)
148208

149-
scsiIDPath := "/lib/udev_containerized/scsi_id"
150-
exists, err := pathutils.Exists(pathutils.CheckFollowSymlink, scsiIDPath)
209+
// Ensure tools in /lib/udev_containerized directory exist
210+
err = ensureUdevToolsExist()
151211
if err != nil {
152-
return "", fmt.Errorf("failed to check scsi_id existence: %v", err)
153-
}
154-
if !exists {
155-
// No SCSI ID tool, the driver should be containerized with the tool so
156-
// maybe something is wrong with the build process
157-
return "", fmt.Errorf("could not find scsi_id tool at %s, unable to verify device paths", scsiIDPath)
212+
return "", err
158213
}
159214

160215
err = wait.Poll(pollInterval, pollTimeout, func() (bool, error) {
@@ -166,40 +221,41 @@ func (m *deviceUtils) VerifyDevicePath(devicePaths []string, deviceName string)
166221
}
167222

168223
if len(devicePath) == 0 {
169-
// Couldn't find the path so we need to find a /dev/sdx with the SCSI
170-
// serial that matches deviceName. Then we run udevadm trigger on that
171-
// device to get the device to show up in /dev/by-id/
224+
// Couldn't find a /dev/disk/by-id path for this deviceName, so we need to
225+
// find a /dev/* with a serial that matches deviceName. Then we attempt
226+
// to repair the symlink.
172227
innerErr := udevadmTriggerForDiskIfExists(deviceName)
173228
if innerErr != nil {
174-
return false, fmt.Errorf("failed to trigger udevadm fix: %v", innerErr)
229+
return false, fmt.Errorf("failed to trigger udevadm fix of non existent disk for %q: %v", deviceName, innerErr)
175230
}
176231
// Go to next retry loop to get the deviceName again after
177232
// potentially fixing it with the udev command
178233
return false, nil
179234
}
180235

181-
// If there exists a devicePath we make sure disk at /dev/sdx matches the
182-
// expected disk at devicePath by matching SCSI Serial to the disk name
183-
devSDX, innerErr := filepath.EvalSymlinks(devicePath)
236+
// If there exists a devicePath we make sure disk at /dev/* matches the
237+
// expected disk at devicePath by matching device Serial to the disk name
238+
devFsPath, innerErr := filepath.EvalSymlinks(devicePath)
184239
if innerErr != nil {
185240
return false, fmt.Errorf("filepath.EvalSymlinks(%q) failed with %v", devicePath, innerErr)
186241
}
187-
// Check to make sure device path maps to the correct disk
188-
if strings.Contains(devSDX, diskSDPath) {
189-
scsiSerial, innerErr := getScsiSerial(devSDX)
190-
if innerErr != nil {
191-
return false, fmt.Errorf("couldn't get SCSI serial number for disk %s: %v", deviceName, innerErr)
192-
}
193-
// SUCCESS! devicePath points to a /dev/sdx that has a SCSI serial
194-
// equivalent to our disk name
195-
if scsiSerial == deviceName {
196-
return true, nil
197-
}
242+
243+
devFsSerial, innerErr := getDevFsSerial(devFsPath)
244+
if innerErr != nil {
245+
return false, fmt.Errorf("couldn't get serial number for disk %s at path %s: %v", deviceName, devFsPath, innerErr)
246+
}
247+
// SUCCESS! devicePath points to a /dev/* path that has a serial
248+
// equivalent to our disk name
249+
if len(devFsSerial) != 0 && devFsSerial == deviceName {
250+
return true, nil
198251
}
199-
// The devicePath is not mapped to the correct disk
252+
253+
// A /dev/* path exists, but is either not a recognized /dev prefix type
254+
// (/dev/nvme* or /dev/sd*) or devicePath is not mapped to the correct disk.
255+
// Attempt a repair
200256
innerErr = udevadmTriggerForDiskIfExists(deviceName)
201257
if innerErr != nil {
202-
return false, fmt.Errorf("failed to trigger udevadm fix: %v", innerErr)
258+
return false, fmt.Errorf("failed to trigger udevadm fix of misconfigured disk for %q: %v", deviceName, innerErr)
203259
}
204260
// Go to next retry loop to get the deviceName again after
205261
// potentially fixing it with the udev command
@@ -213,49 +269,78 @@ func (m *deviceUtils) VerifyDevicePath(devicePaths []string, deviceName string)
213269
return devicePath, nil
214270
}
215271

272+
// getDevFsSerial returns the serial number of the /dev/* path at devFsPath.
273+
// If devFsPath does not start with a known prefix, returns the empty string.
274+
func getDevFsSerial(devFsPath string) (string, error) {
275+
switch {
276+
case strings.HasPrefix(devFsPath, diskSDPath):
277+
return getScsiSerial(devFsPath)
278+
case strings.HasPrefix(devFsPath, diskNvmePath):
279+
return getNvmeSerial(devFsPath)
280+
default:
281+
return "", nil
282+
}
283+
}
284+
285+
func findAvailableDevFsPaths() ([]string, error) {
286+
diskSDPaths, err := filepath.Glob(diskSDPattern)
287+
if err != nil {
288+
return nil, fmt.Errorf("failed to filepath.Glob(\"%s\"): %v", diskSDPattern, err)
289+
}
290+
diskNvmePaths, err := filepath.Glob(diskNvmePattern)
291+
if err != nil {
292+
return nil, fmt.Errorf("failed to filepath.Glob(\"%s\"): %v", diskNvmePattern, err)
293+
}
294+
return append(diskSDPaths, diskNvmePaths...), nil
295+
}
296+
216297
func udevadmTriggerForDiskIfExists(deviceName string) error {
217-
devToSCSI := map[string]string{}
218-
sds, err := filepath.Glob(diskSDPattern)
298+
devFsPathToSerial := map[string]string{}
299+
devFsPaths, err := findAvailableDevFsPaths()
219300
if err != nil {
220-
return fmt.Errorf("failed to filepath.Glob(\"%s\"): %v", diskSDPattern, err)
301+
return err
221302
}
222-
for _, devSDX := range sds {
223-
scsiSerial, err := getScsiSerial(devSDX)
224-
if err != nil {
225-
return fmt.Errorf("failed to get SCSI Serial num: %v", err)
303+
for _, devFsPath := range devFsPaths {
304+
devFsSerial, err := getDevFsSerial(devFsPath)
305+
if err != nil || len(devFsSerial) == 0 {
306+
// If we get an error, ignore. Either this isn't a block device, or it
307+
// isn't something we can get a serial number from
308+
klog.V(7).Infof("failed to get Serial num for disk %s at path %s: %v", deviceName, devFsPath, err)
309+
continue
226310
}
227-
devToSCSI[devSDX] = scsiSerial
228-
if scsiSerial == deviceName {
311+
devFsPathToSerial[devFsPath] = devFsSerial
312+
if devFsSerial == deviceName {
229313
// Found the disk that we're looking for so run a trigger on it
230314
// to resolve its /dev/by-id/ path
231-
klog.Warningf("udevadm --trigger running to fix disk at path %s which has SCSI ID %s", devSDX, scsiSerial)
232-
err := udevadmChangeToDrive(devSDX)
315+
klog.Warningf("udevadm --trigger running to fix disk at path %s which has serial numberID %s", devFsPath, devFsSerial)
316+
err := udevadmChangeToDrive(devFsPath)
233317
if err != nil {
234-
return fmt.Errorf("failed to fix disk which has SCSI ID %s: %v", scsiSerial, err)
318+
return fmt.Errorf("failed to fix disk which has serial numberID %s: %v", devFsSerial, err)
235319
}
236320
return nil
237321
}
238322
}
239-
klog.Warningf("udevadm --trigger requested to fix disk %s but no such disk was found in %v", deviceName, devToSCSI)
323+
klog.Warningf("udevadm --trigger requested to fix disk %s but no such disk was found in %v", deviceName, devFsPathToSerial)
240324
return fmt.Errorf("udevadm --trigger requested to fix disk %s but no such disk was found", deviceName)
241325
}
242326

243327
// Calls "udevadm trigger --action=change" on the specified drive. drivePath
244-
// must be the block device path to trigger on, in the format "/dev/sd*", or a
245-
// symlink to it. This is workaround for Issue #7972. Once the underlying issue
246-
// has been resolved, this may be removed.
328+
// must be the block device path to trigger on, in the format "/dev/*", or a
329+
// symlink to it. This is workaround for Issue #7972
330+
// (https://github.com/kubernetes/kubernetes/issues/7972). Once the underlying
331+
// issue has been resolved, this may be removed.
247332
// udevadm takes a little bit to work its magic in the background so any callers
248333
// should not expect the trigger to complete instantly and may need to poll for
249334
// the change
250-
func udevadmChangeToDrive(devSDX string) error {
251-
// Call "udevadm trigger --action=change --property-match=DEVNAME=/dev/sd..."
335+
func udevadmChangeToDrive(devFsPath string) error {
336+
// Call "udevadm trigger --action=change --property-match=DEVNAME=/dev/..."
252337
out, err := exec.Command(
253338
"udevadm",
254339
"trigger",
255340
"--action=change",
256-
fmt.Sprintf("--property-match=DEVNAME=%s", devSDX)).CombinedOutput()
341+
fmt.Sprintf("--property-match=DEVNAME=%s", devFsPath)).CombinedOutput()
257342
if err != nil {
258-
return fmt.Errorf("udevadmChangeToDrive: udevadm trigger failed for drive %q with output %s: %v.", devSDX, string(out), err)
343+
return fmt.Errorf("udevadmChangeToDrive: udevadm trigger failed for drive %q with output %s: %v.", devFsPath, string(out), err)
259344
}
260345
return nil
261346
}
+47
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
package mountmanager
2+
3+
import (
4+
"testing"
5+
)
6+
7+
func TestParseNvmeSerial(t *testing.T) {
8+
testCases := []struct {
9+
name string
10+
output string
11+
serial string
12+
expErr bool
13+
}{
14+
{
15+
name: "valid google_nvme_id response",
16+
output: "line 58: warning: command substitution: ignored null byte in input\nID_SERIAL_SHORT=pvc-8ee0cf44-6acd-456e-9f3b-95ccd65065b9\nID_SERIAL=Google_PersistentDisk_pvc-8ee0cf44-6acd-456e-9f3b-95ccd65065b9",
17+
serial: "pvc-8ee0cf44-6acd-456e-9f3b-95ccd65065b9",
18+
expErr: false,
19+
},
20+
{
21+
name: "valid google_nvme_id boot disk response",
22+
output: "line 58: warning: command substitution: ignored null byte in input\nID_SERIAL_SHORT=persistent-disk-0\nID_SERIAL=Google_PersistentDisk_persistent-disk-0",
23+
serial: "persistent-disk-0",
24+
expErr: false,
25+
},
26+
{
27+
name: "invalid google_nvme_id response",
28+
output: "Error: requesting namespace-id from non-block device\nNVMe Status:INVALID_NS: The namespace or the format of that namespace is invalid(b) NSID:0\nxxd: sorry cannot seek.\n[2022-03-12T04:17:17+0000]: NVMe Vendor Extension disk information not present",
29+
serial: "",
30+
expErr: true,
31+
},
32+
}
33+
34+
for _, tc := range testCases {
35+
t.Logf("Running test: %v", tc.name)
36+
actualSerial, err := parseNvmeSerial(tc.output)
37+
if tc.expErr && err == nil {
38+
t.Fatalf("Expected error but didn't get any")
39+
}
40+
if !tc.expErr && err != nil {
41+
t.Fatalf("Got unexpected error: %s", err)
42+
}
43+
if actualSerial != tc.serial {
44+
t.Fatalf("Expected '%s' but got '%s'", tc.serial, actualSerial)
45+
}
46+
}
47+
}

test/e2e/tests/setup_e2e_test.go

+5
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,11 @@ var _ = BeforeSuite(func() {
104104
klog.Fatalf("could not copy scsi_id to containerized directory: %v", err)
105105
}
106106

107+
err = testutils.CopyFile(i, "/lib/udev/google_nvme_id", "/lib/udev_containerized/google_nvme_id")
108+
if err != nil {
109+
klog.Fatalf("could not copy google_nvme_id to containerized directory: %v", err)
110+
}
111+
107112
klog.Infof("Creating new driver and client for node %s\n", i.GetName())
108113
// Create new driver and client
109114
testContext, err := testutils.GCEClientAndDriverSetup(i)

0 commit comments

Comments
 (0)