Skip to content

Commit cdd1023

Browse files
committed
Implementing watcher & reboot stability for data cache to master branch.
1 parent 39a5910 commit cdd1023

35 files changed

+4656
-8
lines changed

cmd/gce-pd-csi-driver/main.go

+9-3
Original file line numberDiff line numberDiff line change
@@ -266,9 +266,10 @@ func handle() {
266266
if nodeName == nil || *nodeName == "" {
267267
klog.Errorf("Data cache enabled, but --node-name not passed")
268268
}
269-
if err := setupDataCache(ctx, *nodeName); err != nil {
269+
if err := setupDataCache(ctx, *nodeName, nodeServer.MetadataService.GetName()); err != nil {
270270
klog.Errorf("DataCache setup failed: %v", err)
271271
}
272+
go driver.StartWatcher(*nodeName)
272273
}
273274

274275
err = gceDriver.SetupGCEDriver(driverName, version, extraVolumeLabels, extraTags, identityServer, controllerServer, nodeServer)
@@ -350,7 +351,7 @@ func urlFlag(target **url.URL, name string, usage string) {
350351
})
351352
}
352353

353-
func setupDataCache(ctx context.Context, nodeName string) error {
354+
func setupDataCache(ctx context.Context, nodeName string, nodeId string) error {
354355
klog.V(2).Infof("Setting up data cache for node %s", nodeName)
355356
if nodeName != common.TestNode {
356357
cfg, err := rest.InClusterConfig()
@@ -373,7 +374,12 @@ func setupDataCache(ctx context.Context, nodeName string) error {
373374
}
374375
klog.V(2).Info("Raiding local ssds to setup data cache")
375376
if err := driver.RaidLocalSsds(); err != nil {
376-
return fmt.Errorf("Failed to Raid local SSDs, unable to setup data caching, got error %v", err)
377+
return fmt.Errorf("failed to Raid local SSDs, unable to setup data caching, got error %v", err)
378+
}
379+
380+
// Initializing data cache node (VG checks w/ raided lssd)
381+
if err := driver.InitializeDataCacheNode(nodeId); err != nil {
382+
return err
377383
}
378384

379385
klog.V(2).Infof("Datacache enabled for node %s", nodeName)

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ require (
5858
github.com/davecgh/go-spew v1.1.1 // indirect
5959
github.com/emicklei/go-restful v2.9.5+incompatible // indirect
6060
github.com/felixge/httpsnoop v1.0.4 // indirect
61-
github.com/fsnotify/fsnotify v1.5.4 // indirect
61+
github.com/fsnotify/fsnotify v1.8.0 // indirect
6262
github.com/go-logr/logr v1.4.2 // indirect
6363
github.com/go-logr/stdr v1.2.2 // indirect
6464
github.com/go-openapi/jsonpointer v0.20.0 // indirect

go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -1032,6 +1032,8 @@ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4
10321032
github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU=
10331033
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
10341034
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
1035+
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
1036+
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
10351037
github.com/fsouza/fake-gcs-server v0.0.0-20180612165233-e85be23bdaa8/go.mod h1:1/HufuJ+eaDf4KTnYdS6HJMGvMRU8d4cYTuu/1QaBbI=
10361038
github.com/fsouza/fake-gcs-server v1.19.4/go.mod h1:I0/88nHCASqJJ5M7zVF0zKODkYTcuXFW5J5yajsNJnE=
10371039
github.com/fvbommel/sortorder v1.0.1/go.mod h1:uk88iVf1ovNn1iLfgUVU2F9o5eO30ui720w+kxuqRs0=

pkg/gce-pd-csi-driver/cache.go

+94-2
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import (
77
"strings"
88

99
csi "github.com/container-storage-interface/spec/lib/go/csi"
10+
fsnotify "github.com/fsnotify/fsnotify"
1011

1112
"k8s.io/klog/v2"
1213

@@ -42,7 +43,7 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
4243
// Clean up Volume Group before adding the PD
4344
reduceVolumeGroup(volumeGroupName, true)
4445
} else {
45-
err := createVg(volumeGroupName, devicePath, raidedLocalSsdPath)
46+
err := createVg(volumeGroupName, raidedLocalSsdPath)
4647
if err != nil {
4748
return mainDevicePath, err
4849
}
@@ -241,7 +242,7 @@ func getLvName(suffix string, volumeId string) string {
241242
return fmt.Sprintf("%s-%s", suffix, pvcName)
242243
}
243244

244-
func createVg(volumeGroupName string, devicePath string, raidedLocalSsds string) error {
245+
func createVg(volumeGroupName string, raidedLocalSsds string) error {
245246
args := []string{
246247
"--zero",
247248
"y",
@@ -366,3 +367,94 @@ func isCachingSetup(mainLvName string) (error, bool) {
366367
}
367368
return nil, false
368369
}
370+
371+
func InitializeDataCacheNode(nodeId string) error {
372+
// vgcreate with the raided local ssds.
373+
info, err := common.RunCommand("grep", raidedLocalSsdName, "ls", raidedLssdPrefix)
374+
if err != nil {
375+
return fmt.Errorf("failed while listing raided devices, err: %v, output: %v", err, info)
376+
}
377+
infoString := strings.TrimSpace(string(info))
378+
raidedLocalSsdPath = raidedLssdPrefix + infoString
379+
volumeGroupName := getVolumeGroupName(nodeId)
380+
381+
vgExists := checkVgExists(volumeGroupName)
382+
// Check if the required volume group already exists
383+
if vgExists {
384+
// Clean up Volume Group before adding the PD
385+
reduceVolumeGroup(volumeGroupName, true)
386+
387+
// validate that raidedLSSD is part of VG
388+
err = validateRaidedLSSDinVG(volumeGroupName)
389+
if err != nil {
390+
return fmt.Errorf("failed validate local ssd in vg %v: %v", volumeGroupName, err)
391+
}
392+
} else {
393+
err := createVg(volumeGroupName, raidedLocalSsdPath)
394+
if err != nil {
395+
return err
396+
}
397+
}
398+
return nil
399+
}
400+
401+
func StartWatcher(nodeName string) {
402+
dirToWatch := "/dev/"
403+
watcher, err := fsnotify.NewWatcher()
404+
if err != nil {
405+
klog.V(2).ErrorS(err, "errored while creating watcher")
406+
}
407+
defer watcher.Close()
408+
409+
// out of the box fsnotify can watch a single file, or a single directory
410+
if err := watcher.Add(dirToWatch); err != nil {
411+
klog.V(2).ErrorS(err, "errored while adding watcher directory")
412+
}
413+
errorCh := make(chan error, 1)
414+
// Handle the error received from the watcher goroutine
415+
go watchDiskDetaches(watcher, nodeName, errorCh)
416+
417+
select {
418+
case err := <-errorCh:
419+
klog.Errorf("watcher encountered an error: %v", err)
420+
}
421+
}
422+
423+
func watchDiskDetaches(watcher *fsnotify.Watcher, nodeName string, errorCh chan error) error {
424+
for {
425+
select {
426+
// watch for errors
427+
case err := <-watcher.Errors:
428+
errorCh <- fmt.Errorf("disk update event errored: %v", err)
429+
// watch for events
430+
case event := <-watcher.Events:
431+
// In case of an event i.e. creation or deletion of any new PV, we update the VG metadata.
432+
// This might include some non-LVM changes, no harm in updating metadata multiple times.
433+
reduceVolumeGroup(getVolumeGroupName(nodeName), true)
434+
klog.V(2).Infof("disk attach/detach event %#v\n", event)
435+
}
436+
}
437+
}
438+
439+
func validateRaidedLSSDinVG(vgName string) error {
440+
args := []string{
441+
"--noheadings",
442+
"-o",
443+
"pv_name",
444+
"--select",
445+
"vg_name=" + vgName,
446+
}
447+
info, err := common.RunCommand("" /* pipedCmd */, "" /* pipedCmdArg */, "pvs", args...)
448+
if err != nil {
449+
return fmt.Errorf("errored while checking physical volume details %v: %s", err, info)
450+
// On error info contains the error message which we cannot use for further steps
451+
}
452+
453+
if !strings.Contains(string(info), "/dev/md127") {
454+
info, err := common.RunCommand("" /* pipedCmd */, "" /* pipedCmdArg */, "vgextend", []string{vgName, "/dev/md127"}...)
455+
if err != nil {
456+
klog.Errorf("errored while extending VGs %v: %s", err, info)
457+
}
458+
}
459+
return nil
460+
}

vendor/github.com/fsnotify/fsnotify/.cirrus.yml

+14
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/fsnotify/fsnotify/.gitignore

+10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/fsnotify/fsnotify/.mailmap

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)