Skip to content

Commit d54cba7

Browse files
committed
Implementing watcher & reboot stability for data cache to master branch.
1 parent 58c8eaf commit d54cba7

35 files changed

+4659
-8
lines changed

cmd/gce-pd-csi-driver/main.go

+9-3
Original file line numberDiff line numberDiff line change
@@ -260,9 +260,10 @@ func handle() {
260260
if nodeName == nil || *nodeName == "" {
261261
klog.Errorf("Data Cache enabled, but --node-name not passed")
262262
}
263-
if err := setupDataCache(ctx, *nodeName); err != nil {
264-
klog.Errorf("Data Cache setup failed: %v", err)
263+
if err := setupDataCache(ctx, *nodeName, nodeServer.MetadataService.GetName()); err != nil {
264+
klog.Errorf("DataCache setup failed: %v", err)
265265
}
266+
go driver.StartWatcher(*nodeName)
266267
}
267268

268269
err = gceDriver.SetupGCEDriver(driverName, version, extraVolumeLabels, extraTags, identityServer, controllerServer, nodeServer)
@@ -385,7 +386,7 @@ func fetchLssdsForRaiding(lssdCount int) ([]string, error) {
385386
return availableLssds, nil
386387
}
387388

388-
func setupDataCache(ctx context.Context, nodeName string) error {
389+
func setupDataCache(ctx context.Context, nodeName string, nodeId string) error {
389390
isAlreadyRaided, err := driver.IsRaided()
390391
if err != nil {
391392
klog.V(4).Infof("Errored while scanning for available LocalSSDs err:%v; continuing Raiding", err)
@@ -415,6 +416,11 @@ func setupDataCache(ctx context.Context, nodeName string) error {
415416
return fmt.Errorf("Failed to Raid local SSDs, unable to setup Data Cache, got error %v", err)
416417
}
417418

419+
// Initializing data cache node (VG checks w/ raided lssd)
420+
if err := driver.InitializeDataCacheNode(nodeId); err != nil {
421+
return err
422+
}
423+
418424
klog.V(4).Infof("LSSD caching is setup for the Data Cache enabled node %s", nodeName)
419425
return nil
420426
}

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ require (
5858
github.com/davecgh/go-spew v1.1.1 // indirect
5959
github.com/emicklei/go-restful v2.9.5+incompatible // indirect
6060
github.com/felixge/httpsnoop v1.0.4 // indirect
61-
github.com/fsnotify/fsnotify v1.5.4 // indirect
61+
github.com/fsnotify/fsnotify v1.8.0 // indirect
6262
github.com/go-logr/logr v1.4.2 // indirect
6363
github.com/go-logr/stdr v1.2.2 // indirect
6464
github.com/go-openapi/jsonpointer v0.20.0 // indirect

go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -1032,6 +1032,8 @@ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4
10321032
github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU=
10331033
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
10341034
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
1035+
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
1036+
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
10351037
github.com/fsouza/fake-gcs-server v0.0.0-20180612165233-e85be23bdaa8/go.mod h1:1/HufuJ+eaDf4KTnYdS6HJMGvMRU8d4cYTuu/1QaBbI=
10361038
github.com/fsouza/fake-gcs-server v1.19.4/go.mod h1:I0/88nHCASqJJ5M7zVF0zKODkYTcuXFW5J5yajsNJnE=
10371039
github.com/fvbommel/sortorder v1.0.1/go.mod h1:uk88iVf1ovNn1iLfgUVU2F9o5eO30ui720w+kxuqRs0=

pkg/gce-pd-csi-driver/cache.go

+97-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"strings"
1010

1111
csi "github.com/container-storage-interface/spec/lib/go/csi"
12+
fsnotify "github.com/fsnotify/fsnotify"
1213
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1314
"k8s.io/client-go/kubernetes"
1415
"k8s.io/client-go/rest"
@@ -66,7 +67,7 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
6667
// Clean up Volume Group before adding the PD
6768
reduceVolumeGroup(volumeGroupName, true)
6869
} else {
69-
err := createVg(volumeGroupName, devicePath, raidedLocalSsdPath)
70+
err := createVg(volumeGroupName, raidedLocalSsdPath)
7071
if err != nil {
7172
return mainDevicePath, err
7273
}
@@ -430,7 +431,7 @@ func getLvName(suffix string, volumeId string) string {
430431
return fmt.Sprintf("%s-%s", suffix, pvcName)
431432
}
432433

433-
func createVg(volumeGroupName string, devicePath string, raidedLocalSsds string) error {
434+
func createVg(volumeGroupName string, raidedLocalSsds string) error {
434435
args := []string{
435436
"--zero",
436437
"y",
@@ -547,3 +548,97 @@ func fetchChunkSizeKiB(cacheSize string) (string, error) {
547548
// default chunk size unit KiB
548549
return strconv.FormatInt(int64(chunkSize), 10) + "KiB", nil
549550
}
551+
552+
func InitializeDataCacheNode(nodeId string) error {
553+
raidedLocalSsdPath, err := fetchRAIDedLocalSsdPath()
554+
if err != nil {
555+
return err
556+
}
557+
volumeGroupName := getVolumeGroupName(nodeId)
558+
559+
vgExists := checkVgExists(volumeGroupName)
560+
// Check if the required volume group already exists
561+
if vgExists {
562+
// Clean up Volume Group before adding the PD
563+
reduceVolumeGroup(volumeGroupName, true)
564+
565+
// validate that raidedLSSD is part of VG
566+
err = validateRaidedLSSDinVG(volumeGroupName, raidedLocalSsdPath)
567+
if err != nil {
568+
return fmt.Errorf("failed validate local ssd in vg %v: %v", volumeGroupName, err)
569+
}
570+
} else {
571+
err := createVg(volumeGroupName, raidedLocalSsdPath)
572+
if err != nil {
573+
return err
574+
}
575+
}
576+
return nil
577+
}
578+
579+
func StartWatcher(nodeName string) {
580+
dirToWatch := "/dev/"
581+
watcher, err := fsnotify.NewWatcher()
582+
if err != nil {
583+
klog.V(2).ErrorS(err, "errored while creating watcher")
584+
}
585+
klog.V(2).Infof("Watcher started for directory %v", dirToWatch)
586+
defer watcher.Close()
587+
588+
// out of the box fsnotify can watch a single file, or a single directory
589+
if err := watcher.Add(dirToWatch); err != nil {
590+
klog.V(2).ErrorS(err, "errored while adding watcher directory")
591+
}
592+
errorCh := make(chan error, 1)
593+
// Handle the error received from the watcher goroutine
594+
go watchDiskDetaches(watcher, nodeName, errorCh)
595+
596+
select {
597+
case err := <-errorCh:
598+
klog.Errorf("watcher encountered an error: %v", err)
599+
}
600+
}
601+
602+
func watchDiskDetaches(watcher *fsnotify.Watcher, nodeName string, errorCh chan error) error {
603+
for {
604+
select {
605+
// watch for errors
606+
case err := <-watcher.Errors:
607+
errorCh <- fmt.Errorf("disk update event errored: %v", err)
608+
// watch for events
609+
case event := <-watcher.Events:
610+
// In case of an event i.e. creation or deletion of any new PV, we update the VG metadata.
611+
// This might include some non-LVM changes, no harm in updating metadata multiple times.
612+
reduceVolumeGroup(getVolumeGroupName(nodeName), true)
613+
klog.V(2).Infof("disk attach/detach event %#v\n", event)
614+
}
615+
}
616+
}
617+
618+
func validateRaidedLSSDinVG(vgName string, lssdPath string) error {
619+
args := []string{
620+
"--noheadings",
621+
"-o",
622+
"pv_name",
623+
"--select",
624+
"vg_name=" + vgName,
625+
}
626+
info, err := common.RunCommand("" /* pipedCmd */, nil /* pipedCmdArg */, "pvs", args...)
627+
if err != nil {
628+
return fmt.Errorf("errored while checking physical volume details %v: %s", err, info)
629+
// On error info contains the error message which we cannot use for further steps
630+
}
631+
632+
if !strings.Contains(string(info), lssdPath) {
633+
return addRaidedLSSDToVg(vgName, lssdPath)
634+
}
635+
return nil
636+
}
637+
638+
func addRaidedLSSDToVg(vgName, lssdPath string) error {
639+
info, err := common.RunCommand("" /* pipedCmd */, nil /* pipedCmdArg */, "vgextend", []string{vgName, lssdPath}...)
640+
if err != nil {
641+
return fmt.Errorf("errored while extending VGs %v: %s", err, info)
642+
}
643+
return nil
644+
}

vendor/github.com/fsnotify/fsnotify/.cirrus.yml

+14
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/fsnotify/fsnotify/.gitignore

+10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/fsnotify/fsnotify/.mailmap

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)