Skip to content

Commit 53094bc

Browse files
committed
Implementing watcher & reboot stability for data cache to master branch.
1 parent ed6b156 commit 53094bc

35 files changed

+4654
-8
lines changed

cmd/gce-pd-csi-driver/main.go

+9-3
Original file line number | Diff line number | Diff line change
@@ -260,9 +260,10 @@ func handle() {
260260
if nodeName == nil || *nodeName == "" {
261261
klog.Errorf("Data Cache enabled, but --node-name not passed")
262262
}
263-
if err := setupDataCache(ctx, *nodeName); err != nil {
264-
klog.Errorf("Data Cache setup failed: %v", err)
263+
if err := setupDataCache(ctx, *nodeName, nodeServer.MetadataService.GetName()); err != nil {
264+
klog.Errorf("DataCache setup failed: %v", err)
265265
}
266+
go driver.StartWatcher(*nodeName)
266267
}
267268

268269
err = gceDriver.SetupGCEDriver(driverName, version, extraVolumeLabels, extraTags, identityServer, controllerServer, nodeServer)
@@ -385,7 +386,7 @@ func fetchLssdsForRaiding(lssdCount int) ([]string, error) {
385386
return availableLssds, nil
386387
}
387388

388-
func setupDataCache(ctx context.Context, nodeName string) error {
389+
func setupDataCache(ctx context.Context, nodeName string, nodeId string) error {
389390
isAlreadyRaided, err := driver.IsRaided()
390391
if err != nil {
391392
klog.V(4).Infof("Errored while scanning for available LocalSSDs err:%v; continuing Raiding", err)
@@ -415,6 +416,11 @@ func setupDataCache(ctx context.Context, nodeName string) error {
415416
return fmt.Errorf("Failed to Raid local SSDs, unable to setup Data Cache, got error %v", err)
416417
}
417418

419+
// Initializing data cache node (VG checks w/ raided lssd)
420+
if err := driver.InitializeDataCacheNode(nodeId); err != nil {
421+
return err
422+
}
423+
418424
klog.V(4).Infof("LSSD caching is setup for the Data Cache enabled node %s", nodeName)
419425
return nil
420426
}

go.mod

+1-1
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ require (
5858
github.com/davecgh/go-spew v1.1.1 // indirect
5959
github.com/emicklei/go-restful v2.9.5+incompatible // indirect
6060
github.com/felixge/httpsnoop v1.0.4 // indirect
61-
github.com/fsnotify/fsnotify v1.5.4 // indirect
61+
github.com/fsnotify/fsnotify v1.8.0 // indirect
6262
github.com/go-logr/logr v1.4.2 // indirect
6363
github.com/go-logr/stdr v1.2.2 // indirect
6464
github.com/go-openapi/jsonpointer v0.20.0 // indirect

go.sum

+2
Original file line number | Diff line number | Diff line change
@@ -1032,6 +1032,8 @@ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4
10321032
github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU=
10331033
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
10341034
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
1035+
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
1036+
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
10351037
github.com/fsouza/fake-gcs-server v0.0.0-20180612165233-e85be23bdaa8/go.mod h1:1/HufuJ+eaDf4KTnYdS6HJMGvMRU8d4cYTuu/1QaBbI=
10361038
github.com/fsouza/fake-gcs-server v1.19.4/go.mod h1:I0/88nHCASqJJ5M7zVF0zKODkYTcuXFW5J5yajsNJnE=
10371039
github.com/fvbommel/sortorder v1.0.1/go.mod h1:uk88iVf1ovNn1iLfgUVU2F9o5eO30ui720w+kxuqRs0=

pkg/gce-pd-csi-driver/cache.go

+92-2
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@ import (
88
"strings"
99

1010
csi "github.com/container-storage-interface/spec/lib/go/csi"
11+
fsnotify "github.com/fsnotify/fsnotify"
1112
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1213
"k8s.io/client-go/kubernetes"
1314
"k8s.io/client-go/rest"
@@ -57,7 +58,7 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
5758
// Clean up Volume Group before adding the PD
5859
reduceVolumeGroup(volumeGroupName, true)
5960
} else {
60-
err := createVg(volumeGroupName, devicePath, raidedLocalSsdPath)
61+
err := createVg(volumeGroupName, raidedLocalSsdPath)
6162
if err != nil {
6263
return mainDevicePath, err
6364
}
@@ -395,7 +396,7 @@ func getLvName(suffix string, volumeId string) string {
395396
return fmt.Sprintf("%s-%s", suffix, pvcName)
396397
}
397398

398-
func createVg(volumeGroupName string, devicePath string, raidedLocalSsds string) error {
399+
func createVg(volumeGroupName string, raidedLocalSsds string) error {
399400
args := []string{
400401
"--zero",
401402
"y",
@@ -497,3 +498,92 @@ func isCachingSetup(mainLvName string) (error, bool) {
497498
}
498499
return nil, false
499500
}
501+
502+
func InitializeDataCacheNode(nodeId string) error {
503+
raidedLocalSsdPath, err := fetchRAIDedLocalSsdPath()
504+
if err != nil {
505+
return err
506+
}
507+
volumeGroupName := getVolumeGroupName(nodeId)
508+
509+
vgExists := checkVgExists(volumeGroupName)
510+
// Check if the required volume group already exists
511+
if vgExists {
512+
// Clean up Volume Group before adding the PD
513+
reduceVolumeGroup(volumeGroupName, true)
514+
515+
// validate that raidedLSSD is part of VG
516+
err = validateRaidedLSSDinVG(volumeGroupName, raidedLocalSsdPath)
517+
if err != nil {
518+
return fmt.Errorf("failed validate local ssd in vg %v: %v", volumeGroupName, err)
519+
}
520+
} else {
521+
err := createVg(volumeGroupName, raidedLocalSsdPath)
522+
if err != nil {
523+
return err
524+
}
525+
}
526+
return nil
527+
}
528+
529+
func StartWatcher(nodeName string) {
530+
dirToWatch := "/dev/"
531+
watcher, err := fsnotify.NewWatcher()
532+
if err != nil {
533+
klog.V(2).ErrorS(err, "errored while creating watcher")
534+
}
535+
klog.V(2).Infof("Watcher started for directory %v", dirToWatch)
536+
defer watcher.Close()
537+
538+
// out of the box fsnotify can watch a single file, or a single directory
539+
if err := watcher.Add(dirToWatch); err != nil {
540+
klog.V(2).ErrorS(err, "errored while adding watcher directory")
541+
}
542+
errorCh := make(chan error, 1)
543+
// Handle the error received from the watcher goroutine
544+
go watchDiskDetaches(watcher, nodeName, errorCh)
545+
546+
select {
547+
case err := <-errorCh:
548+
klog.Errorf("watcher encountered an error: %v", err)
549+
}
550+
}
551+
552+
func watchDiskDetaches(watcher *fsnotify.Watcher, nodeName string, errorCh chan error) error {
553+
for {
554+
select {
555+
// watch for errors
556+
case err := <-watcher.Errors:
557+
errorCh <- fmt.Errorf("disk update event errored: %v", err)
558+
// watch for events
559+
case event := <-watcher.Events:
560+
// In case of an event i.e. creation or deletion of any new PV, we update the VG metadata.
561+
// This might include some non-LVM changes, no harm in updating metadata multiple times.
562+
reduceVolumeGroup(getVolumeGroupName(nodeName), true)
563+
klog.V(2).Infof("disk attach/detach event %#v\n", event)
564+
}
565+
}
566+
}
567+
568+
func validateRaidedLSSDinVG(vgName string, lssdPath string) error {
569+
args := []string{
570+
"--noheadings",
571+
"-o",
572+
"pv_name",
573+
"--select",
574+
"vg_name=" + vgName,
575+
}
576+
info, err := common.RunCommand("" /* pipedCmd */, nil /* pipedCmdArg */, "pvs", args...)
577+
if err != nil {
578+
return fmt.Errorf("errored while checking physical volume details %v: %s", err, info)
579+
// On error info contains the error message which we cannot use for further steps
580+
}
581+
582+
if !strings.Contains(string(info), lssdPath) {
583+
info, err := common.RunCommand("" /* pipedCmd */, nil /* pipedCmdArg */, "vgextend", []string{vgName, lssdPath}...)
584+
if err != nil {
585+
klog.Errorf("errored while extending VGs %v: %s", err, info)
586+
}
587+
}
588+
return nil
589+
}

vendor/github.com/fsnotify/fsnotify/.cirrus.yml

+14
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/fsnotify/fsnotify/.gitignore

+10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/fsnotify/fsnotify/.mailmap

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)