Skip to content

Commit 11a1d38

Browse files
authored
Merge pull request #1946 from halimsam/master
Implementing watcher & reboot stability for data cache to master branch.
2 parents 58c8eaf + 7bb8b6f commit 11a1d38

35 files changed

+4664
-14
lines changed

cmd/gce-pd-csi-driver/main.go

+14-9
Original file line numberDiff line numberDiff line change
@@ -254,14 +254,14 @@ func handle() {
254254
if *maxConcurrentFormatAndMount > 0 {
255255
nodeServer = nodeServer.WithSerializedFormatAndMount(*formatAndMountTimeout, *maxConcurrentFormatAndMount)
256256
}
257-
}
258-
259-
if *enableDataCacheFlag {
260-
if nodeName == nil || *nodeName == "" {
261-
klog.Errorf("Data Cache enabled, but --node-name not passed")
262-
}
263-
if err := setupDataCache(ctx, *nodeName); err != nil {
264-
klog.Errorf("Data Cache setup failed: %v", err)
257+
if *enableDataCacheFlag {
258+
if nodeName == nil || *nodeName == "" {
259+
klog.Errorf("Data Cache enabled, but --node-name not passed")
260+
}
261+
if err := setupDataCache(ctx, *nodeName, nodeServer.MetadataService.GetName()); err != nil {
262+
klog.Errorf("DataCache setup failed: %v", err)
263+
}
264+
go driver.StartWatcher(*nodeName)
265265
}
266266
}
267267

@@ -385,7 +385,7 @@ func fetchLssdsForRaiding(lssdCount int) ([]string, error) {
385385
return availableLssds, nil
386386
}
387387

388-
func setupDataCache(ctx context.Context, nodeName string) error {
388+
func setupDataCache(ctx context.Context, nodeName string, nodeId string) error {
389389
isAlreadyRaided, err := driver.IsRaided()
390390
if err != nil {
391391
klog.V(4).Infof("Errored while scanning for available LocalSSDs err:%v; continuing Raiding", err)
@@ -415,6 +415,11 @@ func setupDataCache(ctx context.Context, nodeName string) error {
415415
return fmt.Errorf("Failed to Raid local SSDs, unable to setup Data Cache, got error %v", err)
416416
}
417417

418+
// Initializing data cache node (VG checks w/ raided lssd)
419+
if err := driver.InitializeDataCacheNode(nodeId); err != nil {
420+
return err
421+
}
422+
418423
klog.V(4).Infof("LSSD caching is setup for the Data Cache enabled node %s", nodeName)
419424
return nil
420425
}

go.mod

+1-1
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ require (
5858
github.com/davecgh/go-spew v1.1.1 // indirect
5959
github.com/emicklei/go-restful v2.9.5+incompatible // indirect
6060
github.com/felixge/httpsnoop v1.0.4 // indirect
61-
github.com/fsnotify/fsnotify v1.5.4 // indirect
61+
github.com/fsnotify/fsnotify v1.8.0 // indirect
6262
github.com/go-logr/logr v1.4.2 // indirect
6363
github.com/go-logr/stdr v1.2.2 // indirect
6464
github.com/go-openapi/jsonpointer v0.20.0 // indirect

go.sum

+2
Original file line numberDiff line numberDiff line change
@@ -1032,6 +1032,8 @@ github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4
10321032
github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU=
10331033
github.com/fsnotify/fsnotify v1.5.4 h1:jRbGcIw6P2Meqdwuo0H1p6JVLbL5DHKAKlYndzMwVZI=
10341034
github.com/fsnotify/fsnotify v1.5.4/go.mod h1:OVB6XrOHzAwXMpEM7uPOzcehqUV2UqJxmVXmkdnm1bU=
1035+
github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/8M=
1036+
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
10351037
github.com/fsouza/fake-gcs-server v0.0.0-20180612165233-e85be23bdaa8/go.mod h1:1/HufuJ+eaDf4KTnYdS6HJMGvMRU8d4cYTuu/1QaBbI=
10361038
github.com/fsouza/fake-gcs-server v1.19.4/go.mod h1:I0/88nHCASqJJ5M7zVF0zKODkYTcuXFW5J5yajsNJnE=
10371039
github.com/fvbommel/sortorder v1.0.1/go.mod h1:uk88iVf1ovNn1iLfgUVU2F9o5eO30ui720w+kxuqRs0=

pkg/gce-pd-csi-driver/cache.go

+97-2
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ import (
99
"strings"
1010

1111
csi "github.com/container-storage-interface/spec/lib/go/csi"
12+
fsnotify "github.com/fsnotify/fsnotify"
1213
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1314
"k8s.io/client-go/kubernetes"
1415
"k8s.io/client-go/rest"
@@ -66,7 +67,7 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
6667
// Clean up Volume Group before adding the PD
6768
reduceVolumeGroup(volumeGroupName, true)
6869
} else {
69-
err := createVg(volumeGroupName, devicePath, raidedLocalSsdPath)
70+
err := createVg(volumeGroupName, raidedLocalSsdPath)
7071
if err != nil {
7172
return mainDevicePath, err
7273
}
@@ -430,7 +431,7 @@ func getLvName(suffix string, volumeId string) string {
430431
return fmt.Sprintf("%s-%s", suffix, pvcName)
431432
}
432433

433-
func createVg(volumeGroupName string, devicePath string, raidedLocalSsds string) error {
434+
func createVg(volumeGroupName string, raidedLocalSsds string) error {
434435
args := []string{
435436
"--zero",
436437
"y",
@@ -547,3 +548,97 @@ func fetchChunkSizeKiB(cacheSize string) (string, error) {
547548
// default chunk size unit KiB
548549
return strconv.FormatInt(int64(chunkSize), 10) + "KiB", nil
549550
}
551+
552+
func InitializeDataCacheNode(nodeId string) error {
553+
raidedLocalSsdPath, err := fetchRAIDedLocalSsdPath()
554+
if err != nil {
555+
return err
556+
}
557+
volumeGroupName := getVolumeGroupName(nodeId)
558+
559+
vgExists := checkVgExists(volumeGroupName)
560+
// Check if the required volume group already exists
561+
if vgExists {
562+
// Clean up Volume Group before adding the PD
563+
reduceVolumeGroup(volumeGroupName, true)
564+
565+
// validate that raidedLSSD is part of VG
566+
err = validateRaidedLSSDinVG(volumeGroupName, raidedLocalSsdPath)
567+
if err != nil {
568+
return fmt.Errorf("failed validate local ssd in vg %v: %v", volumeGroupName, err)
569+
}
570+
} else {
571+
err := createVg(volumeGroupName, raidedLocalSsdPath)
572+
if err != nil {
573+
return err
574+
}
575+
}
576+
return nil
577+
}
578+
579+
func StartWatcher(nodeName string) {
580+
dirToWatch := "/dev/"
581+
watcher, err := fsnotify.NewWatcher()
582+
if err != nil {
583+
klog.V(2).ErrorS(err, "errored while creating watcher")
584+
}
585+
klog.V(2).Infof("Watcher started for directory %v", dirToWatch)
586+
defer watcher.Close()
587+
588+
// out of the box fsnotify can watch a single file, or a single directory
589+
if err := watcher.Add(dirToWatch); err != nil {
590+
klog.V(2).ErrorS(err, "errored while adding watcher directory")
591+
}
592+
errorCh := make(chan error, 1)
593+
// Handle the error received from the watcher goroutine
594+
go watchDiskDetaches(watcher, nodeName, errorCh)
595+
596+
select {
597+
case err := <-errorCh:
598+
klog.Errorf("watcher encountered an error: %v", err)
599+
}
600+
}
601+
602+
func watchDiskDetaches(watcher *fsnotify.Watcher, nodeName string, errorCh chan error) error {
603+
for {
604+
select {
605+
// watch for errors
606+
case err := <-watcher.Errors:
607+
errorCh <- fmt.Errorf("disk update event errored: %v", err)
608+
// watch for events
609+
case event := <-watcher.Events:
610+
// In case of an event i.e. creation or deletion of any new PV, we update the VG metadata.
611+
// This might include some non-LVM changes, no harm in updating metadata multiple times.
612+
reduceVolumeGroup(getVolumeGroupName(nodeName), true)
613+
klog.V(2).Infof("disk attach/detach event %#v\n", event)
614+
}
615+
}
616+
}
617+
618+
func validateRaidedLSSDinVG(vgName string, lssdPath string) error {
619+
args := []string{
620+
"--noheadings",
621+
"-o",
622+
"pv_name",
623+
"--select",
624+
"vg_name=" + vgName,
625+
}
626+
info, err := common.RunCommand("" /* pipedCmd */, nil /* pipedCmdArg */, "pvs", args...)
627+
if err != nil {
628+
return fmt.Errorf("errored while checking physical volume details %v: %s", err, info)
629+
// On error info contains the error message which we cannot use for further steps
630+
}
631+
632+
if !strings.Contains(string(info), lssdPath) {
633+
return addRaidedLSSDToVg(vgName, lssdPath)
634+
}
635+
return nil
636+
}
637+
638+
func addRaidedLSSDToVg(vgName, lssdPath string) error {
639+
info, err := common.RunCommand("" /* pipedCmd */, nil /* pipedCmdArg */, "vgextend", []string{vgName, lssdPath}...)
640+
if err != nil {
641+
return fmt.Errorf("errored while extending VGs %v: %s", err, info)
642+
}
643+
return nil
644+
}

vendor/github.com/fsnotify/fsnotify/.cirrus.yml

+14
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/fsnotify/fsnotify/.gitignore

+10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vendor/github.com/fsnotify/fsnotify/.mailmap

+2
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)