Commit 1042bce

use node UID for VG creation instead of name

1 parent 0be5a10 commit 1042bce

File tree

3 files changed (+43 -31 lines)

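In short: data-cache volume group (VG) setup is now keyed off the Kubernetes Node object's UID instead of the node name, presumably because a deleted and re-created node keeps its name but gets a fresh UID (that rationale is a reading of the diff, not stated in the commit message). A tiny Go illustration of the two identifiers, with made-up values; the actual VG creation happens inside driver.InitializeDataCacheNode, which this commit only re-points at the UID:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/types"
)

func main() {
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{
		Name: "gke-cluster-pool-1-node-abc",                    // reused if the node is re-created
		UID:  types.UID("1b4e28ba-2fa1-11d2-883f-0016d3cca427"), // unique per Node object
	}}
	// What the commit switches to: setupDataCache now hands the UID,
	// not the node name, to driver.InitializeDataCacheNode.
	nodeUid := string(node.ObjectMeta.UID)
	fmt.Println("name:", node.Name, "-> vg key:", nodeUid)
}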

cmd/gce-pd-csi-driver/main.go (30 additions, 15 deletions)
@@ -27,6 +27,7 @@ import (
 	"strings"
 	"time"
 
+	v1 "k8s.io/api/core/v1"
 	"k8s.io/klog/v2"
 	"k8s.io/utils/strings/slices"
 	"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
@@ -245,15 +246,26 @@ func handle() {
 	if err != nil {
 		klog.Fatalf("Failed to set up metadata service: %v", err.Error())
 	}
-	isDataCacheEnabledNodePool, err := isDataCacheEnabledNodePool(ctx, *nodeName)
-	if err != nil {
-		klog.Fatalf("Failed to get node info from API server: %v", err.Error())
+	var node *v1.Node
+	var isDataCacheEnabledNodePoolCheck bool
+	if *nodeName == common.TestNode {
+		isDataCacheEnabledNodePoolCheck = true
+	} else if len(*nodeName) > 0 && *nodeName != common.TestNode {
+		node, err = driver.FetchNodeWithRetry(ctx, *nodeName)
+		if err != nil {
+			klog.Fatalf("Failed to get node info from API server: %v", err.Error())
+		}
+		isDataCacheEnabledNodePoolCheck, err = isDataCacheEnabledNodePool(ctx, node)
+		if err != nil {
+			klog.Fatalf("Unable to fetch node labels: %v", err.Error())
+		}
 	}
+	// isDataCacheEnabledNodePool := true
 	nsArgs := driver.NodeServerArgs{
 		EnableDeviceInUseCheck:   *enableDeviceInUseCheck,
 		DeviceInUseTimeout:       *deviceInUseTimeout,
 		EnableDataCache:          *enableDataCacheFlag,
-		DataCacheEnabledNodePool: isDataCacheEnabledNodePool,
+		DataCacheEnabledNodePool: isDataCacheEnabledNodePoolCheck,
 	}
 	nodeServer = driver.NewNodeServer(gceDriver, mounter, deviceUtils, meta, statter, nsArgs)
 	if *maxConcurrentFormatAndMount > 0 {
@@ -264,7 +276,7 @@ func handle() {
 		klog.Errorf("Data Cache enabled, but --node-name not passed")
 	}
 	if nsArgs.DataCacheEnabledNodePool {
-		if err := setupDataCache(ctx, *nodeName, nodeServer.MetadataService.GetName()); err != nil {
+		if err := setupDataCache(ctx, node, *nodeName); err != nil {
 			klog.Errorf("Data Cache setup failed: %v", err)
 		}
 		go driver.StartWatcher(*nodeName)
@@ -351,15 +363,16 @@ func urlFlag(target **url.URL, name string, usage string) {
 	})
 }
 
-func isDataCacheEnabledNodePool(ctx context.Context, nodeName string) (bool, error) {
+func isDataCacheEnabledNodePool(ctx context.Context, node *v1.Node) (bool, error) {
 	if !*enableDataCacheFlag {
 		return false, nil
 	}
-	if len(nodeName) > 0 && nodeName != common.TestNode { // disregard logic below when E2E testing.
-		dataCacheLSSDCount, err := driver.GetDataCacheCountFromNodeLabel(ctx, nodeName)
-		return dataCacheLSSDCount != 0, err
-	}
-	return true, nil
+	// nodeName := node.Name
+	// if len(nodeName) > 0 && nodeName != common.TestNode { // disregard logic below when E2E testing.
+	dataCacheLSSDCount, err := driver.GetDataCacheCountFromNodeLabel(ctx, node)
+	return dataCacheLSSDCount != 0, err
+	// }
+	// return true, nil
 }
 
 func fetchLssdsForRaiding(lssdCount int) ([]string, error) {
@@ -394,7 +407,7 @@ func fetchLssdsForRaiding(lssdCount int) ([]string, error) {
 	return availableLssds[:lssdCount], nil
 }
 
-func setupDataCache(ctx context.Context, nodeName string, nodeId string) error {
+func setupDataCache(ctx context.Context, node *v1.Node, nodeName string) error {
 	isAlreadyRaided, err := driver.IsRaided()
 	if err != nil {
 		klog.V(4).Infof("Errored while scanning for available LocalSSDs err:%v; continuing Raiding", err)
@@ -404,9 +417,11 @@ func setupDataCache(ctx context.Context, nodeName string, nodeId string) error {
 	}
 
 	lssdCount := common.LocalSSDCountForDataCache
+	nodeUid := nodeName
 	if nodeName != common.TestNode {
-		var err error
-		lssdCount, err = driver.GetDataCacheCountFromNodeLabel(ctx, nodeName)
+		nodeUid = string(node.ObjectMeta.UID)
+		// lssdCount := 4
+		lssdCount, err = driver.GetDataCacheCountFromNodeLabel(ctx, node)
 		if err != nil {
 			return err
 		}
@@ -425,7 +440,7 @@ func setupDataCache(ctx context.Context, nodeName string, nodeId string) error {
 	}
 
 	// Initializing data cache node (VG checks w/ raided lssd)
-	if err := driver.InitializeDataCacheNode(nodeId); err != nil {
+	if err := driver.InitializeDataCacheNode(nodeUid); err != nil {
 		return err
 	}
 
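Net effect of the main.go hunks: handle() now resolves the Node object once, via the new driver.FetchNodeWithRetry, and threads it through both isDataCacheEnabledNodePool and setupDataCache, instead of each helper re-resolving the node by name. The pool check itself reduces to asking whether the node's LSSD-count label is non-zero. A minimal self-contained sketch of that check (the label key below is a placeholder; the driver composes the real key from common.NodeLabelPrefix and common.DataCacheLssdCountLabel):

package main

import (
	"fmt"
	"strconv"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// dataCacheCount reads the node pool's local-SSD count for data caching from
// a node label; a missing label reads as zero, i.e. caching disabled.
func dataCacheCount(node *v1.Node) (int, error) {
	val, found := node.GetLabels()["example.com/data-cache-count"] // placeholder key
	if !found {
		return 0, nil
	}
	return strconv.Atoi(val)
}

func main() {
	node := &v1.Node{ObjectMeta: metav1.ObjectMeta{
		Labels: map[string]string{"example.com/data-cache-count": "2"},
	}}
	count, err := dataCacheCount(node)
	fmt.Println(count, err, count != 0) // 2 <nil> true -> pool has data cache enabled
}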
deploy/kubernetes/base/controller/controller.yaml (2 additions, 1 deletion)
@@ -143,7 +143,8 @@ spec:
             - "--endpoint=unix:/csi/csi.sock"
             - "--supports-dynamic-iops-provisioning=hyperdisk-balanced,hyperdisk-extreme"
             - "--supports-dynamic-throughput-provisioning=hyperdisk-balanced,hyperdisk-throughput,hyperdisk-ml"
-            - --enable-data-cache
+            - "--run-node-service=false"
+            - --enable-data-cache=true
           command:
             - /gce-pd-csi-driver
           env:
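The controller manifest change is configuration only: the Deployment now also passes "--run-node-service=false", presumably so the controller pod's driver container stops serving the node side (data-cache setup runs in the node DaemonSet), and the data-cache flag is spelled with an explicit value, --enable-data-cache=true.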

pkg/gce-pd-csi-driver/cache.go (11 additions, 15 deletions)
@@ -248,19 +248,7 @@ func ValidateDataCacheConfig(dataCacheMode string, dataCacheSize string, ctx con
 	return fmt.Errorf("Data Cache is not enabled for PVC (data-cache-size: %v, data-cache-mode: %v). Please set both parameters in StorageClass to enable caching", dataCacheSize, dataCacheMode)
 }
 
-func GetDataCacheCountFromNodeLabel(ctx context.Context, nodeName string) (int, error) {
-	cfg, err := rest.InClusterConfig()
-	if err != nil {
-		return 0, err
-	}
-	kubeClient, err := kubernetes.NewForConfig(cfg)
-	if err != nil {
-		return 0, err
-	}
-	node, err := getNodeWithRetry(ctx, kubeClient, nodeName)
-	if err != nil {
-		return 0, err
-	}
+func GetDataCacheCountFromNodeLabel(ctx context.Context, node *v1.Node) (int, error) {
 	if val, found := node.GetLabels()[fmt.Sprintf(common.NodeLabelPrefix, common.DataCacheLssdCountLabel)]; found {
 		dataCacheCount, err := strconv.Atoi(val)
 		if err != nil {
@@ -272,14 +260,22 @@ func GetDataCacheCountFromNodeLabel(ctx context.Context, nodeName string) (int,
 	return 0, nil
 }
 
-func getNodeWithRetry(ctx context.Context, kubeClient *kubernetes.Clientset, nodeName string) (*v1.Node, error) {
+func FetchNodeWithRetry(ctx context.Context, nodeName string) (*v1.Node, error) {
 	var nodeObj *v1.Node
+	cfg, err := rest.InClusterConfig()
+	if err != nil {
+		return nil, err
+	}
+	kubeClient, err := kubernetes.NewForConfig(cfg)
+	if err != nil {
+		return nil, err
+	}
 	backoff := wait.Backoff{
 		Duration: 1 * time.Second,
 		Factor:   2.0,
 		Steps:    5,
 	}
-	err := wait.ExponentialBackoffWithContext(ctx, backoff, func(_ context.Context) (bool, error) {
+	err = wait.ExponentialBackoffWithContext(ctx, backoff, func(_ context.Context) (bool, error) {
 		node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
 		if err != nil {
 			klog.Warningf("Error getting node %s: %v, retrying...\n", nodeName, err)
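The retry shape carries over unchanged from getNodeWithRetry to the exported FetchNodeWithRetry: up to five attempts with exponential backoff starting at one second. A runnable approximation of the same pattern (the function body mirrors the diff, but it accepts a kubernetes.Interface instead of building an in-cluster client, so it can be exercised with a fake clientset; the node name is test scaffolding, not the driver's code):

package main

import (
	"context"
	"fmt"
	"time"

	v1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
	"k8s.io/client-go/kubernetes"
	"k8s.io/client-go/kubernetes/fake"
)

// fetchNodeWithRetry makes up to 5 attempts with exponential backoff
// (1s, 2s, 4s, ...), returning the Node as soon as a Get succeeds.
func fetchNodeWithRetry(ctx context.Context, client kubernetes.Interface, nodeName string) (*v1.Node, error) {
	var nodeObj *v1.Node
	backoff := wait.Backoff{Duration: 1 * time.Second, Factor: 2.0, Steps: 5}
	err := wait.ExponentialBackoffWithContext(ctx, backoff, func(_ context.Context) (bool, error) {
		node, err := client.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
		if err != nil {
			fmt.Printf("error getting node %s: %v, retrying...\n", nodeName, err)
			return false, nil // not done, not fatal: back off and try again
		}
		nodeObj = node
		return true, nil
	})
	return nodeObj, err
}

func main() {
	client := fake.NewSimpleClientset(&v1.Node{ObjectMeta: metav1.ObjectMeta{Name: "test-node"}})
	node, err := fetchNodeWithRetry(context.Background(), client, "test-node")
	fmt.Println(node.GetName(), err) // test-node <nil>
}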
