
Commit 477d1cc

update test setup to avoid running Datacache setup on machines not supporting LSSDs
1 parent 1566da5 commit 477d1cc

6 files changed (+45, -33 lines)


cmd/gce-pd-csi-driver/main.go

Lines changed: 13 additions & 13 deletions
@@ -75,7 +75,7 @@ var (
     fallbackRequisiteZonesFlag = flag.String("fallback-requisite-zones", "", "Comma separated list of requisite zones that will be used if there are not sufficient zones present in requisite topologies when provisioning a disk")
     enableStoragePoolsFlag = flag.Bool("enable-storage-pools", false, "If set to true, the CSI Driver will allow volumes to be provisioned in Storage Pools")
     enableHdHAFlag = flag.Bool("allow-hdha-provisioning", false, "If set to true, will allow the driver to provision Hyperdisk-balanced High Availability disks")
-    enableDataCacheFlag = flag.Bool("enable-data-cache", false, "If set to true, the CSI Driver will allow volumes to be provisioned with data cache configuration")
+    enableDataCacheFlag = flag.Bool("enable-data-cache", false, "If set to true, the CSI Driver will allow volumes to be provisioned with Data Cache configuration")
     nodeName = flag.String("node-name", "", "The node this driver is running on")

     multiZoneVolumeHandleDiskTypesFlag = flag.String("multi-zone-volume-handle-disk-types", "", "Comma separated list of allowed disk types that can use the multi-zone volumeHandle. Used only if --multi-zone-volume-handle-enable")

@@ -258,10 +258,10 @@ func handle() {

     if *enableDataCacheFlag {
         if nodeName == nil || *nodeName == "" {
-            klog.Errorf("Data cache enabled, but --node-name not passed")
+            klog.Errorf("Data Cache enabled, but --node-name not passed")
         }
         if err := setupDataCache(ctx, *nodeName); err != nil {
-            klog.Errorf("DataCache setup failed: %v", err)
+            klog.Errorf("Data Cache setup failed: %v", err)
         }
     }

@@ -370,7 +370,7 @@ func fetchLssdsForRaiding(lssdCount int) ([]string, error) {
         return nil, fmt.Errorf("Error listing LSSDs with empty mountpoint: %v", err)
     }

-    // We need to ensure the disks to be used for Datacache are both unRAIDed & not containing mountpoints for ephemeral storage already
+    // We need to ensure the disks to be used for Data Cache are both unRAIDed & not containing mountpoints for ephemeral storage already
     availableLssds := slices.Filter(nil, unRaidedLssds, func(e string) bool {
         return slices.Contains(LSSDsWithEmptyMountPoint, e)
     })

@@ -390,31 +390,31 @@ func setupDataCache(ctx context.Context, nodeName string) error {
     if err != nil {
         klog.V(2).Infof("Errored while scanning for available LocalSSDs err:%v; continuing Raiding", err)
     } else if isAlreadyRaided {
-        klog.V(2).Infof("Local SSDs are already RAIDed. Skipping Datacache setup.")
+        klog.V(2).Infof("Local SSDs are already RAIDed. Skipping Data Cache setup.")
         return nil
     }

     lssdCount := common.LocalSSDCountForDataCache
     if nodeName != common.TestNode {
         var err error
         lssdCount, err = driver.GetDataCacheCountFromNodeLabel(ctx, nodeName)
-        if lssdCount == 0 {
-            klog.Infof("Datacache is not enabled on node %v", nodeName)
-            return nil
-        }
         if err != nil {
             return err
         }
+        if lssdCount == 0 {
+            klog.Infof("Data Cache is not enabled on node %v", nodeName)
+            return nil
+        }
     }
     lssdNames, err := fetchLssdsForRaiding(lssdCount)
     if err != nil {
-        klog.Fatalf("Failed to get sufficient SSDs for Datacache's caching setup: %v", err)
+        klog.Fatalf("Failed to get sufficient SSDs for Data Cache's caching setup: %v", err)
     }
-    klog.V(2).Infof("Raiding local ssds to setup data cache: %v", lssdNames)
+    klog.V(2).Infof("Raiding local ssds to setup Data Cache: %v", lssdNames)
     if err := driver.RaidLocalSsds(lssdNames); err != nil {
-        return fmt.Errorf("Failed to Raid local SSDs, unable to setup data caching, got error %v", err)
+        return fmt.Errorf("Failed to Raid local SSDs, unable to setup Data Cache, got error %v", err)
     }

-    klog.V(2).Infof("Datacache enabled for node %s", nodeName)
+    klog.V(2).Infof("Data Cache enabled for node %s", nodeName)
     return nil
 }
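Note on the reordering in setupDataCache: because GetDataCacheCountFromNodeLabel (changed in pkg/gce-pd-csi-driver/cache.go below) now returns (0, nil) when the node simply has no Data Cache label, the error check has to run before the zero-count check so an API or label-lookup failure is not mistaken for "Data Cache not enabled". A minimal sketch of the resulting caller pattern, using only names that appear in the diff above:

    // Sketch: an error aborts setup, a zero count means the node is not in a Data Cache node pool.
    lssdCount, err := driver.GetDataCacheCountFromNodeLabel(ctx, nodeName)
    if err != nil {
        return err // label lookup failed; surface the error
    }
    if lssdCount == 0 {
        klog.Infof("Data Cache is not enabled on node %v", nodeName)
        return nil // nothing to RAID on this node
    }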

pkg/common/constants.go

Lines changed: 1 addition & 1 deletion
@@ -48,7 +48,7 @@ const (
     // Default LSSD count for datacache E2E tests
     LocalSSDCountForDataCache = 2

-    // Node label for datacache
+    // Node label for Data Cache (only applicable to GKE nodes)
     NodeLabelPrefix = "cloud.google.com/%s"
     DataCacheLssdCountLabel = "gke-data-cache-disk"
 )
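For reference, the label key consumed by GetDataCacheCountFromNodeLabel is assembled from the two constants above; a minimal sketch of that lookup key (the Sprintf call appears verbatim in cache.go below):

    // Yields "cloud.google.com/gke-data-cache-disk"; on a GKE node its value is the
    // number of local SSDs reserved for Data Cache.
    labelKey := fmt.Sprintf(common.NodeLabelPrefix, common.DataCacheLssdCountLabel)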

pkg/gce-pd-csi-driver/cache.go

Lines changed: 21 additions & 16 deletions
@@ -16,11 +16,10 @@ import (
 )

 const (
-    cacheSuffix               = "csi-fast"
-    mainLvSuffix              = "csi-main"
-    raidedLocalSsdName        = "csi-driver-data-cache"
-    raidMode                  = "0"
-    initialRaidedLocalSsdPath = "/dev/md0"
+    cacheSuffix        = "csi-fast"
+    mainLvSuffix       = "csi-main"
+    raidedLocalSsdName = "csi-driver-data-cache"
+    raidMode           = "0"
 )

 func fetchRAIDedLocalSsdPath() (string, error) {

@@ -30,7 +29,7 @@ func fetchRAIDedLocalSsdPath() (string, error) {
     }
     info, err := common.RunCommand("grep", []string{raidedLocalSsdName}, "mdadm", args...)
     if err != nil || len(info) == 0 {
-        return "", fmt.Errorf("Error getting RAIDed device path for Datacache %v, output:%v ===============", err, string(info))
+        return "", fmt.Errorf("Error getting RAIDed device path for Data Cache %v, output:%v", err, string(info))
     }
     infoString := strings.TrimSpace(string(info))
     infoSlice := strings.Split(infoString, " ")

@@ -218,7 +217,7 @@ func ValidateDataCacheConfig(dataCacheMode string, datacacheSize string, ctx con
         }
         return nil
     }
-    klog.Infof("Data cache is not enabled for PVC")
+    klog.Infof("Data Cache is not enabled for PVC")
     return nil
 }

@@ -230,36 +229,41 @@ func GetDataCacheCountFromNodeLabel(ctx context.Context, nodeName string) (int,
     // We want to capture API errors with node label fetching, so return -1
     // in those cases instead of 0.
     if err != nil {
-        return -1, err
+        return 0, err
     }
     kubeClient, err := kubernetes.NewForConfig(cfg)
     if err != nil {
-        return -1, err
+        return 0, err
     }
     node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
     if err != nil {
         // We could retry, but this error will also crashloop the driver which may be as good a way to retry as any.
-        return -1, err
+        return 0, err
     }
     if val, found := node.GetLabels()[fmt.Sprintf(common.NodeLabelPrefix, common.DataCacheLssdCountLabel)]; found {
         dataCacheCount, err := strconv.Atoi(val)
         if err != nil {
-            return -1, fmt.Errorf("Error getting Datacache's LSSD count from node label: %v", err)
+            return 0, fmt.Errorf("Error getting Data Cache's LSSD count from node label: %v", err)
         }
-        klog.Infof("Number of local SSDs requested for Datacache: %v", dataCacheCount)
+        klog.Infof("Number of local SSDs requested for Data Cache: %v", dataCacheCount)
         return dataCacheCount, nil
     }
-    return 0, fmt.Errorf("Cannot get Datacache's LSSD count from node label")
+    // This will be returned for a non-Data-Cache node pool
+    return 0, nil
 }

 func FetchRaidedLssdCountForDatacache() (int, error) {
+    raidedPath, err := fetchRAIDedLocalSsdPath()
+    if err != nil {
+        return 0, err
+    }
     args := []string{
         "--detail",
-        initialRaidedLocalSsdPath,
+        raidedPath,
     }
     info, err := common.RunCommand("grep", []string{"Raid Devices"}, "mdadm", args...)
     if err != nil {
-        return 0, fmt.Errorf("Error getting RAIDed devices for Datacache")
+        return 0, fmt.Errorf("Error getting RAIDed devices for Data Cache")
     }
     if len(info) != 0 {
         raidedDeviceInfo := strings.Split(strings.TrimSpace(string(info)), ":")

@@ -358,6 +362,7 @@ func cleanupCache(volumeId string, nodeId string) error {
         // If volume group doesn't exist then there's nothing to uncache
         return nil
     }
+    reduceVolumeGroup(volumeGroupName, true)
     mainLvName := getLvName(mainLvSuffix, volumeId)
     args := []string{
         "-an",

@@ -431,7 +436,7 @@ func reduceVolumeGroup(volumeGroupName string, force bool) {
 func RaidLocalSsds(availableLssds []string) error {
     args := []string{
         "--create",
-        initialRaidedLocalSsdPath,
+        raidedLocalSsdName,
         "--name",
         raidedLocalSsdName,
         "-l" + raidMode,

pkg/gce-pd-csi-driver/node.go

Lines changed: 1 addition & 1 deletion
@@ -347,7 +347,7 @@ func (ns *GCENodeServer) NodeStageVolume(ctx context.Context, req *csi.NodeStage
         }
         configError := ValidateDataCacheConfig(req.GetPublishContext()[common.ContextDataCacheMode], req.GetPublishContext()[common.ContextDataCacheSize], ctx, nodeId)
         if configError != nil {
-            return nil, status.Error(codes.Internal, fmt.Sprintf("Error validate configuration for Datacache: %v", err.Error()))
+            return nil, status.Error(codes.Internal, fmt.Sprintf("Error validate configuration for Data Cache: %v", err.Error()))
         }
         devicePath, err = setupCaching(devFsPath, req, nodeId)
         if err != nil {

test/e2e/utils/utils.go

Lines changed: 5 additions & 2 deletions
@@ -71,8 +71,11 @@ func GCEClientAndDriverSetup(instance *remote.InstanceInfo, driverConfig DriverC
         "--allow-hdha-provisioning",
         "--device-in-use-timeout=10s", // Set lower than the usual value to expedite tests
         fmt.Sprintf("--fallback-requisite-zones=%s", strings.Join(driverConfig.Zones, ",")),
-        "--enable-data-cache",
-        fmt.Sprintf("--node-name=%s", utilcommon.TestNode),
+    }
+
+    if instance.GetLocalSSD() > 0 {
+        extra_flags = append(extra_flags, "--enable-data-cache")
+        extra_flags = append(extra_flags, fmt.Sprintf("--node-name=%s", utilcommon.TestNode))
     }
     extra_flags = append(extra_flags, fmt.Sprintf("--compute-endpoint=%s", driverConfig.ComputeEndpoint))
     extra_flags = append(extra_flags, driverConfig.ExtraFlags...)

test/remote/instance.go

Lines changed: 4 additions & 0 deletions
@@ -80,6 +80,10 @@ func (i *InstanceInfo) GetNodeID() string {
     return common.CreateNodeID(i.cfg.Project, i.cfg.Zone, i.cfg.Name)
 }

+func (i *InstanceInfo) GetLocalSSD() int64 {
+    return i.cfg.LocalSSDCount
+}
+
 func machineTypeMismatch(curInst *compute.Instance, newInst *compute.Instance) bool {
     if !strings.Contains(curInst.MachineType, newInst.MachineType) {
         klog.Infof("Machine type mismatch")
