// Gen4MachineHyperdiskAttachLimitMap lists the Hyperdisk attach limit for
// Gen4 machine types, bucketed by CPU count. Each entry applies to CPU counts
// up to and including max. Entries MUST stay sorted by ascending max, because
// MapNumber returns the first matching entry and treats the final entry as the
// catch-all for larger CPU counts.
//
// See https://cloud.google.com/compute/docs/disks/hyperdisks#max-total-disks-per-vm
var Gen4MachineHyperdiskAttachLimitMap = []struct {
	max   int64
	value int64
}{
	{max: 4, value: 16},
	{max: 8, value: 24},
	{max: 16, value: 32},
	{max: 32, value: 48},
	{max: 64, value: 64},
	{max: 1024, value: 128},
}

// MapNumber maps a CPU count to the Hyperdisk attach limit defined in
// Gen4MachineHyperdiskAttachLimitMap.
//
// It returns the value of the first bucket whose max is >= num. Consequently
// any num at or below the first bucket's max (including zero or negative
// input) yields the smallest limit. A num larger than every bucket yields the
// last bucket's value rather than 0, so that callers forwarding the result as
// a volume limit never report 0 for a large machine. 0 is returned only when
// the table is empty.
func MapNumber(num int64) int64 {
	for _, r := range Gen4MachineHyperdiskAttachLimitMap {
		if num <= r.max {
			return r.value
		}
	}
	// num exceeds every listed bucket: fall back to the last (largest) bucket
	// instead of returning 0.
	if n := len(Gen4MachineHyperdiskAttachLimitMap); n > 0 {
		return Gen4MachineHyperdiskAttachLimitMap[n-1].value
	}
	return 0
}
const ( - volumeLimitSmall int64 = 15 - volumeLimitBig int64 = 127 + volumeLimitSmall int64 = 15 + volumeLimitBig int64 = 127 + // doc https://cloud.google.com/compute/docs/memory-optimized-machines#x4_disks + x4HyperdiskLimit int64 = 40 + // doc https://cloud.google.com/compute/docs/accelerator-optimized-machines#a4-disks + a4HyperdiskLimit int64 = 128 defaultLinuxFsType = "ext4" defaultWindowsFsType = "ntfs" fsTypeExt3 = "ext3" @@ -567,6 +572,9 @@ func (ns *GCENodeServer) NodeGetInfo(ctx context.Context, req *csi.NodeGetInfoRe nodeID := common.CreateNodeID(ns.MetadataService.GetProject(), ns.MetadataService.GetZone(), ns.MetadataService.GetName()) volumeLimits, err := ns.GetVolumeLimits() + if err != nil { + klog.Errorf("GetVolumeLimits failed: %v", err.Error()) + } resp := &csi.NodeGetInfoResponse{ NodeId: nodeID, @@ -733,5 +741,24 @@ func (ns *GCENodeServer) GetVolumeLimits() (int64, error) { return volumeLimitSmall, nil } } + gen4MachineTypesPrefix := []string{"c4a-", "c4-", "n4-"} + for _, gen4Prefix := range gen4MachineTypesPrefix { + if strings.HasPrefix(machineType, gen4Prefix) { + cpuString := machineType[strings.LastIndex(machineType, "-")+1:] + cpus, err := strconv.ParseInt(cpuString, 10, 64) + if err != nil { + return volumeLimitSmall, fmt.Errorf("invalid cpuString %s for machine type: %v", cpuString, machineType) + } + return common.MapNumber(cpus), nil + + } + if strings.HasPrefix(machineType, "x4-") { + return x4HyperdiskLimit, nil + } + if strings.HasPrefix(machineType, "a4-") { + return a4HyperdiskLimit, nil + } + } + return volumeLimitBig, nil } diff --git a/pkg/gce-pd-csi-driver/node_test.go b/pkg/gce-pd-csi-driver/node_test.go index ee016df0c..57c3e82b9 100644 --- a/pkg/gce-pd-csi-driver/node_test.go +++ b/pkg/gce-pd-csi-driver/node_test.go @@ -224,6 +224,7 @@ func TestNodeGetVolumeLimits(t *testing.T) { name string machineType string expVolumeLimit int64 + expectError bool }{ { name: "Predifined standard machine", @@ -255,13 +256,54 @@ func 
TestNodeGetVolumeLimits(t *testing.T) { machineType: "e2-micro", expVolumeLimit: volumeLimitSmall, }, + { + name: "c4-standard-192", + machineType: "c4-standard-192", + expVolumeLimit: 128, + }, + { + name: "c4-standard-48", + machineType: "c4-standard-48", + expVolumeLimit: 64, + }, + { + name: "c4a-standard-4", + machineType: "c4a-standard-4", + expVolumeLimit: 16, + }, + { + name: "n4-standard-16", + machineType: "n4-standard-16", + expVolumeLimit: 32, + }, + { + name: "n4-highcpu-4", + machineType: "n4-highcpu-4", + expVolumeLimit: 16, + }, + { + name: "invalid gen4 machine type", + machineType: "n4-highcpu-4xyz", + expVolumeLimit: volumeLimitSmall, + expectError: true, + }, + { + name: "x4-megamem-960-metal", + machineType: "x4-megamem-960-metal", + expVolumeLimit: x4HyperdiskLimit, + }, + { + name: "a4-highgpu-8g", + machineType: "a4-highgpu-8g", + expVolumeLimit: a4HyperdiskLimit, + }, } for _, tc := range testCases { t.Logf("Test case: %s", tc.name) metadataservice.SetMachineType(tc.machineType) res, err := ns.NodeGetInfo(context.Background(), req) - if err != nil { + if err != nil && !tc.expectError { t.Fatalf("Failed to get node info: %v", err) } else { volumeLimit := res.GetMaxVolumesPerNode()