Skip to content

Commit d35496c

Browse files
committed
Fix chunksize bug for large cache size in data cache
1 parent 39a5910 commit d35496c

File tree

2 files changed

+49
-20
lines changed

2 files changed

+49
-20
lines changed

deploy/kubernetes/base/controller/controller.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ spec:
143143
- "--endpoint=unix:/csi/csi.sock"
144144
- "--supports-dynamic-iops-provisioning=hyperdisk-balanced,hyperdisk-extreme"
145145
- "--supports-dynamic-throughput-provisioning=hyperdisk-balanced,hyperdisk-throughput,hyperdisk-ml"
146+
- --enable-controller-data-cache
146147
env:
147148
- name: GOOGLE_APPLICATION_CREDENTIALS
148149
value: "/etc/cloud-sa/cloud-sa.json"

pkg/gce-pd-csi-driver/cache.go

Lines changed: 48 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package gceGCEDriver
22

33
import (
44
"fmt"
5+
"math"
56
"regexp"
67
"strconv"
78
"strings"
@@ -14,11 +15,14 @@ import (
1415
)
1516

1617
const (
17-
cacheSuffix = "csi-fast"
18-
mainLvSuffix = "csi-main"
19-
raidedLocalSsdName = "csi-driver-data-cache"
20-
raidMode = "0"
21-
raidedLssdPrefix = "/dev/md/"
18+
cacheSuffix = "csi-fast"
19+
mainLvSuffix = "csi-main"
20+
raidedLocalSsdName = "csi-driver-data-cache"
21+
raidMode = "0"
22+
raidedLssdPrefix = "/dev/md/"
23+
maxAllowedChunks int64 = 1000000 // This is the max allowed chunks for LVM
24+
GiB int64 = 1024 * 1024 * 1024
25+
KiB int64 = 1024
2226
)
2327

2428
var raidedLocalSsdPath = raidedLssdPrefix + raidedLocalSsdName
@@ -141,21 +145,30 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
141145
// Validate that cache is setup for required size
142146
klog.V(2).Infof("Assuming valid data cache size and mode, resizing cache is not supported")
143147
} else {
144-
fastCacheSize := req.GetPublishContext()[common.ContexLocalSsdCacheSize]
145-
chunkSize := "960" // Cannot use default chunk size(64KiB) as it errors on maxChunksAllowed. Unit - KiB
146-
args = []string{
147-
"--yes",
148-
"-n",
149-
cacheLvName,
150-
"-L",
151-
// ConvertGiStringToInt64 converts the input size to GiB so default to "g" for cache size - LVM g|G is GiB.
152-
fastCacheSize + "g",
153-
volumeGroupName,
154-
raidedLocalSsdPath,
155-
}
156-
info, err = common.RunCommand("" /* pipedCmd */, "" /* pipedCmdArg */, "lvcreate", args...)
148+
cacheSize := req.GetPublishContext()[common.ContexLocalSsdCacheSize]
149+
chunkSize, err := fetchChunkSize(cacheSize)
157150
if err != nil {
158-
return mainDevicePath, fmt.Errorf("Errored while creating cache %w: %s", err, info)
151+
klog.Errorf("Errored to fetch cache size, verify the data-cache-size is valid: got %v, error: %q", cacheSize, err)
152+
return mainDevicePath, err
153+
}
154+
// Check if LV exists
155+
info, err = common.RunCommand("" /* pipedCmd */, "" /* pipedCmdArg */, "lvs", args...)
156+
lvExists := strings.Contains(string(info), cacheLvName)
157+
if !lvExists {
158+
args = []string{
159+
"--yes",
160+
"-n",
161+
cacheLvName,
162+
"-L",
163+
// ConvertGiStringToInt64 converts the input size to GiB so default to "g" for cache size - LVM g|G is GiB.
164+
cacheSize + "g",
165+
volumeGroupName,
166+
raidedLocalSsdPath,
167+
}
168+
info, err = common.RunCommand("" /* pipedCmd */, "" /* pipedCmdArg */, "lvcreate", args...)
169+
if err != nil {
170+
return mainDevicePath, fmt.Errorf("Errored while creating cache %w: %s", err, info)
171+
}
159172
}
160173

161174
// Once caching is setup, link the PD to cache
@@ -170,7 +183,7 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
170183
req.GetPublishContext()[common.ContextDataCacheMode],
171184
volumeGroupName + "/" + mainLvName,
172185
"--chunksize",
173-
string(chunkSize),
186+
chunkSize,
174187
"--force",
175188
"-y",
176189
}
@@ -366,3 +379,18 @@ func isCachingSetup(mainLvName string) (error, bool) {
366379
}
367380
return nil, false
368381
}
382+
383+
func fetchChunkSize(cacheSize string) (string, error) {
384+
var chunkSize float64
385+
var maxChunkSize int64 = 1 * GiB // Max allowed chunk size as per LVM documentation
386+
var minChunkSize int64 = 320 * KiB // This is randomly selected, we need a multiple of 32KiB, the default size would be too small for caching https://man7.org/linux/man-pages/man8/lvcreate.8.html (--chunksize)
387+
cacheSizeInt, err := common.ConvertGiStringToInt64(cacheSize)
388+
if err != nil {
389+
return "0", err
390+
}
391+
// Chunksize should be divisible by 32Kib so we need (chunksize/32*1024)*32*1024
392+
chunkSize = float64(cacheSizeInt) / float64(maxAllowedChunks)
393+
chunkSize = math.Ceil(chunkSize/float64(32*KiB)) * float64(32*KiB)
394+
chunkSize = math.Min(math.Max(chunkSize, float64(minChunkSize)), float64(maxChunkSize))
395+
return strconv.FormatInt(int64(chunkSize), 10), nil
396+
}

0 commit comments

Comments
 (0)