Skip to content

Commit 4ef1224

Browse files
committed
Fix chunksize bug for large cache size in data cache
1 parent ed6b156 commit 4ef1224

File tree

2 files changed

+48
-19
lines changed

2 files changed

+48
-19
lines changed

deploy/kubernetes/base/controller/controller.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,7 @@ spec:
143143
- "--endpoint=unix:/csi/csi.sock"
144144
- "--supports-dynamic-iops-provisioning=hyperdisk-balanced,hyperdisk-extreme"
145145
- "--supports-dynamic-throughput-provisioning=hyperdisk-balanced,hyperdisk-throughput,hyperdisk-ml"
146+
- --enable-controller-data-cache
146147
env:
147148
- name: GOOGLE_APPLICATION_CREDENTIALS
148149
value: "/etc/cloud-sa/cloud-sa.json"

pkg/gce-pd-csi-driver/cache.go

+47-19
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package gceGCEDriver
33
import (
44
"context"
55
"fmt"
6+
"math"
67
"regexp"
78
"strconv"
89
"strings"
@@ -16,10 +17,13 @@ import (
1617
)
1718

1819
const (
19-
cacheSuffix = "csi-fast"
20-
mainLvSuffix = "csi-main"
21-
raidedLocalSsdName = "csi-driver-data-cache"
22-
raidMode = "0"
20+
cacheSuffix = "csi-fast"
21+
mainLvSuffix = "csi-main"
22+
raidedLocalSsdName = "csi-driver-data-cache"
23+
raidMode = "0"
24+
maxAllowedChunks int64 = 1000000 // This is the max allowed chunks for LVM
25+
GiB int64 = 1024 * 1024 * 1024
26+
KiB int64 = 1024
2327
)
2428

2529
func fetchRAIDedLocalSsdPath() (string, error) {
@@ -159,21 +163,30 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
159163
// Validate that cache is setup for required size
160164
klog.V(4).Infof("Assuming valid data cache size and mode, resizing cache is not supported")
161165
} else {
162-
fastCacheSize := req.GetPublishContext()[common.ContextDataCacheSize]
163-
chunkSize := "960" // Cannot use default chunk size(64KiB) as it errors on maxChunksAllowed. Unit - KiB
164-
args = []string{
165-
"--yes",
166-
"-n",
167-
cacheLvName,
168-
"-L",
169-
// ConvertGiStringToInt64 converts the input size to GiB so default to "g" for cache size - LVM g|G is GiB.
170-
fastCacheSize + "g",
171-
volumeGroupName,
172-
raidedLocalSsdPath,
173-
}
174-
info, err = common.RunCommand("" /* pipedCmd */, nil /* pipedCmdArg */, "lvcreate", args...)
166+
cacheSize := req.GetPublishContext()[common.ContextDataCacheSize]
167+
chunkSize, err := fetchChunkSize(cacheSize)
175168
if err != nil {
176-
return mainDevicePath, fmt.Errorf("Errored while creating cache %w: %s", err, info)
169+
klog.Errorf("Errored to fetch cache size, verify the data-cache-size is valid: got %v, error: %q", cacheSize, err)
170+
return mainDevicePath, err
171+
}
172+
// Check if LV exists
173+
info, err = common.RunCommand("" /* pipedCmd */, nil /* pipedCmdArg */, "lvs", args...)
174+
lvExists := strings.Contains(string(info), cacheLvName)
175+
if !lvExists {
176+
args = []string{
177+
"--yes",
178+
"-n",
179+
cacheLvName,
180+
"-L",
181+
// ConvertGiStringToInt64 converts the input size to GiB so default to "g" for cache size - LVM g|G is GiB.
182+
cacheSize + "g",
183+
volumeGroupName,
184+
raidedLocalSsdPath,
185+
}
186+
info, err = common.RunCommand("" /* pipedCmd */, nil /* pipedCmdArg */, "lvcreate", args...)
187+
if err != nil {
188+
return mainDevicePath, fmt.Errorf("Errored while creating cache %w: %s", err, info)
189+
}
177190
}
178191

179192
// Once caching is setup, link the PD to cache
@@ -188,7 +201,7 @@ func setupCaching(devicePath string, req *csi.NodeStageVolumeRequest, nodeId str
188201
req.GetPublishContext()[common.ContextDataCacheMode],
189202
volumeGroupName + "/" + mainLvName,
190203
"--chunksize",
191-
string(chunkSize),
204+
chunkSize,
192205
"--force",
193206
"-y",
194207
}
@@ -497,3 +510,18 @@ func isCachingSetup(mainLvName string) (error, bool) {
497510
}
498511
return nil, false
499512
}
513+
514+
func fetchChunkSize(cacheSize string) (string, error) {
515+
var chunkSize float64
516+
var maxChunkSize int64 = 1 * GiB // Max allowed chunk size as per LVM documentation
517+
var minChunkSize int64 = 320 * KiB // This is randomly selected, we need a multiple of 32KiB, the default size would be too small for caching https://man7.org/linux/man-pages/man8/lvcreate.8.html (--chunksize)
518+
cacheSizeInt, err := common.ConvertGiStringToInt64(cacheSize)
519+
if err != nil {
520+
return "0", err
521+
}
522+
// Chunksize should be divisible by 32Kib so we need (chunksize/32*1024)*32*1024
523+
chunkSize = float64(cacheSizeInt) / float64(maxAllowedChunks)
524+
chunkSize = math.Ceil(chunkSize/float64(32*KiB)) * float64(32*KiB)
525+
chunkSize = math.Min(math.Max(chunkSize, float64(minChunkSize)), float64(maxChunkSize))
526+
return strconv.FormatInt(int64(chunkSize), 10), nil
527+
}

0 commit comments

Comments
 (0)