Skip to content

Commit a6420e8

Browse files
committed
Add support for data cache
1 parent 1fd00e4 commit a6420e8

29 files changed

+1331
-215
lines changed

Dockerfile

+44-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ FROM gke.gcr.io/debian-base:bookworm-v1.0.4-gke.2 AS debian
2727

2828
# Install necessary dependencies
2929
# google_nvme_id script depends on the following packages: nvme-cli, xxd, bash
30-
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs nvme-cli xxd bash
30+
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs nvme-cli xxd bash kmod lvm2 mdadm
3131

3232
# Since we're leveraging apt to pull in dependencies, we use `gcr.io/distroless/base` because it includes glibc.
3333
FROM gcr.io/distroless/base-debian12 AS distroless-base
@@ -56,6 +56,35 @@ COPY --from=debian /sbin/e2fsck /sbin/e2fsck
5656
COPY --from=debian /sbin/fsck /sbin/fsck
5757
COPY --from=debian /sbin/fsck* /sbin/
5858
COPY --from=debian /sbin/fsck.xfs /sbin/fsck.xfs
59+
# Add dependencies for LVM
60+
COPY --from=debian /etc/lvm /lvm-tmp/lvm
61+
COPY --from=debian /lib/systemd/system/blk-availability.service /lib/systemd/system/blk-availability.service
62+
COPY --from=debian /lib/systemd/system/lvm2-lvmpolld.service /lib/systemd/system/lvm2-lvmpolld.service
63+
COPY --from=debian /lib/systemd/system/lvm2-lvmpolld.socket /lib/systemd/system/lvm2-lvmpolld.socket
64+
COPY --from=debian /lib/systemd/system/lvm2-monitor.service /lib/systemd/system/lvm2-monitor.service
65+
COPY --from=debian /lib/udev/rules.d/56-lvm.rules /lib/udev/rules.d/56-lvm.rules
66+
COPY --from=debian /sbin/fsadm /sbin/fsadm
67+
COPY --from=debian /sbin/lvm /sbin/lvm
68+
COPY --from=debian /sbin/lvmdump /sbin/lvmdump
69+
COPY --from=debian /sbin/lvmpolld /sbin/lvmpolld
70+
COPY --from=debian /usr/lib/tmpfiles.d /usr/lib/tmpfiles.d
71+
COPY --from=debian /usr/lib/tmpfiles.d/lvm2.conf /usr/lib/tmpfiles.d/lvm2.conf
72+
COPY --from=debian /sbin/lv* /sbin/
73+
COPY --from=debian /sbin/pv* /sbin/
74+
COPY --from=debian /sbin/vg* /sbin/
75+
COPY --from=debian /bin/lsblk /bin/lsblk
76+
COPY --from=debian /sbin/modprobe /sbin/modprobe
77+
COPY --from=debian /lib/udev /lib/udev
78+
COPY --from=debian /lib/udev/rules.d /lib/udev/rules.d
79+
COPY --from=debian /lib/udev/rules.d/55-dm.rules /lib/udev/rules.d/55-dm.rules
80+
COPY --from=debian /lib/udev/rules.d/60-persistent-storage-dm.rules /lib/udev/rules.d/60-persistent-storage-dm.rules
81+
COPY --from=debian /lib/udev/rules.d/95-dm-notify.rules /lib/udev/rules.d/95-dm-notify.rules
82+
COPY --from=debian /sbin/blkdeactivate /sbin/blkdeactivate
83+
COPY --from=debian /sbin/dmsetup /sbin/dmsetup
84+
COPY --from=debian /sbin/dmstats /sbin/dmstats
85+
COPY --from=debian /bin/ls /bin/ls
86+
# End of dependencies for LVM
87+
COPY --from=debian /sbin/mdadm /sbin/mdadm
5988
COPY --from=debian /sbin/mke2fs /sbin/mke2fs
6089
COPY --from=debian /sbin/mkfs* /sbin/
6190
COPY --from=debian /sbin/resize2fs /sbin/resize2fs
@@ -71,14 +100,20 @@ COPY --from=debian /bin/date /bin/date
71100
COPY --from=debian /bin/grep /bin/grep
72101
COPY --from=debian /bin/sed /bin/sed
73102
COPY --from=debian /bin/ln /bin/ln
103+
COPY --from=debian /bin/cp /bin/cp
74104
COPY --from=debian /bin/udevadm /bin/udevadm
75105

76106
# Copy shared libraries into distroless base.
77107
COPY --from=debian /lib/${LIB_DIR_PREFIX}-linux-gnu/libselinux.so.1 \
108+
/lib/${LIB_DIR_PREFIX}-linux-gnu/libdl.so.2 \
109+
/lib/${LIB_DIR_PREFIX}-linux-gnu/libpthread.so.0 \
78110
/lib/${LIB_DIR_PREFIX}-linux-gnu/libtinfo.so.6 \
79111
/lib/${LIB_DIR_PREFIX}-linux-gnu/libe2p.so.2 \
80112
/lib/${LIB_DIR_PREFIX}-linux-gnu/libcom_err.so.2 \
81113
/lib/${LIB_DIR_PREFIX}-linux-gnu/libdevmapper.so.1.02.1 \
114+
/lib/${LIB_DIR_PREFIX}-linux-gnu/libm.so.6 \
115+
/lib/${LIB_DIR_PREFIX}-linux-gnu/libc.so.6 \
116+
/lib/${LIB_DIR_PREFIX}-linux-gnu/libdevmapper-event.so.1.02.1 \
82117
/lib/${LIB_DIR_PREFIX}-linux-gnu/libext2fs.so.2 \
83118
/lib/${LIB_DIR_PREFIX}-linux-gnu/libgcc_s.so.1 \
84119
/lib/${LIB_DIR_PREFIX}-linux-gnu/liblzma.so.5 \
@@ -99,11 +134,17 @@ COPY --from=debian /lib/${LIB_DIR_PREFIX}-linux-gnu/libselinux.so.1 \
99134
/lib/${LIB_DIR_PREFIX}-linux-gnu/libzstd.so.1 /lib/${LIB_DIR_PREFIX}-linux-gnu/
100135

101136
COPY --from=debian /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libblkid.so.1 \
137+
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libsmartcols.so.1 \
102138
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libbsd.so.0 \
103139
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libinih.so.1 \
104140
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libmount.so.1 \
105141
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libudev.so.1 \
106142
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libuuid.so.1 \
143+
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libzstd.so.1 \
144+
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libaio.so.1 \
145+
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libgcrypt.so.20 \
146+
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libsystemd.so.0 \
147+
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/liblz4.so.1 \
107148
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libacl.so.1 \
108149
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libattr.so.1 \
109150
/usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libedit.so.2 \
@@ -118,4 +159,5 @@ COPY --from=debian /usr/lib/${LIB_DIR_PREFIX}-linux-gnu/libblkid.so.1 \
118159
# Copy NVME support required script and rules into distroless base.
119160
COPY deploy/kubernetes/udev/google_nvme_id /lib/udev_containerized/google_nvme_id
120161

121-
ENTRYPOINT ["/gce-pd-csi-driver"]
162+
COPY --from=builder /go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/initialize-driver.sh /initialize-driver.sh
163+
ENTRYPOINT ["/initialize-driver.sh"]

cmd/gce-pd-csi-driver/main.go

+55-7
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,12 @@ import (
2727
"strings"
2828
"time"
2929

30+
"k8s.io/client-go/kubernetes"
31+
"k8s.io/client-go/rest"
3032
"k8s.io/klog/v2"
3133
"k8s.io/utils/strings/slices"
3234

35+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
3336
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/common"
3437
"sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/deviceutils"
3538
gce "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/compute"
@@ -71,11 +74,14 @@ var (
7174
maxConcurrentFormat = flag.Int("max-concurrent-format", 1, "The maximum number of concurrent format exec calls")
7275
concurrentFormatTimeout = flag.Duration("concurrent-format-timeout", 1*time.Minute, "The maximum duration of a format operation before its concurrency token is released")
7376

74-
maxConcurrentFormatAndMount = flag.Int("max-concurrent-format-and-mount", 1, "If set then format and mount operations are serialized on each node. This is stronger than max-concurrent-format as it includes fsck and other mount operations")
75-
formatAndMountTimeout = flag.Duration("format-and-mount-timeout", 1*time.Minute, "The maximum duration of a format and mount operation before another such operation will be started. Used only if --serialize-format-and-mount")
76-
fallbackRequisiteZonesFlag = flag.String("fallback-requisite-zones", "", "Comma separated list of requisite zones that will be used if there are not sufficient zones present in requisite topologies when provisioning a disk")
77-
enableStoragePoolsFlag = flag.Bool("enable-storage-pools", false, "If set to true, the CSI Driver will allow volumes to be provisioned in Storage Pools")
78-
enableHdHAFlag = flag.Bool("allow-hdha-provisioning", false, "If set to true, will allow the driver to provision Hyperdisk-balanced High Availability disks")
77+
maxConcurrentFormatAndMount = flag.Int("max-concurrent-format-and-mount", 1, "If set then format and mount operations are serialized on each node. This is stronger than max-concurrent-format as it includes fsck and other mount operations")
78+
formatAndMountTimeout = flag.Duration("format-and-mount-timeout", 1*time.Minute, "The maximum duration of a format and mount operation before another such operation will be started. Used only if --serialize-format-and-mount")
79+
fallbackRequisiteZonesFlag = flag.String("fallback-requisite-zones", "", "Comma separated list of requisite zones that will be used if there are not sufficient zones present in requisite topologies when provisioning a disk")
80+
enableStoragePoolsFlag = flag.Bool("enable-storage-pools", false, "If set to true, the CSI Driver will allow volumes to be provisioned in Storage Pools")
81+
enableHdHAFlag = flag.Bool("allow-hdha-provisioning", false, "If set to true, will allow the driver to provision Hyperdisk-balanced High Availability disks")
82+
enableControllerDataCacheFlag = flag.Bool("enable-controller-data-cache", false, "If set to true, the CSI Driver will allow volumes to be provisioned with data cache configuration")
83+
enableNodeDataCacheFlag = flag.Bool("enable-node-data-cache", false, "If set to true, the CSI Driver will allow volumes to be provisioned with data cache configuration")
84+
nodeName = flag.String("node-name", "", "The node this driver is running on")
7985

8086
multiZoneVolumeHandleDiskTypesFlag = flag.String("multi-zone-volume-handle-disk-types", "", "Comma separated list of allowed disk types that can use the multi-zone volumeHandle. Used only if --multi-zone-volume-handle-enable")
8187
multiZoneVolumeHandleEnableFlag = flag.Bool("multi-zone-volume-handle-enable", false, "If set to true, the multi-zone volumeHandle feature will be enabled")
@@ -97,7 +103,9 @@ var (
97103
)
98104

99105
const (
100-
driverName = "pd.csi.storage.gke.io"
106+
driverName = "pd.csi.storage.gke.io"
107+
dataCacheLabel = "datacache-storage-gke-io"
108+
dataCacheLabelValue = "enabled"
101109
)
102110

103111
func init() {
@@ -226,7 +234,7 @@ func handle() {
226234
}
227235
initialBackoffDuration := time.Duration(*errorBackoffInitialDurationMs) * time.Millisecond
228236
maxBackoffDuration := time.Duration(*errorBackoffMaxDurationMs) * time.Millisecond
229-
controllerServer = driver.NewControllerServer(gceDriver, cloudProvider, initialBackoffDuration, maxBackoffDuration, fallbackRequisiteZones, *enableStoragePoolsFlag, multiZoneVolumeHandleConfig, listVolumesConfig, provisionableDisksConfig, *enableHdHAFlag)
237+
controllerServer = driver.NewControllerServer(gceDriver, cloudProvider, initialBackoffDuration, maxBackoffDuration, fallbackRequisiteZones, *enableStoragePoolsFlag, *enableControllerDataCacheFlag, multiZoneVolumeHandleConfig, listVolumesConfig, provisionableDisksConfig, *enableHdHAFlag)
230238
} else if *cloudConfigFilePath != "" {
231239
klog.Warningf("controller service is disabled but cloud config given - it has no effect")
232240
}
@@ -247,13 +255,23 @@ func handle() {
247255
nsArgs := driver.NodeServerArgs{
248256
EnableDeviceInUseCheck: *enableDeviceInUseCheck,
249257
DeviceInUseTimeout: *deviceInUseTimeout,
258+
EnableDataCache: *enableNodeDataCacheFlag,
250259
}
251260
nodeServer = driver.NewNodeServer(gceDriver, mounter, deviceUtils, meta, statter, nsArgs)
252261
if *maxConcurrentFormatAndMount > 0 {
253262
nodeServer = nodeServer.WithSerializedFormatAndMount(*formatAndMountTimeout, *maxConcurrentFormatAndMount)
254263
}
255264
}
256265

266+
if *enableNodeDataCacheFlag {
267+
if nodeName == nil || *nodeName == "" {
268+
klog.Errorf("Data cache enabled, but --node-name not passed")
269+
}
270+
if err := setupDataCache(ctx, *nodeName); err != nil {
271+
klog.Errorf("DataCache setup failed: %v", err)
272+
}
273+
}
274+
257275
err = gceDriver.SetupGCEDriver(driverName, version, extraVolumeLabels, extraTags, identityServer, controllerServer, nodeServer)
258276
if err != nil {
259277
klog.Fatalf("Failed to initialize GCE CSI Driver: %v", err.Error())
@@ -332,3 +350,33 @@ func urlFlag(target **url.URL, name string, usage string) {
332350
return err
333351
})
334352
}
353+
354+
func setupDataCache(ctx context.Context, nodeName string) error {
355+
klog.V(2).Infof("Setting up data cache for node %s", nodeName)
356+
if nodeName != common.TestNode {
357+
cfg, err := rest.InClusterConfig()
358+
if err != nil {
359+
return err
360+
}
361+
kubeClient, err := kubernetes.NewForConfig(cfg)
362+
if err != nil {
363+
return err
364+
}
365+
node, err := kubeClient.CoreV1().Nodes().Get(ctx, nodeName, metav1.GetOptions{})
366+
if err != nil {
367+
// We could retry, but this error will also crashloop the driver which may be as good a way to retry as any.
368+
return err
369+
}
370+
if val, found := node.GetLabels()[dataCacheLabel]; !found || val != dataCacheLabelValue {
371+
klog.V(2).Infof("Datacache not enabled for node %s; node label %s=%s and not %s", nodeName, dataCacheLabel, val, dataCacheLabelValue)
372+
return nil
373+
}
374+
}
375+
klog.V(2).Info("Raiding local ssds to setup data cache")
376+
if err := driver.RaidLocalSsds(); err != nil {
377+
return fmt.Errorf("Failed to Raid local SSDs, unable to setup data caching, got error %v", err)
378+
}
379+
380+
klog.V(2).Infof("Datacache enabled for node %s", nodeName)
381+
return nil
382+
}

deploy/kubernetes/base/controller/cluster_setup.yaml

+3
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,9 @@ rules:
205205
verbs: ['use']
206206
resourceNames:
207207
- csi-gce-pd-node-psp
208+
- apiGroups: [""]
209+
resources: ["nodes"]
210+
verbs: ["get", "list"]
208211
---
209212

210213
kind: ClusterRole

deploy/kubernetes/base/node_linux/node.yaml

+13
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,15 @@ spec:
4646
- "--v=5"
4747
- "--endpoint=unix:/csi/csi.sock"
4848
- "--run-controller-service=false"
49+
- "--enable-node-data-cache"
50+
- "--node-name=$(KUBE_NODE_NAME)"
4951
securityContext:
5052
privileged: true
53+
env:
54+
- name: KUBE_NODE_NAME
55+
valueFrom:
56+
fieldRef:
57+
fieldPath: spec.nodeName
5158
volumeMounts:
5259
- name: kubelet-dir
5360
mountPath: /var/lib/kubelet
@@ -66,6 +73,8 @@ spec:
6673
mountPath: /run/udev
6774
- name: sys
6875
mountPath: /sys
76+
- name: lib-modules
77+
mountPath: /lib/modules
6978
volumes:
7079
- name: registration-dir
7180
hostPath:
@@ -101,6 +110,10 @@ spec:
101110
hostPath:
102111
path: /sys
103112
type: Directory
113+
- name: lib-modules
114+
hostPath:
115+
path: /lib/modules
116+
type: Directory
104117
# https://kubernetes.io/docs/concepts/configuration/taint-and-toleration/
105118
# See "special case". This will tolerate everything. Node component should
106119
# be scheduled on all nodes.

initialize-driver.sh

+9
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
#!/bin/bash
2+
3+
/bin/cp -r /lvm-tmp/lvm /etc/
4+
/bin/sed -i -e "s/.*allow_mixed_block_sizes = 0.*/ allow_mixed_block_sizes = 1/" /etc/lvm/lvm.conf
5+
/bin/sed -i -e "s/.*udev_sync = 1.*/ udev_sync = 0/" /etc/lvm/lvm.conf
6+
/bin/sed -i -e "s/.*udev_rules = 1.*/ udev_rules = 0/" /etc/lvm/lvm.conf
7+
/bin/sed -i -e "s/.*locking_dir = .*/ locking_dir = \"\/tmp\"/" /etc/lvm/lvm.conf
8+
9+
/gce-pd-csi-driver "$@"

pkg/common/constants.go

+12
Original file line numberDiff line numberDiff line change
@@ -32,4 +32,16 @@ const (
3232

3333
// Label that is set on a disk when it is used by a 'multi-zone' VolumeHandle
3434
MultiZoneLabel = "goog-gke-multi-zone"
35+
36+
// Data cache mode
37+
DataCacheModeWriteBack = "writeback"
38+
DataCacheModeWriteThrough = "writethrough"
39+
40+
ContextDataCacheSize = "data-cache-size"
41+
ContextDataCacheMode = "data-cache-mode"
42+
43+
// Keys in the publish context
44+
ContexLocalSsdCacheSize = "local-ssd-cache-size"
45+
// Node name for E2E tests
46+
TestNode = "test-node-csi-e2e"
3547
)

0 commit comments

Comments
 (0)