From bbd5817bf3a0adc74aa9966c9fb925eae44b162a Mon Sep 17 00:00:00 2001 From: Peter Schuurman Date: Wed, 16 Apr 2025 18:53:44 -0700 Subject: [PATCH] Add dev-gke-noauth overlay that allows dev version of driver to be deployed --- cmd/gce-pd-csi-driver/main.go | 10 +++--- .../base/controller/controller.yaml | 5 ++- .../controller_always_pull.yaml | 10 ++++++ .../dev-gke-noauth/csidriver_dev_name.yaml | 3 ++ .../overlays/dev-gke-noauth/driver-args.yaml | 3 ++ .../dev-gke-noauth/kustomization.yaml | 36 +++++++++++++++++++ .../overlays/dev-gke-noauth/noauth.yaml | 25 +++++++++++++ .../dev-gke-noauth/node-driver-args.yaml | 3 ++ .../dev-gke-noauth/node_always_pull.yaml | 11 ++++++ .../node_overwrite_pdcsi_path.yaml | 18 ++++++++++ pkg/metrics/metrics.go | 11 +++--- 11 files changed, 123 insertions(+), 12 deletions(-) create mode 100644 deploy/kubernetes/overlays/dev-gke-noauth/controller_always_pull.yaml create mode 100644 deploy/kubernetes/overlays/dev-gke-noauth/csidriver_dev_name.yaml create mode 100644 deploy/kubernetes/overlays/dev-gke-noauth/driver-args.yaml create mode 100644 deploy/kubernetes/overlays/dev-gke-noauth/kustomization.yaml create mode 100644 deploy/kubernetes/overlays/dev-gke-noauth/noauth.yaml create mode 100644 deploy/kubernetes/overlays/dev-gke-noauth/node-driver-args.yaml create mode 100644 deploy/kubernetes/overlays/dev-gke-noauth/node_always_pull.yaml create mode 100644 deploy/kubernetes/overlays/dev-gke-noauth/node_overwrite_pdcsi_path.yaml diff --git a/cmd/gce-pd-csi-driver/main.go b/cmd/gce-pd-csi-driver/main.go index 0cb6baf3e..1d3b0c1ab 100644 --- a/cmd/gce-pd-csi-driver/main.go +++ b/cmd/gce-pd-csi-driver/main.go @@ -94,11 +94,9 @@ var ( extraTagsStr = flag.String("extra-tags", "", "Extra tags to attach to each Compute Disk, Image, Snapshot created. It is a comma separated list of parent id, key and value like '//,...,//'. parent_id is the Organization or the Project ID or Project name where the tag key and the tag value resources exist. 
A maximum of 50 tags bindings is allowed for a resource. See https://cloud.google.com/resource-manager/docs/tags/tags-overview, https://cloud.google.com/resource-manager/docs/tags/tags-creating-and-managing for details") - version string -) + driverName = flag.String("driver-name", "pd.csi.storage.gke.io", "Driver name. Defaults to pd.csi.storage.gke.io") -const ( - driverName = "pd.csi.storage.gke.io" + version string ) func init() { @@ -150,7 +148,7 @@ func handle() { var metricsManager *metrics.MetricsManager = nil if *runControllerService && *httpEndpoint != "" { - mm := metrics.NewMetricsManager() + mm := metrics.NewMetricsManager(*driverName) mm.InitializeHttpHandler(*httpEndpoint, *metricsPath) mm.RegisterPDCSIMetric() @@ -272,7 +270,7 @@ func handle() { } } - err = gceDriver.SetupGCEDriver(driverName, version, extraVolumeLabels, extraTags, identityServer, controllerServer, nodeServer) + err = gceDriver.SetupGCEDriver(*driverName, version, extraVolumeLabels, extraTags, identityServer, controllerServer, nodeServer) if err != nil { klog.Fatalf("Failed to initialize GCE CSI Driver: %v", err.Error()) } diff --git a/deploy/kubernetes/base/controller/controller.yaml b/deploy/kubernetes/base/controller/controller.yaml index 58c0f4d1f..5dc3d74dc 100644 --- a/deploy/kubernetes/base/controller/controller.yaml +++ b/deploy/kubernetes/base/controller/controller.yaml @@ -16,7 +16,7 @@ spec: # since it replaces GCE Metadata Server with GKE Metadata Server. 
Remove # this requirement when issue is resolved and before any exposure of # metrics ports - hostNetwork: true + hostNetwork: false nodeSelector: kubernetes.io/os: linux serviceAccountName: csi-gce-pd-controller-sa @@ -145,6 +145,9 @@ spec: - "--supports-dynamic-iops-provisioning=hyperdisk-balanced,hyperdisk-extreme" - "--supports-dynamic-throughput-provisioning=hyperdisk-balanced,hyperdisk-throughput,hyperdisk-ml" - --enable-data-cache + - --run-node-service=false + - --multi-zone-volume-handle-disk-types=hyperdisk-ml + - --multi-zone-volume-handle-enable command: - /gce-pd-csi-driver env: diff --git a/deploy/kubernetes/overlays/dev-gke-noauth/controller_always_pull.yaml b/deploy/kubernetes/overlays/dev-gke-noauth/controller_always_pull.yaml new file mode 100644 index 000000000..0cbc95b74 --- /dev/null +++ b/deploy/kubernetes/overlays/dev-gke-noauth/controller_always_pull.yaml @@ -0,0 +1,10 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: csi-gce-pd-controller +spec: + template: + spec: + containers: + - name: gce-pd-driver + imagePullPolicy: Always \ No newline at end of file diff --git a/deploy/kubernetes/overlays/dev-gke-noauth/csidriver_dev_name.yaml b/deploy/kubernetes/overlays/dev-gke-noauth/csidriver_dev_name.yaml new file mode 100644 index 000000000..1a4e12147 --- /dev/null +++ b/deploy/kubernetes/overlays/dev-gke-noauth/csidriver_dev_name.yaml @@ -0,0 +1,3 @@ +- op: replace + path: /metadata/name + value: pd.dev.csi.storage.gke.io \ No newline at end of file diff --git a/deploy/kubernetes/overlays/dev-gke-noauth/driver-args.yaml b/deploy/kubernetes/overlays/dev-gke-noauth/driver-args.yaml new file mode 100644 index 000000000..ac1f1cfdb --- /dev/null +++ b/deploy/kubernetes/overlays/dev-gke-noauth/driver-args.yaml @@ -0,0 +1,3 @@ +- op: add + path: /spec/template/spec/containers/0/args/- + value: --driver-name=pd.dev.csi.storage.gke.io diff --git a/deploy/kubernetes/overlays/dev-gke-noauth/kustomization.yaml 
b/deploy/kubernetes/overlays/dev-gke-noauth/kustomization.yaml new file mode 100644 index 000000000..e1bc8dbe3 --- /dev/null +++ b/deploy/kubernetes/overlays/dev-gke-noauth/kustomization.yaml @@ -0,0 +1,36 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +patchesStrategicMerge: +- controller_always_pull.yaml +- node_always_pull.yaml +- noauth.yaml +- node_overwrite_pdcsi_path.yaml +namespace: gce-pd-csi-driver +resources: +- ../../base/ +# The dev overlay starts from the stable-master image set +transformers: +- ../../images/stable-master +# Apply patches that rename the driver to pd.dev.csi.storage.gke.io +patches: +- path: ./driver-args.yaml + target: + group: apps + version: v1 + kind: Deployment + name: csi-gce-pd-controller +- path: ./node-driver-args.yaml + target: + group: apps + version: v1 + kind: DaemonSet + name: csi-gce-pd-node +- path: ./csidriver_dev_name.yaml + target: + kind: CSIDriver + name: pd.csi.storage.gke.io +# Override the driver image with a dev build; change newName/newTag below to point at your own image.
+images: +- name: gke.gcr.io/gcp-compute-persistent-disk-csi-driver + newName: us-central1-docker.pkg.dev/psch-gke-dev/csi-dev/gcp-compute-persistent-disk-csi-driver + newTag: latest diff --git a/deploy/kubernetes/overlays/dev-gke-noauth/noauth.yaml b/deploy/kubernetes/overlays/dev-gke-noauth/noauth.yaml new file mode 100644 index 000000000..3c06103c0 --- /dev/null +++ b/deploy/kubernetes/overlays/dev-gke-noauth/noauth.yaml @@ -0,0 +1,25 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: csi-gce-pd-controller +spec: + template: + spec: + containers: + - name: gce-pd-driver + env: + - $patch: delete + name: GOOGLE_APPLICATION_CREDENTIALS + value: "/etc/cloud-sa/cloud-sa.json" + volumeMounts: + - $patch: delete + name: cloud-sa-volume + readOnly: true + mountPath: "/etc/cloud-sa" + volumes: + - $patch: delete + name: cloud-sa-volume + secret: + secretName: cloud-sa + + diff --git a/deploy/kubernetes/overlays/dev-gke-noauth/node-driver-args.yaml b/deploy/kubernetes/overlays/dev-gke-noauth/node-driver-args.yaml new file mode 100644 index 000000000..f6983dce0 --- /dev/null +++ b/deploy/kubernetes/overlays/dev-gke-noauth/node-driver-args.yaml @@ -0,0 +1,3 @@ +- op: add + path: /spec/template/spec/containers/1/args/- + value: --driver-name=pd.dev.csi.storage.gke.io diff --git a/deploy/kubernetes/overlays/dev-gke-noauth/node_always_pull.yaml b/deploy/kubernetes/overlays/dev-gke-noauth/node_always_pull.yaml new file mode 100644 index 000000000..f20c8af8d --- /dev/null +++ b/deploy/kubernetes/overlays/dev-gke-noauth/node_always_pull.yaml @@ -0,0 +1,11 @@ +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: csi-gce-pd-node +spec: + template: + spec: + containers: + - name: gce-pd-driver + imagePullPolicy: Always + diff --git a/deploy/kubernetes/overlays/dev-gke-noauth/node_overwrite_pdcsi_path.yaml b/deploy/kubernetes/overlays/dev-gke-noauth/node_overwrite_pdcsi_path.yaml new file mode 100644 index 000000000..0fde8360b --- /dev/null +++ 
b/deploy/kubernetes/overlays/dev-gke-noauth/node_overwrite_pdcsi_path.yaml @@ -0,0 +1,18 @@ +kind: DaemonSet +apiVersion: apps/v1 +metadata: + name: csi-gce-pd-node +spec: + template: + spec: + containers: + - name: csi-driver-registrar + args: + - "--v=5" + - "--csi-address=/csi/csi.sock" + - "--kubelet-registration-path=/var/lib/kubelet/plugins/pd.dev.csi.storage.gke.io/csi.sock" + volumes: + - name: plugin-dir + hostPath: + path: /var/lib/kubelet/plugins/pd.dev.csi.storage.gke.io/ + type: DirectoryOrCreate diff --git a/pkg/metrics/metrics.go b/pkg/metrics/metrics.go index 600a1b2f0..58e9f8924 100644 --- a/pkg/metrics/metrics.go +++ b/pkg/metrics/metrics.go @@ -32,7 +32,6 @@ const ( // envGKEPDCSIVersion is an environment variable set in the PDCSI controller manifest // with the current version of the GKE component. envGKEPDCSIVersion = "GKE_PDCSI_VERSION" - pdcsiDriverName = "pd.csi.storage.gke.io" DefaultDiskTypeForMetric = "unknownDiskType" DefaultEnableConfidentialCompute = "unknownConfidentialMode" DefaultEnableStoragePools = "unknownStoragePools" @@ -56,12 +55,14 @@ var ( ) type MetricsManager struct { - registry metrics.KubeRegistry + registry metrics.KubeRegistry + driverName string } -func NewMetricsManager() MetricsManager { +func NewMetricsManager(driverName string) MetricsManager { mm := MetricsManager{ - registry: metrics.NewKubeRegistry(), + driverName: driverName, + registry: metrics.NewKubeRegistry(), } return mm } @@ -97,7 +98,7 @@ func (mm *MetricsManager) RecordOperationErrorMetrics( enableConfidentialStorage string, enableStoragePools string) { errCode := errorCodeLabelValue(operationErr) - pdcsiOperationErrorsMetric.WithLabelValues(pdcsiDriverName, fullMethodName, errCode, diskType, enableConfidentialStorage, enableStoragePools).Inc() + pdcsiOperationErrorsMetric.WithLabelValues(mm.driverName, fullMethodName, errCode, diskType, enableConfidentialStorage, enableStoragePools).Inc() klog.Infof("Recorded PDCSI operation error code: %q", errCode) }