diff --git a/Dockerfile.debug b/Dockerfile.debug new file mode 100644 index 000000000..0336eec32 --- /dev/null +++ b/Dockerfile.debug @@ -0,0 +1,42 @@ +# Copyright 2021 The Kubernetes Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +FROM golang:1.13.15 as builder +WORKDIR /go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver +ADD . . + +RUN CGO_ENABLED=0 go get -ldflags "-s -w -extldflags '-static'" github.com/go-delve/delve/cmd/dlv +RUN GCE_PD_CSI_DEBUG=1 make gce-pd-driver + +# MAD HACKS: Build a version first so we can take the scsi_id bin and put it somewhere else in our real build +FROM k8s.gcr.io/build-image/debian-base-amd64:buster-v1.5.0 as mad-hack +RUN clean-install udev + +# Start from Kubernetes Debian base +FROM k8s.gcr.io/build-image/debian-base-amd64:buster-v1.5.0 + +# Copy source code too to correlate the binary and the breakpoints +WORKDIR /go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver +ADD . . 
+ +COPY --from=builder /go/bin/dlv /go/bin/dlv + +# Install necessary dependencies +RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs +COPY --from=mad-hack /lib/udev/scsi_id /lib/udev_containerized/scsi_id + +# PDCSI driver isn't copied to / because of delve not being able to correlate +# the binary and the source code, instead just run the binary where it was +# compiled, the overlay noauth-debug calls this binary +ENTRYPOINT ["/go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/bin/gce-pd-csi-driver"] diff --git a/Makefile b/Makefile index fd6b632d9..6ec2c345c 100644 --- a/Makefile +++ b/Makefile @@ -20,6 +20,12 @@ ifdef GCE_PD_CSI_STAGING_VERSION else STAGINGVERSION=${REV} endif + +GCFLAGS="" +ifdef GCE_PD_CSI_DEBUG + GCFLAGS="all=-N -l" +endif + STAGINGIMAGE=${GCE_PD_CSI_STAGING_IMAGE} DRIVERBINARY=gce-pd-csi-driver DRIVERWINDOWSBINARY=${DRIVERBINARY}.exe @@ -38,7 +44,7 @@ WINDOWS_BASE_IMAGES=$(BASE_IMAGE_LTSC2019) $(BASE_IMAGE_1909) $(BASE_IMAGE_2004) all: gce-pd-driver gce-pd-driver-windows gce-pd-driver: mkdir -p bin - go build -mod=vendor -ldflags "-X main.version=$(STAGINGVERSION)" -o bin/${DRIVERBINARY} ./cmd/gce-pd-csi-driver/ + go build -mod=vendor -gcflags=$(GCFLAGS) -ldflags "-X main.version=$(STAGINGVERSION)" -o bin/${DRIVERBINARY} ./cmd/gce-pd-csi-driver/ gce-pd-driver-windows: mkdir -p bin @@ -76,6 +82,11 @@ build-and-push-multi-arch: build-and-push-container-linux build-and-push-windows STAGINGIMAGE="$(STAGINGIMAGE)" STAGINGVERSION="$(STAGINGVERSION)" WINDOWS_IMAGE_TAGS="$(WINDOWS_IMAGE_TAGS)" WINDOWS_BASE_IMAGES="$(WINDOWS_BASE_IMAGES)" ./manifest_osversion.sh $(DOCKER) manifest push -p $(STAGINGIMAGE):$(STAGINGVERSION) +build-and-push-multi-arch-debug: build-and-push-container-linux-debug build-and-push-windows-container-ltsc2019 + $(DOCKER) manifest create --amend $(STAGINGIMAGE):$(STAGINGVERSION) $(STAGINGIMAGE):$(STAGINGVERSION)_linux $(STAGINGIMAGE):$(STAGINGVERSION)_ltsc2019 + STAGINGIMAGE="$(STAGINGIMAGE)" 
STAGINGVERSION="$(STAGINGVERSION)" WINDOWS_IMAGE_TAGS="ltsc2019" WINDOWS_BASE_IMAGES="$(BASE_IMAGE_LTSC2019)" ./manifest_osversion.sh + $(DOCKER) manifest push -p $(STAGINGIMAGE):$(STAGINGVERSION) + push-container: build-container gcloud docker -- push $(STAGINGIMAGE):$(STAGINGVERSION) @@ -84,6 +95,11 @@ build-and-push-container-linux: require-GCE_PD_CSI_STAGING_IMAGE init-buildx -t $(STAGINGIMAGE):$(STAGINGVERSION)_linux \ --build-arg TAG=$(STAGINGVERSION) --push . +build-and-push-container-linux-debug: require-GCE_PD_CSI_STAGING_IMAGE init-buildx + $(DOCKER) buildx build --file=Dockerfile.debug --platform=linux \ + -t $(STAGINGIMAGE):$(STAGINGVERSION)_linux \ + --build-arg TAG=$(STAGINGVERSION) --push . + test-sanity: gce-pd-driver go test -mod=vendor --v -timeout 30s sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/test/sanity -run ^TestSanity$ diff --git a/deploy/kubernetes/README.md b/deploy/kubernetes/README.md index 92949c196..c21de286d 100644 --- a/deploy/kubernetes/README.md +++ b/deploy/kubernetes/README.md @@ -16,8 +16,9 @@ The current structure for kustomization is as follows. Note that Windows support * `overlays`: It has the k8s minor version-specific driver manifest bundle. * `stable-master`: Contains deployment specs of a stable driver for k8s master. * `stable-{k8s-minor}`: Contains deployment specs of a stable driver for given k8s minor version release. - * `alpha`: Contains deployment specs for features in development. Both Linux and Windows are supported. + * `alpha`: Contains deployment specs for features in development. Both Linux and Windows are supported. * `dev`: Based on alpha, and also contains the developer's specs for use in driver development. + * `noauth-debug`: Based on alpha, used for debugging purposes only, see docs/kubernetes/development.md. * `prow-gke-release-staging-rc-master`: Used for prow tests. Contains deployment specs of a driver for latest k8s master. * `prow-gke-release-staging-rc-{k8s-minor}`: Used for prow tests. 
Contains deployment specs of a driver for given k8s minor version release. * `prow-gke-release-staging-rc-head`: Used for prow tests. Contains deployment specs of a driver with latest sidecar images, for latest k8s master. diff --git a/deploy/kubernetes/deploy-driver.sh b/deploy/kubernetes/deploy-driver.sh index dc0a37eff..4f6443eed 100755 --- a/deploy/kubernetes/deploy-driver.sh +++ b/deploy/kubernetes/deploy-driver.sh @@ -8,7 +8,7 @@ # which are in Kubernetes version 1.10.5+ # Args: -# GCE_PD_SA_DIR: Directory the service account key has been saved in (generated +# GCE_PD_SA_DIR: Directory the service account key has been saved in (generated # by setup-project.sh). Ignored if GCE_PD_DRIVER_VERSION == noauth. # GCE_PD_DRIVER_VERSION: The kustomize overlay (located in # deploy/kubernetes/overlays) to deploy. Can be one of {stable, dev} @@ -44,7 +44,7 @@ while [ -n "${1-}" ]; do esac done -if [ "${DEPLOY_VERSION}" != noauth ]; then +if [[ ! "${DEPLOY_VERSION}" == *noauth* ]]; then ensure_var GCE_PD_SA_DIR fi @@ -74,7 +74,7 @@ function check_service_account() ensure_kustomize -if [ "$skip_sa_check" != true -a "${DEPLOY_VERSION}" != noauth ]; then +if [[ "$skip_sa_check" != true ]] && [[ ! "${DEPLOY_VERSION}" == *noauth* ]]; then check_service_account fi @@ -83,7 +83,7 @@ then ${KUBECTL} create namespace "${NAMESPACE}" -v="${VERBOSITY}" fi -if [ "${DEPLOY_VERSION}" != noauth ]; then +if [[ ! "${DEPLOY_VERSION}" == *noauth* ]]; then if ! 
${KUBECTL} get secret cloud-sa -v="${VERBOSITY}" -n "${NAMESPACE}"; then ${KUBECTL} create secret generic cloud-sa -v="${VERBOSITY}" --from-file="${GCE_PD_SA_DIR}/cloud-sa.json" -n "${NAMESPACE}" diff --git a/deploy/kubernetes/install-kustomize.sh b/deploy/kubernetes/install-kustomize.sh index 88c41913b..eaeb8ebbe 100755 --- a/deploy/kubernetes/install-kustomize.sh +++ b/deploy/kubernetes/install-kustomize.sh @@ -48,7 +48,7 @@ elif [[ "$OSTYPE" == darwin* ]]; then fi # As github has a limit on what stored in releases/, and kustomize has many different package -# versions, we just point directly at the version we want. See +# versions, we just point directly at the version we want. See # github.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh. version=v3.9.4 diff --git a/deploy/kubernetes/overlays/noauth-debug/controller-overlay.yaml b/deploy/kubernetes/overlays/noauth-debug/controller-overlay.yaml new file mode 100644 index 000000000..adc68a679 --- /dev/null +++ b/deploy/kubernetes/overlays/noauth-debug/controller-overlay.yaml @@ -0,0 +1,34 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: csi-gce-pd-controller + annotations: + # https://kubernetes.io/docs/tutorials/clusters/apparmor/ + container.apparmor.security.beta.kubernetes.io/gce-pd-driver: unconfined +spec: + template: + spec: + containers: + - name: gce-pd-driver + imagePullPolicy: Always + command: ["/go/bin/dlv"] + args: + - "--listen=:2345" + - "--headless=true" + - "--api-version=2" + # https://github.com/go-delve/delve/blob/master/Documentation/usage/dlv_exec.md#options + - "--accept-multiclient" + - "--continue" + - "--log" + - "exec" + - "/go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/bin/gce-pd-csi-driver" + - "--" + - "--v=5" + - "--endpoint=unix:/csi/csi.sock" + ports: + - containerPort: 2345 + securityContext: + capabilities: + add: + - SYS_PTRACE + diff --git a/deploy/kubernetes/overlays/noauth-debug/kustomization.yaml 
b/deploy/kubernetes/overlays/noauth-debug/kustomization.yaml new file mode 100644 index 000000000..d920895f0 --- /dev/null +++ b/deploy/kubernetes/overlays/noauth-debug/kustomization.yaml @@ -0,0 +1,16 @@ +apiVersion: kustomize.config.k8s.io/v1beta1 +kind: Kustomization +resources: +- ../../base/ +# Here noauth overlay is using the same image as alpha +transformers: +- ../../images/alpha +patchesStrategicMerge: +- noauth.yaml +- controller-overlay.yaml +namespace: gce-pd-csi-driver +# To change the dev image, add something like the following. +# images: +# - name: gke.gcr.io/gcp-compute-persistent-disk-csi-driver +# newName: gcr.io/mauriciopoppe-gke-dev/gcp-compute-persistent-disk-csi-driver +# newTag: latest diff --git a/deploy/kubernetes/overlays/noauth-debug/noauth.yaml b/deploy/kubernetes/overlays/noauth-debug/noauth.yaml new file mode 100644 index 000000000..3c06103c0 --- /dev/null +++ b/deploy/kubernetes/overlays/noauth-debug/noauth.yaml @@ -0,0 +1,25 @@ +kind: Deployment +apiVersion: apps/v1 +metadata: + name: csi-gce-pd-controller +spec: + template: + spec: + containers: + - name: gce-pd-driver + env: + - $patch: delete + name: GOOGLE_APPLICATION_CREDENTIALS + value: "/etc/cloud-sa/cloud-sa.json" + volumeMounts: + - $patch: delete + name: cloud-sa-volume + readOnly: true + mountPath: "/etc/cloud-sa" + volumes: + - $patch: delete + name: cloud-sa-volume + secret: + secretName: cloud-sa + + diff --git a/docs/kubernetes/development.md b/docs/kubernetes/development.md index fb4bf93ab..6cdc19fcb 100644 --- a/docs/kubernetes/development.md +++ b/docs/kubernetes/development.md @@ -3,6 +3,7 @@ ## Manual To build and install a development version of the driver: + ``` $ GCE_PD_CSI_STAGING_IMAGE=gcr.io/path/to/driver/image:dev # Location to push dev image to $ make push-container @@ -13,9 +14,93 @@ $ ./deploy/kubernetes/deploy-driver.sh ``` To bring down driver: + ``` $ ./deploy/kubernetes/delete-driver.sh ``` -## TODO Testing +## Debugging + +We use 
https://github.com/go-delve/delve and its feature for remote debugging. This feature +is only available in the PD CSI Controller (which runs in a linux node). + +Requirements: + +- https://github.com/go-delve/delve + +Steps: + +- Build the PD CSI driver with additional compiler flags. + +``` +export GCE_PD_CSI_STAGING_VERSION=latest +export GCE_PD_CSI_STAGING_IMAGE=image/repo/gcp-compute-persistent-disk-csi-driver +make build-and-push-multi-arch-debug +``` + +- Update `deploy/kubernetes/overlays/noauth-debug/kustomization.yaml` to match the repo you wrote above e.g. + +```yaml +images: +- name: gke.gcr.io/gcp-compute-persistent-disk-csi-driver + newName: image/repo/gcp-compute-persistent-disk-csi-driver + newTag: latest +``` + +- Delete and deploy the driver with this overlay + +```sh +./deploy/kubernetes/delete-driver.sh && \ + GCE_PD_DRIVER_VERSION=noauth-debug ./deploy/kubernetes/deploy-driver.sh +``` + +At this point you could verify that delve is running in the controller logs: + +```text +API server listening at: [::]:2345 + 2021-04-15T18:28:51Z info layer=debugger launching process with args: [/go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/bin/gce-pd-csi-driver --v=5 --endpoint=unix:/csi/csi.sock] + 2021-04-15T18:28:53Z debug layer=debugger continuing +``` + +- Enable port forwarding of the PD CSI controller of port 2345 + +```sh +kubectl -n gce-pd-csi-driver get pods | grep controller | awk '{print $1}' | xargs -I % kubectl -n gce-pd-csi-driver port-forward % 2345:2345 +``` + +- Connect to the headless server and issue commands + +```sh +dlv connect localhost:2345 +Type 'help' for list of commands. 
+(dlv) clearall +(dlv) break pkg/gce-pd-csi-driver/controller.go:509 +Breakpoint 1 set at 0x159ba32 for sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-pd-csi-driver.(*GCEControllerServer).ListVolumes() ./pkg/gce-pd-csi-driver/controller.go:509 +(dlv) c +> sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-pd-csi-driver.(*GCEControllerServer).ListVolumes() ./pkg/gce-pd-csi-driver/controller.go:509 (hits goroutine(69):1 total:1) (PC: 0x159ba32) +Warning: debugging optimized function + 504: } + 505: } + 506: + 507: func (gceCS *GCEControllerServer) ListVolumes(ctx context.Context, req *csi.ListVolumesRequest) (*csi.ListVolumesResponse, error) { + 508: // https//cloud.google.com/compute/docs/reference/beta/disks/list +=> 509: if req.MaxEntries < 0 { + 510: return nil, status.Error(codes.InvalidArgument, fmt.Sprintf( + 511: "ListVolumes got max entries request %v. GCE only supports values between 0-500", req.MaxEntries)) + 512: } + 513: var maxEntries int64 = int64(req.MaxEntries) + 514: if maxEntries > 500 { +(dlv) req +Command failed: command not available +(dlv) p req +*github.com/container-storage-interface/spec/lib/go/csi.ListVolumesRequest { + MaxEntries: 0, + StartingToken: "", + XXX_NoUnkeyedLiteral: struct {} {}, + XXX_unrecognized: []uint8 len: 0, cap: 0, nil, + XXX_sizecache: 0,} +(dlv) +``` + +See https://github.com/kubernetes-sigs/gcp-compute-persistent-disk-csi-driver/pull/742 for the implementation details