Skip to content

Enable remote debugging with delve #742

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions Dockerfile.debug
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Builder stage: compiles delve and a debug build of the PD CSI driver.
FROM golang:1.13.15 AS builder
WORKDIR /go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver
# COPY is used instead of ADD because only a plain copy of the build context
# is needed (no URL fetch or archive extraction).
COPY . .

# Build a statically linked dlv; it ends up in /go/bin/dlv.
RUN CGO_ENABLED=0 go get -ldflags "-s -w -extldflags '-static'" github.com/go-delve/delve/cmd/dlv
# GCE_PD_CSI_DEBUG makes the Makefile pass -gcflags "all=-N -l" to go build.
# The binary is written to bin/gce-pd-csi-driver under the WORKDIR.
RUN GCE_PD_CSI_DEBUG=1 make gce-pd-driver

# MAD HACKS: Build a version first so we can take the scsi_id bin and put it somewhere else in our real build
FROM k8s.gcr.io/build-image/debian-base-amd64:v2.1.3 AS base
RUN clean-install udev

# Start from Kubernetes Debian base
FROM k8s.gcr.io/build-image/debian-base-amd64:v2.1.3

# Copy the source code too, at the same path it was compiled at, so that delve
# can correlate the binary with the source files when setting breakpoints.
WORKDIR /go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver
COPY . .

COPY --from=builder /go/bin/dlv /go/bin/dlv
# Take the driver from the builder stage. Without this the image would only
# contain whatever bin/ happened to be in the build context, which is not
# guaranteed to exist or to be the GCE_PD_CSI_DEBUG build. This must come after
# the source COPY above so that it wins over any host-built binary.
COPY --from=builder /go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/bin/gce-pd-csi-driver /go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/bin/gce-pd-csi-driver

# Install necessary dependencies
RUN clean-install util-linux e2fsprogs mount ca-certificates udev xfsprogs
COPY --from=base /lib/udev/scsi_id /lib/udev_containerized/scsi_id

# The PD CSI driver is not copied to / because delve would then be unable to
# correlate the binary with the source code. Instead the binary is run from the
# path where it was compiled; the noauth-dev overlay launches this same binary
# through dlv.
ENTRYPOINT ["/go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/bin/gce-pd-csi-driver"]
21 changes: 20 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,12 @@ ifdef GCE_PD_CSI_STAGING_VERSION
else
STAGINGVERSION=${REV}
endif

# Go compiler flags handed to `go build -gcflags` by the gce-pd-driver target.
# Empty for regular builds; "all=-N -l" when GCE_PD_CSI_DEBUG is defined.
ifdef GCE_PD_CSI_DEBUG
GCFLAGS="all=-N -l"
else
GCFLAGS=""
endif

STAGINGIMAGE=${GCE_PD_CSI_STAGING_IMAGE}
DRIVERBINARY=gce-pd-csi-driver
DRIVERWINDOWSBINARY=${DRIVERBINARY}.exe
Expand All @@ -34,11 +40,14 @@ BASE_IMAGE_20H2=mcr.microsoft.com/windows/servercore:20H2
# Both arrays MUST be index aligned.
WINDOWS_IMAGE_TAGS=ltsc2019 1909 2004 20H2
WINDOWS_BASE_IMAGES=$(BASE_IMAGE_LTSC2019) $(BASE_IMAGE_1909) $(BASE_IMAGE_2004) $(BASE_IMAGE_20H2)
# Development only
WINDOWS_IMAGE_TAGS_DEV=ltsc2019
WINDOWS_BASE_IMAGES_DEV=$(BASE_IMAGE_LTSC2019)

all: gce-pd-driver gce-pd-driver-windows
gce-pd-driver:
mkdir -p bin
go build -mod=vendor -ldflags "-X main.version=$(STAGINGVERSION)" -o bin/${DRIVERBINARY} ./cmd/gce-pd-csi-driver/
go build -mod=vendor -gcflags=$(GCFLAGS) -ldflags "-X main.version=$(STAGINGVERSION)" -o bin/${DRIVERBINARY} ./cmd/gce-pd-csi-driver/

gce-pd-driver-windows:
mkdir -p bin
Expand Down Expand Up @@ -76,6 +85,11 @@ build-and-push-multi-arch: build-and-push-container-linux build-and-push-windows
STAGINGIMAGE="$(STAGINGIMAGE)" STAGINGVERSION="$(STAGINGVERSION)" WINDOWS_IMAGE_TAGS="$(WINDOWS_IMAGE_TAGS)" WINDOWS_BASE_IMAGES="$(WINDOWS_BASE_IMAGES)" ./manifest_osversion.sh
$(DOCKER) manifest push -p $(STAGINGIMAGE):$(STAGINGVERSION)

# Development only: depends on the debug Linux image target and the ltsc2019
# Windows image target, then assembles $(STAGINGIMAGE):$(STAGINGVERSION) as a
# manifest list from the _linux and _ltsc2019 tags, runs manifest_osversion.sh
# with the development-only Windows tag/base-image lists, and pushes the
# manifest.
build-and-push-multi-arch-dev: build-and-push-container-linux-debug build-and-push-windows-container-ltsc2019
$(DOCKER) manifest create --amend $(STAGINGIMAGE):$(STAGINGVERSION) $(STAGINGIMAGE):$(STAGINGVERSION)_linux $(STAGINGIMAGE):$(STAGINGVERSION)_ltsc2019
STAGINGIMAGE="$(STAGINGIMAGE)" STAGINGVERSION="$(STAGINGVERSION)" WINDOWS_IMAGE_TAGS="$(WINDOWS_IMAGE_TAGS_DEV)" WINDOWS_BASE_IMAGES="$(WINDOWS_BASE_IMAGES_DEV)" ./manifest_osversion.sh
$(DOCKER) manifest push -p $(STAGINGIMAGE):$(STAGINGVERSION)

push-container: build-container
gcloud docker -- push $(STAGINGIMAGE):$(STAGINGVERSION)

Expand All @@ -84,6 +98,11 @@ build-and-push-container-linux: require-GCE_PD_CSI_STAGING_IMAGE init-buildx
-t $(STAGINGIMAGE):$(STAGINGVERSION)_linux \
--build-arg TAG=$(STAGINGVERSION) --push .

# Builds the Linux image from Dockerfile.debug with buildx and pushes it.
# The image is tagged $(STAGINGVERSION)_linux, the same tag the regular
# build-and-push-container-linux target uses, so pushing one replaces the other.
build-and-push-container-linux-debug: require-GCE_PD_CSI_STAGING_IMAGE init-buildx
$(DOCKER) buildx build --file=Dockerfile.debug --platform=linux \
-t $(STAGINGIMAGE):$(STAGINGVERSION)_linux \
--build-arg TAG=$(STAGINGVERSION) --push .

test-sanity: gce-pd-driver
go test -mod=vendor --v -timeout 30s sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/test/sanity -run ^TestSanity$

Expand Down
8 changes: 4 additions & 4 deletions deploy/kubernetes/deploy-driver.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
# which are in Kubernetes version 1.10.5+

# Args:
# GCE_PD_SA_DIR: Directory the service account key has been saved in (generated
# GCE_PD_SA_DIR: Directory the service account key has been saved in (generated
# by setup-project.sh). Ignored if GCE_PD_DRIVER_VERSION == noauth.
# GCE_PD_DRIVER_VERSION: The kustomize overlay (located in
# deploy/kubernetes/overlays) to deploy. Can be one of {stable, dev}
Expand Down Expand Up @@ -44,7 +44,7 @@ while [ -n "${1-}" ]; do
esac
done

if [ "${DEPLOY_VERSION}" != noauth ]; then
if [[ ! "${DEPLOY_VERSION}" == *noauth* ]]; then
ensure_var GCE_PD_SA_DIR
fi

Expand Down Expand Up @@ -74,7 +74,7 @@ function check_service_account()

ensure_kustomize

if [ "$skip_sa_check" != true -a "${DEPLOY_VERSION}" != noauth ]; then
if [[ "$skip_sa_check" != true ]] && [[ ! "${DEPLOY_VERSION}" == *noauth* ]]; then
check_service_account
fi

Expand All @@ -83,7 +83,7 @@ then
${KUBECTL} create namespace "${NAMESPACE}" -v="${VERBOSITY}"
fi

if [ "${DEPLOY_VERSION}" != noauth ]; then
if [[ ! "${DEPLOY_VERSION}" == *noauth* ]]; then
if ! ${KUBECTL} get secret cloud-sa -v="${VERBOSITY}" -n "${NAMESPACE}";
then
${KUBECTL} create secret generic cloud-sa -v="${VERBOSITY}" --from-file="${GCE_PD_SA_DIR}/cloud-sa.json" -n "${NAMESPACE}"
Expand Down
2 changes: 1 addition & 1 deletion deploy/kubernetes/install-kustomize.sh
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ elif [[ "$OSTYPE" == darwin* ]]; then
fi

# As github has a limit on what stored in releases/, and kustomize has many different package
# versions, we just point directly at the version we want. See
# versions, we just point directly at the version we want. See
# github.com/kubernetes-sigs/kustomize/master/hack/install_kustomize.sh.

version=v3.9.4
Expand Down
34 changes: 34 additions & 0 deletions deploy/kubernetes/overlays/noauth-dev/controller-overlay.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Strategic-merge patch for the csi-gce-pd-controller Deployment, used by the
# noauth-dev overlay. It replaces the gce-pd-driver container's command so the
# driver is started under a headless delve server listening on port 2345.
kind: Deployment
apiVersion: apps/v1
metadata:
name: csi-gce-pd-controller
annotations:
# https://kubernetes.io/docs/tutorials/clusters/apparmor/
# NOTE(review): AppArmor annotations are normally read from Pod metadata;
# confirm this one ends up on the pod template and not only on the Deployment.
container.apparmor.security.beta.kubernetes.io/gce-pd-driver: unconfined
spec:
template:
spec:
containers:
- name: gce-pd-driver
imagePullPolicy: Always
# Run dlv instead of the image's ENTRYPOINT; dlv launches the driver itself.
command: ["/go/bin/dlv"]
args:
# Same port as containerPort below.
- "--listen=:2345"
- "--headless=true"
- "--api-version=2"
# https://github.com/go-delve/delve/blob/master/Documentation/usage/dlv_exec.md#options
- "--accept-multiclient"
- "--continue"
- "--log"
- "exec"
# Same path as the ENTRYPOINT in Dockerfile.debug.
- "/go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/bin/gce-pd-csi-driver"
# Everything after "--" is passed to the driver binary, not to dlv.
- "--"
- "--v=5"
- "--endpoint=unix:/csi/csi.sock"
ports:
- containerPort: 2345
securityContext:
capabilities:
add:
# Presumably needed so dlv can trace the driver process - TODO confirm.
- SYS_PTRACE

16 changes: 16 additions & 0 deletions deploy/kubernetes/overlays/noauth-dev/kustomization.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../../base/
# The noauth-dev overlay uses the same image as the alpha overlay
transformers:
- ../../images/alpha
patchesStrategicMerge:
- noauth.yaml
- controller-overlay.yaml
namespace: gce-pd-csi-driver
# To change the dev image, add something like the following.
# images:
# - name: gke.gcr.io/gcp-compute-persistent-disk-csi-driver
# newName: gcr.io/mauriciopoppe-gke-dev/gcp-compute-persistent-disk-csi-driver
# newTag: latest
25 changes: 25 additions & 0 deletions deploy/kubernetes/overlays/noauth-dev/noauth.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Strategic-merge patch for the csi-gce-pd-controller Deployment, used by the
# noauth-dev overlay. Each "$patch: delete" entry removes the matching item
# from the gce-pd-driver container / pod spec: the
# GOOGLE_APPLICATION_CREDENTIALS env var, the cloud-sa-volume mount, and the
# cloud-sa-volume volume backed by the cloud-sa secret.
kind: Deployment
apiVersion: apps/v1
metadata:
name: csi-gce-pd-controller
spec:
template:
spec:
containers:
- name: gce-pd-driver
env:
- $patch: delete
name: GOOGLE_APPLICATION_CREDENTIALS
value: "/etc/cloud-sa/cloud-sa.json"
volumeMounts:
- $patch: delete
name: cloud-sa-volume
readOnly: true
mountPath: "/etc/cloud-sa"
volumes:
- $patch: delete
name: cloud-sa-volume
secret:
secretName: cloud-sa


87 changes: 86 additions & 1 deletion docs/kubernetes/development.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
## Manual

To build and install a development version of the driver:

```
$ GCE_PD_CSI_STAGING_IMAGE=gcr.io/path/to/driver/image:dev # Location to push dev image to
$ make push-container
Expand All @@ -13,9 +14,93 @@ $ ./deploy/kubernetes/deploy-driver.sh
```

To bring down driver:

```
$ ./deploy/kubernetes/delete-driver.sh
```

## TODO Testing
## Debugging

We use https://github.com/go-delve/delve and its remote debugging feature for debugging. This feature
is only available in the PD CSI Controller (which runs on a Linux node).

Requirements:

- https://github.com/go-delve/delve

Steps:

- Build the PD CSI driver with additional compiler flags

```
export GCE_PD_CSI_STAGING_VERSION=latest
export GCE_PD_CSI_STAGING_IMAGE=image/repo/gcp-compute-persistent-disk-csi-driver
make build-and-push-multi-arch-dev
```

- Update `deploy/kubernetes/overlays/noauth-dev/kustomization.yaml` to match the image repository and tag you set above, e.g.

```yaml
images:
- name: gke.gcr.io/gcp-compute-persistent-disk-csi-driver
newName: image/repo/gcp-compute-persistent-disk-csi-driver
newTag: latest
```

- Delete and deploy the driver with this overlay

```sh
./deploy/kubernetes/delete-driver.sh && \
GCE_PD_DRIVER_VERSION=noauth-dev ./deploy/kubernetes/deploy-driver.sh
```

At this point you can verify in the controller logs that delve is running:

```text
API server listening at: [::]:2345
2021-04-15T18:28:51Z info layer=debugger launching process with args: [/go/src/sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/bin/gce-pd-csi-driver --v=5 --endpoint=unix:/csi/csi.sock]
2021-04-15T18:28:53Z debug layer=debugger continuing
```

- Enable port forwarding to port 2345 of the PD CSI controller

```sh
kubectl -n gce-pd-csi-driver get pods | grep controller | awk '{print $1}' | xargs -I % kubectl -n gce-pd-csi-driver port-forward % 2345:2345
```

- Connect to the headless server and issue commands

```sh
dlv connect localhost:2345
Type 'help' for list of commands.
(dlv) clearall
(dlv) break pkg/gce-pd-csi-driver/controller.go:509
Breakpoint 1 set at 0x159ba32 for sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-pd-csi-driver.(*GCEControllerServer).ListVolumes() ./pkg/gce-pd-csi-driver/controller.go:509
(dlv) c
> sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-pd-csi-driver.(*GCEControllerServer).ListVolumes() ./pkg/gce-pd-csi-driver/controller.go:509 (hits goroutine(69):1 total:1) (PC: 0x159ba32)
Warning: debugging optimized function
504: }
505: }
506:
507: func (gceCS *GCEControllerServer) ListVolumes(ctx context.Context, req *csi.ListVolumesRequest) (*csi.ListVolumesResponse, error) {
508: // https//cloud.google.com/compute/docs/reference/beta/disks/list
=> 509: if req.MaxEntries < 0 {
510: return nil, status.Error(codes.InvalidArgument, fmt.Sprintf(
511: "ListVolumes got max entries request %v. GCE only supports values between 0-500", req.MaxEntries))
512: }
513: var maxEntries int64 = int64(req.MaxEntries)
514: if maxEntries > 500 {
(dlv) req
Command failed: command not available
(dlv) p req
*github.com/container-storage-interface/spec/lib/go/csi.ListVolumesRequest {
MaxEntries: 0,
StartingToken: "",
XXX_NoUnkeyedLiteral: struct {} {},
XXX_unrecognized: []uint8 len: 0, cap: 0, nil,
XXX_sizecache: 0,}
(dlv)
```

See https://github.com/kubernetes-sigs/gcp-compute-persistent-disk-csi-driver/pull/742 for the implementation details