diff --git a/cmd/gce-pd-csi-driver/main.go b/cmd/gce-pd-csi-driver/main.go index 767cd190c..ba056b550 100644 --- a/cmd/gce-pd-csi-driver/main.go +++ b/cmd/gce-pd-csi-driver/main.go @@ -40,8 +40,11 @@ var ( runNodeService = flag.Bool("run-node-service", true, "If set to false then the CSI driver does not activate its node service (default: true)") httpEndpoint = flag.String("http-endpoint", "", "The TCP network address where the prometheus metrics endpoint will listen (example: `:8080`). The default is empty string, which means metrics endpoint is disabled.") metricsPath = flag.String("metrics-path", "/metrics", "The HTTP path where prometheus metrics will be exposed. Default is `/metrics`.") + grpcLogCharCap = flag.Int("grpc-log-char-cap", 10000, "The maximum amount of characters logged for every grpc responses") - extraVolumeLabelsStr = flag.String("extra-labels", "", "Extra labels to attach to each PD created. It is a comma separated list of key value pairs like '=,='. See https://cloud.google.com/compute/docs/labeling-resources for details") + errorBackoffInitialDurationMs = flag.Int("backoff-initial-duration-ms", 200, "The amount of ms for the initial duration of the backoff condition for controller publish/unpublish CSI operations. Default is 200.") + errorBackoffMaxDurationMs = flag.Int("backoff-max-duration-ms", 300000, "The amount of ms for the max duration of the backoff condition for controller publish/unpublish CSI operations. Default is 300000 (5m).") + extraVolumeLabelsStr = flag.String("extra-labels", "", "Extra labels to attach to each PD created. It is a comma separated list of key value pairs like '=,='. See https://cloud.google.com/compute/docs/labeling-resources for details") attachDiskBackoffDuration = flag.Duration("attach-disk-backoff-duration", 5*time.Second, "Duration for attachDisk backoff") attachDiskBackoffFactor = flag.Float64("attach-disk-backoff-factor", 0.0, "Factor for attachDisk backoff") @@ -121,7 +124,9 @@ func handle() { if err != nil { klog.Fatalf("Failed to get cloud provider: %v", err) } - controllerServer = driver.NewControllerServer(gceDriver, cloudProvider) + initialBackoffDuration := time.Duration(*errorBackoffInitialDurationMs) * time.Millisecond + maxBackoffDuration := time.Duration(*errorBackoffMaxDurationMs) * time.Microsecond + controllerServer = driver.NewControllerServer(gceDriver, cloudProvider, initialBackoffDuration, maxBackoffDuration) } else if *cloudConfigFilePath != "" { klog.Warningf("controller service is disabled but cloud config given - it has no effect") } @@ -159,5 +164,5 @@ func handle() { gce.WaitForOpBackoff.Steps = *waitForOpBackoffSteps gce.WaitForOpBackoff.Cap = *waitForOpBackoffCap - gceDriver.Run(*endpoint) + gceDriver.Run(*endpoint, *grpcLogCharCap) } diff --git a/pkg/gce-pd-csi-driver/controller.go b/pkg/gce-pd-csi-driver/controller.go index 19ccec2a5..36f6caa7c 100644 --- a/pkg/gce-pd-csi-driver/controller.go +++ b/pkg/gce-pd-csi-driver/controller.go @@ -37,11 +37,6 @@ import ( gce "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/compute" ) -const ( - errorBackoffInitialDuration = 200 * time.Millisecond - errorBackoffMaxDuration = 5 * time.Minute -) - type GCEControllerServer struct { Driver *GCEDriver CloudProvider gce.GCECompute @@ -517,7 +512,7 @@ func (gceCS *GCEControllerServer) executeControllerPublishVolume(ctx context.Con attached, err := diskIsAttachedAndCompatible(deviceName, instance, volumeCapability, readWrite) if err != nil { - return nil, status.Error(codes.AlreadyExists, fmt.Sprintf("Disk %v already published to node %v but incompatbile: %v", volKey.Name, nodeID, err)) + return nil, status.Error(codes.AlreadyExists, fmt.Sprintf("Disk %v already published to node %v but incompatible: %v", volKey.Name, nodeID, err)) } if attached { // Volume is attached to node. Success! @@ -1597,8 +1592,8 @@ func pickRandAndConsecutive(slice []string, n int) ([]string, error) { return ret, nil } -func newCsiErrorBackoff() *csiErrorBackoff { - return &csiErrorBackoff{flowcontrol.NewBackOff(errorBackoffInitialDuration, errorBackoffMaxDuration)} +func newCsiErrorBackoff(initialDuration, errorBackoffMaxDuration time.Duration) *csiErrorBackoff { + return &csiErrorBackoff{flowcontrol.NewBackOff(initialDuration, errorBackoffMaxDuration)} } func (_ *csiErrorBackoff) backoffId(nodeId, volumeId string) csiErrorBackoffId { diff --git a/pkg/gce-pd-csi-driver/controller_test.go b/pkg/gce-pd-csi-driver/controller_test.go index 2e6fae83a..758ed286e 100644 --- a/pkg/gce-pd-csi-driver/controller_test.go +++ b/pkg/gce-pd-csi-driver/controller_test.go @@ -2149,6 +2149,8 @@ type backoffTesterConfig struct { } func newFakeCsiErrorBackoff(tc *clock.FakeClock) *csiErrorBackoff { + errorBackoffInitialDuration := 200 * time.Millisecond + errorBackoffMaxDuration := 5 * time.Minute return &csiErrorBackoff{flowcontrol.NewFakeBackOff(errorBackoffInitialDuration, errorBackoffMaxDuration, tc)} } diff --git a/pkg/gce-pd-csi-driver/gce-pd-driver.go b/pkg/gce-pd-csi-driver/gce-pd-driver.go index 473c7d424..2849fb380 100644 --- a/pkg/gce-pd-csi-driver/gce-pd-driver.go +++ b/pkg/gce-pd-csi-driver/gce-pd-driver.go @@ -16,6 +16,7 @@ package gceGCEDriver import ( "fmt" + "time" csi "github.com/container-storage-interface/spec/lib/go/csi" "google.golang.org/grpc/codes" @@ -28,6 +29,8 @@ import ( mountmanager "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/mount-manager" ) +var maxLogChar int + type GCEDriver struct { name string vendorVersion string @@ -148,17 +151,19 @@ func NewNodeServer(gceDriver *GCEDriver, mounter *mount.SafeFormatAndMount, devi } } -func NewControllerServer(gceDriver *GCEDriver, cloudProvider gce.GCECompute) *GCEControllerServer { +func NewControllerServer(gceDriver *GCEDriver, cloudProvider gce.GCECompute, errorBackoffInitialDuration, errorBackoffMaxDuration time.Duration) *GCEControllerServer { return &GCEControllerServer{ Driver: gceDriver, CloudProvider: cloudProvider, seen: map[string]int{}, volumeLocks: common.NewVolumeLocks(), - errorBackoff: newCsiErrorBackoff(), + errorBackoff: newCsiErrorBackoff(errorBackoffInitialDuration, errorBackoffMaxDuration), } } -func (gceDriver *GCEDriver) Run(endpoint string) { +func (gceDriver *GCEDriver) Run(endpoint string, grpcLogCharCap int) { + maxLogChar = grpcLogCharCap + klog.V(4).Infof("Driver: %v", gceDriver.name) //Start the nonblocking GRPC s := NewNonBlockingGRPCServer() diff --git a/pkg/gce-pd-csi-driver/gce-pd-driver_test.go b/pkg/gce-pd-csi-driver/gce-pd-driver_test.go index 7aa2fd86a..b32bc64b6 100644 --- a/pkg/gce-pd-csi-driver/gce-pd-driver_test.go +++ b/pkg/gce-pd-csi-driver/gce-pd-driver_test.go @@ -16,6 +16,7 @@ package gceGCEDriver import ( "testing" + "time" gce "sigs.k8s.io/gcp-compute-persistent-disk-csi-driver/pkg/gce-cloud-provider/compute" ) @@ -43,7 +44,10 @@ func initBlockingGCEDriver(t *testing.T, cloudDisks []*gce.CloudDisk, readyToExe func initGCEDriverWithCloudProvider(t *testing.T, cloudProvider gce.GCECompute) *GCEDriver { vendorVersion := "test-vendor" gceDriver := GetGCEDriver() - controllerServer := NewControllerServer(gceDriver, cloudProvider) + errorBackoffInitialDuration := 200 * time.Millisecond + errorBackoffMaxDuration := 5 * time.Minute + + controllerServer := NewControllerServer(gceDriver, cloudProvider, errorBackoffInitialDuration, errorBackoffMaxDuration) err := gceDriver.SetupGCEDriver(driver, vendorVersion, nil, nil, controllerServer, nil) if err != nil { t.Fatalf("Failed to setup GCE Driver: %v", err) diff --git a/pkg/gce-pd-csi-driver/utils.go b/pkg/gce-pd-csi-driver/utils.go index 4862613ab..a38ea2988 100644 --- a/pkg/gce-pd-csi-driver/utils.go +++ b/pkg/gce-pd-csi-driver/utils.go @@ -69,7 +69,11 @@ func logGRPC(ctx context.Context, req interface{}, info *grpc.UnaryServerInfo, h if err != nil { klog.Errorf("%s returned with error: %v", info.FullMethod, err) } else { - klog.V(4).Infof("%s returned with response: %s", info.FullMethod, resp) + cappedStr := fmt.Sprintf("%v", resp) + if len(cappedStr) > maxLogChar { + cappedStr = cappedStr[:maxLogChar] + fmt.Sprintf(" [response body too large, log capped to %d chars]", maxLogChar) + } + klog.V(4).Infof("%s returned with response: %s", info.FullMethod, cappedStr) } return resp, err } diff --git a/test/sanity/sanity_test.go b/test/sanity/sanity_test.go index d3ac9b36d..8c49a8bbe 100644 --- a/test/sanity/sanity_test.go +++ b/test/sanity/sanity_test.go @@ -21,6 +21,7 @@ import ( "path" "strings" "testing" + "time" "github.com/google/uuid" "google.golang.org/grpc" @@ -64,7 +65,7 @@ func TestSanity(t *testing.T) { //Initialize GCE Driver identityServer := driver.NewIdentityServer(gceDriver) - controllerServer := driver.NewControllerServer(gceDriver, cloudProvider) + controllerServer := driver.NewControllerServer(gceDriver, cloudProvider, 0, 5*time.Minute) nodeServer := driver.NewNodeServer(gceDriver, mounter, deviceUtils, metadataservice.NewFakeService(), mountmanager.NewFakeStatter(mounter)) err = gceDriver.SetupGCEDriver(driverName, vendorVersion, extraLabels, identityServer, controllerServer, nodeServer) if err != nil { @@ -90,7 +91,7 @@ func TestSanity(t *testing.T) { }() go func() { - gceDriver.Run(endpoint) + gceDriver.Run(endpoint, 10000) }() // TODO(#818): Fix failing tests and remove test skip flag.