diff --git a/components/supervisor/go.mod b/components/supervisor/go.mod index f77074cc80de10..0e131c6d5b3d69 100644 --- a/components/supervisor/go.mod +++ b/components/supervisor/go.mod @@ -24,6 +24,7 @@ require ( github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 github.com/grpc-ecosystem/grpc-gateway/v2 v2.11.3 github.com/improbable-eng/grpc-web v0.14.0 + github.com/mailru/easygo v0.0.0-20190618140210-3c14a0dc985f github.com/prometheus/client_golang v1.14.0 github.com/prometheus/client_model v0.3.0 github.com/prometheus/common v0.42.0 @@ -115,7 +116,6 @@ require ( github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect - github.com/moredure/easygo v0.0.0-20220122214504-21cd2ebdd15b github.com/opencontainers/go-digest v1.0.0 // indirect github.com/opencontainers/image-spec v1.0.2 // indirect github.com/opencontainers/runtime-spec v1.0.2 // indirect diff --git a/components/supervisor/go.sum b/components/supervisor/go.sum index 9c686da66ca21d..4760166cdd1cde 100644 --- a/components/supervisor/go.sum +++ b/components/supervisor/go.sum @@ -231,6 +231,8 @@ github.com/kr/text v0.1.0 h1:45sCR5RtlFHMR4UwH9sdQ5TC8v0qDQCHnXt+kaKSTVE= github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y= github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII= +github.com/mailru/easygo v0.0.0-20190618140210-3c14a0dc985f h1:4+gHs0jJFJ06bfN8PshnM6cHcxGjRUVRLo5jndDiKRQ= +github.com/mailru/easygo v0.0.0-20190618140210-3c14a0dc985f/go.mod h1:tHCZHV8b2A90ObojrEAzY0Lb03gxUxjDHr5IJyAh4ew= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14 h1:yVuAays6BHfxijgZPzw+3Zlu5yQgKGP2/hcQbHb7S9Y= github.com/matttproud/golang_protobuf_extensions v1.0.4 h1:mmDVorXM7PCGKw94cs5zkfA9PSy5pEvNWRP0ET0TIVo= @@ -251,8 +253,6 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJ github.com/modern-go/reflect2 v0.0.0-20180701023420-4b7aa43c6742/go.mod h1:bx2lNnkwVCuqBIxFjflWJWanXIb3RllmbCylyMrvgv0= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= -github.com/moredure/easygo v0.0.0-20220122214504-21cd2ebdd15b h1:7zlLj8X9PO8TOvWcjKZe2SwviEZE2vsnFYk85Zhi5kE= -github.com/moredure/easygo v0.0.0-20220122214504-21cd2ebdd15b/go.mod h1:RhGu0fPcznUgpHzlXjr0zuSSzRJhwe/wbFQLKHAeDSY= github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f h1:KUppIJq7/+SVif2QVs3tOP0zanoHgBEVAwHxUSIzRqU= github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U= github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= diff --git a/components/supervisor/pkg/activation/activation.go b/components/supervisor/pkg/activation/activation.go index 59e3ec96c54b35..cf21968ac0ec04 100644 --- a/components/supervisor/pkg/activation/activation.go +++ b/components/supervisor/pkg/activation/activation.go @@ -10,8 +10,7 @@ import ( "os" "sync" - "github.com/gitpod-io/gitpod/common-go/log" - "github.com/moredure/easygo/netpoll" + "github.com/mailru/easygo/netpoll" "golang.org/x/xerrors" ) @@ -20,22 +19,16 @@ type Callback func(socketFD *os.File) error // Listen polls on the listener and calls callback when someone writes to it. func Listen(ctx context.Context, l net.Listener, activate Callback) error { - poller, err := netpoll.New(&netpoll.Config{ - OnWaitError: func(err error) { - log.WithError(err).Error("netpoll: wait loop error") - }, - }) + poller, err := netpoll.New(nil) if err != nil { return err } - defer poller.Close() // Get netpoll descriptor with EventRead|EventEdgeTriggered. desc, err := netpoll.HandleListener(l, netpoll.EventRead|netpoll.EventEdgeTriggered) if err != nil { return err } - defer desc.Close() var ( runc = make(chan bool, 1) diff --git a/components/supervisor/pkg/supervisor/docker.go b/components/supervisor/pkg/supervisor/docker.go deleted file mode 100644 index 6beb64f5d2a69e..00000000000000 --- a/components/supervisor/pkg/supervisor/docker.go +++ /dev/null @@ -1,280 +0,0 @@ -// Copyright (c) 2023 Gitpod GmbH. All rights reserved. -// Licensed under the GNU Affero General Public License (AGPL). -// See License.AGPL.txt in the project root for license information. - -package supervisor - -import ( - "context" - "errors" - "fmt" - "io" - "net" - "os" - "os/exec" - "strings" - "sync" - "syscall" - "time" - - "github.com/cenkalti/backoff/v4" - "golang.org/x/xerrors" - - "github.com/gitpod-io/gitpod/common-go/analytics" - "github.com/gitpod-io/gitpod/common-go/log" - "github.com/gitpod-io/gitpod/common-go/process" - "github.com/gitpod-io/gitpod/supervisor/api" - "github.com/gitpod-io/gitpod/supervisor/pkg/activation" - "github.com/gitpod-io/gitpod/supervisor/pkg/dropwriter" - "github.com/gitpod-io/gitpod/supervisor/pkg/terminal" -) - -// if exit error happens one after another within dockerStartErrorBurstDuration then we're in the burst -// and should stop trying after maxBurstDockerStartAttempts -const ( - maxBurstDockerStartAttempts = 8 - dockerStartErrorBurstDuration = time.Minute * 1 - maxIntervalBetweenDockerStart = 15 * time.Second - - logsDir = "/workspace/.gitpod/logs" - dockerUpLogFilePath = logsDir + "/docker-up.log" -) - -var ( - failedForRetryError = errors.New("failed for retry start docker-up") -) - -func socketActivationForDocker(parentCtx context.Context, wg *sync.WaitGroup, term *terminal.Mux, cfg *Config, telemetry analytics.Writer, notifications *NotificationService) { - defer wg.Done() - - ctx, cancel := context.WithCancel(parentCtx) - defer cancel() - - logFile, err := openDockerUpLogFile() - if err != nil { - log.WithError(err).Error("docker-up: cannot open log file") - } else { - defer logFile.Close() - } - - var notificationSent bool - var notificationDisabled = cfg.isHeadless() || logFile == nil - notifyDockerUpFail := func() error { - if notificationDisabled || notificationSent { - return nil - } - notificationSent = true - defer func() { - notificationSent = false - }() - openLogs := "Open Logs" - dontShowAgain := "Don't Show Again" - resp, err := notifications.Notify(ctx, &api.NotifyRequest{ - Level: api.NotifyRequest_ERROR, - Message: "Docker daemon is failing to start.", - Actions: []string{openLogs, dontShowAgain}, - }) - if err != nil { - return err - } - telemetry.Track(analytics.TrackMessage{ - Identity: analytics.Identity{UserID: cfg.OwnerId}, - Event: "gitpod_activate_docker_fail_notification", - Properties: map[string]interface{}{ - "instanceId": cfg.WorkspaceInstanceID, - "workspaceId": cfg.WorkspaceID, - "debugWorkspace": cfg.isDebugWorkspace(), - "action": resp.Action, - }, - }) - if resp.Action == dontShowAgain { - notificationDisabled = true - return nil - } - if resp.Action != openLogs { - return nil - } - gpPath, err := exec.LookPath("gp") - if err != nil { - return err - } - gpCmd := exec.CommandContext(ctx, gpPath, "open", dockerUpLogFilePath) - gpCmd.Env = childProcEnvvars - gpCmd.Stdout = os.Stdout - gpCmd.Stderr = os.Stderr - return gpCmd.Run() - } - - var stdout, stderr io.Writer - - stdout = os.Stdout - stderr = os.Stderr - if cfg.WorkspaceLogRateLimit > 0 { - limit := int64(cfg.WorkspaceLogRateLimit) - stdout = dropwriter.Writer(stdout, dropwriter.NewBucket(limit*1024*3, limit*1024)) - stderr = dropwriter.Writer(stderr, dropwriter.NewBucket(limit*1024*3, limit*1024)) - log.WithField("limit_kb_per_sec", limit).Info("docker-up: rate limiting log output") - } - - if logFile != nil { - stdout = io.MultiWriter(stdout, logFile) - stderr = io.MultiWriter(stderr, logFile) - } - - for ctx.Err() == nil { - err := listenToDockerSocket(ctx, term, cfg, telemetry, logFile, stdout, stderr) - if ctx.Err() != nil { - return - } - - if errors.Is(err, failedForRetryError) { - go func() { - notifyErr := notifyDockerUpFail() - if notifyErr != nil && !errors.Is(notifyErr, context.Canceled) { - log.WithError(notifyErr).Error("cannot notify about docker-up failure") - } - }() - } - - time.Sleep(1 * time.Second) - } -} - -// listenToDockerSocket listens to the docker socket and starts the docker-up process -// if it fails to start with sometimes retry then listener is closed to requests all incoming requests and prevent infinite try -func listenToDockerSocket(parentCtx context.Context, term *terminal.Mux, cfg *Config, telemetry analytics.Writer, logFile *os.File, stdout, stderr io.Writer) error { - ctx, cancel := context.WithCancel(parentCtx) - defer cancel() - - fn := "/var/run/docker.sock" - l, err := net.Listen("unix", fn) - if err != nil { - return err - } - - go func() { - <-ctx.Done() - l.Close() - }() - - _ = os.Chown(fn, gitpodUID, gitpodGID) - - var lastExitErrorTime time.Time - burstAttempts := 0 - backoffStrategy := backoff.NewExponentialBackOff() - backoffStrategy.MaxInterval = maxIntervalBetweenDockerStart - - for ctx.Err() == nil { - err = activation.Listen(ctx, l, func(socketFD *os.File) error { - defer socketFD.Close() - startTime := time.Now() - telemetry.Track(analytics.TrackMessage{ - Identity: analytics.Identity{UserID: cfg.OwnerId}, - Event: "gitpod_activate_docker", - Properties: map[string]interface{}{ - "instanceId": cfg.WorkspaceInstanceID, - "workspaceId": cfg.WorkspaceID, - "debugWorkspace": cfg.isDebugWorkspace(), - }, - }) - - if logFile != nil { - defer fmt.Fprintf(logFile, "\n======= Stop docker-up at %s =======\n", time.Now().Format(time.RFC3339)) - fmt.Fprintf(logFile, "======= Start docker-up at %s =======\n", startTime.Format(time.RFC3339)) - } - - cmd := exec.CommandContext(ctx, "/usr/bin/docker-up") - cmd.Env = append(os.Environ(), "LISTEN_FDS=1") - cmd.ExtraFiles = []*os.File{socketFD} - cmd.Stdout = stdout - cmd.Stderr = stderr - - err = cmd.Start() - if err != nil { - return err - } - ptyCtx, cancel := context.WithCancel(ctx) - go func(ptyCtx context.Context) { - select { - case <-ctx.Done(): - _ = cmd.Process.Signal(syscall.SIGTERM) - case <-ptyCtx.Done(): - } - }(ptyCtx) - err = cmd.Wait() - cancel() - return err - }) - - if ctx.Err() != nil { - return ctx.Err() - } - - var exitError *exec.ExitError - if err != nil { - exitError, _ = err.(*exec.ExitError) - } - if exitError != nil && exitError.ExitCode() > 0 { - // docker-up or daemon exited with an error - we'll try again - // it can be a transient condition like apt is locked by another process to install prerequisites - // or permament like misconfigured env var, image which does not allow to install prerequisites at all - // or any general issue with docker daemon startup - if time.Since(lastExitErrorTime) <= dockerStartErrorBurstDuration { - // we're in the exit error burst - burstAttempts++ - } else { - // no burst, reset the counter - burstAttempts = 0 - backoffStrategy.Reset() - } - lastExitErrorTime = time.Now() - - } else { - // transient condition like a docker daemon get killed because of OOM - burstAttempts = 0 - backoffStrategy.Reset() - } - - nextBackOff := backoffStrategy.NextBackOff() - - if nextBackOff == backoff.Stop || burstAttempts >= maxBurstDockerStartAttempts { - cancel() - log.WithError(err).WithField("attempts", burstAttempts).Error("cannot activate docker after maximum attempts, stopping trying permanently") - if logFile != nil { - fmt.Fprintln(logFile, "Cannot activate docker after maximum attempts, stopping trying permanently.") - fmt.Fprintln(logFile, "Please check logs above, fix the configuration, use `gp validate` to verify change, and commit to apply.") - fmt.Fprintf(logFile, "If it does not help, please reach out to the support. Don't forget to share your workspace ID: %s.\n", cfg.WorkspaceID) - } - return failedForRetryError - } - - if err != nil && - !errors.Is(err, context.Canceled) && - !process.IsNotChildProcess(err) && - !strings.Contains(err.Error(), "signal: ") { - // don't log typical transient errors - log.WithError(err).WithField("attempts", burstAttempts).WithField("backoff", nextBackOff.String()).Error("cannot activate docker, retrying...") - } - time.Sleep(nextBackOff) - } - return ctx.Err() -} - -func openDockerUpLogFile() (*os.File, error) { - if err := os.MkdirAll(logsDir, 0755); err != nil { - return nil, xerrors.Errorf("cannot create logs dir: %w", err) - } - if err := os.Chown(logsDir, gitpodUID, gitpodGID); err != nil { - return nil, xerrors.Errorf("cannot chown logs dir: %w", err) - } - logFile, err := os.OpenFile(dockerUpLogFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) - if err != nil { - return nil, xerrors.Errorf("cannot open docker-up log file: %w", err) - } - - if err := os.Chown(dockerUpLogFilePath, gitpodUID, gitpodGID); err != nil { - _ = logFile.Close() - return nil, xerrors.Errorf("cannot chown docker-up log file: %w", err) - } - return logFile, nil -} diff --git a/components/supervisor/pkg/supervisor/supervisor.go b/components/supervisor/pkg/supervisor/supervisor.go index 11e64d030bd933..39b942bc0a3257 100644 --- a/components/supervisor/pkg/supervisor/supervisor.go +++ b/components/supervisor/pkg/supervisor/supervisor.go @@ -55,6 +55,7 @@ import ( "github.com/gitpod-io/gitpod/content-service/pkg/git" gitpod "github.com/gitpod-io/gitpod/gitpod-protocol" "github.com/gitpod-io/gitpod/supervisor/api" + "github.com/gitpod-io/gitpod/supervisor/pkg/activation" "github.com/gitpod-io/gitpod/supervisor/pkg/config" "github.com/gitpod-io/gitpod/supervisor/pkg/dropwriter" "github.com/gitpod-io/gitpod/supervisor/pkg/metrics" @@ -407,7 +408,7 @@ func Run(options ...RunOption) { if !opts.RunGP { wg.Add(1) - go socketActivationForDocker(ctx, &wg, termMux, cfg, telemetry, notificationService) + go socketActivationForDocker(ctx, &wg, termMux, cfg, telemetry) } if cfg.isHeadless() { @@ -1546,6 +1547,73 @@ func recordInitializerMetrics(path string, metrics *metrics.SupervisorMetrics) { } } +func socketActivationForDocker(ctx context.Context, wg *sync.WaitGroup, term *terminal.Mux, cfg *Config, w analytics.Writer) { + defer wg.Done() + + fn := "/var/run/docker.sock" + l, err := net.Listen("unix", fn) + if err != nil { + log.WithError(err).Error("cannot provide Docker activation socket") + return + } + + go func() { + <-ctx.Done() + l.Close() + }() + + _ = os.Chown(fn, gitpodUID, gitpodGID) + for ctx.Err() == nil { + err = activation.Listen(ctx, l, func(socketFD *os.File) error { + defer socketFD.Close() + cmd := exec.Command("/usr/bin/docker-up") + cmd.Env = append(os.Environ(), "LISTEN_FDS=1") + cmd.ExtraFiles = []*os.File{socketFD} + alias, err := term.Start(cmd, terminal.TermOptions{ + Annotations: map[string]string{ + "gitpod.supervisor": "true", + }, + LogToStdout: true, + }) + outcome := "success" + if err != nil { + outcome = "failure" + } + w.Track(analytics.TrackMessage{ + Identity: analytics.Identity{UserID: cfg.OwnerId}, + Event: "gitpod_activate_docker", + Properties: map[string]interface{}{ + "instanceId": cfg.WorkspaceInstanceID, + "workspaceId": cfg.WorkspaceID, + "outcome": outcome, + "debugWorkspace": cfg.isDebugWorkspace(), + }, + }) + if err != nil { + return err + } + pty, ok := term.Get(alias) + if !ok { + return errors.New("cannot find pty") + } + ptyCtx, cancel := context.WithCancel(context.Background()) + go func(ptyCtx context.Context) { + select { + case <-ctx.Done(): + _ = pty.Command.Process.Signal(syscall.SIGTERM) + case <-ptyCtx.Done(): + } + }(ptyCtx) + _, err = pty.Wait() + cancel() + return err + }) + if err != nil && !errors.Is(err, context.Canceled) && err.Error() != "signal: killed" { + log.WithError(err).Error("cannot provide Docker activation socket") + } + } +} + type PerfAnalyzer struct { label string defs []int