Skip to content

Commit 0d37a62

Browse files
authored
fix: add a 1m timeout to signal shutdown (#92)
- It's possible for a number of reasons for the bootstrap script to not exit. This PR adds a hard timeout of 1 minute before we forcefully exit.
1 parent f73e3d8 commit 0d37a62

File tree

5 files changed

+87
-15
lines changed

5 files changed

+87
-15
lines changed

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ The environment variables can be used to configure various aspects of the inner
2727
| `CODER_CPUS` | Dictates the number of CPUs to allocate the inner container. It is recommended to set this using the Kubernetes [Downward API](https://kubernetes.io/docs/tasks/inject-data-application/environment-variable-expose-pod-information/#use-container-fields-as-values-for-environment-variables). | false |
2828
| `CODER_MEMORY` | Dictates the max memory (in bytes) to allocate the inner container. It is recommended to set this using the Kubernetes [Downward API](https://kubernetes.io/docs/tasks/inject-data-application/environment-variable-expose-pod-information/#use-container-fields-as-values-for-environment-variables). | false |
2929
| `CODER_DISABLE_IDMAPPED_MOUNT` | Disables idmapped mounts in sysbox. For more information, see the [Sysbox Documentation](https://github.com/nestybox/sysbox/blob/master/docs/user-guide/configuration.md#disabling-id-mapped-mounts-on-sysbox). | false |
30+
| `CODER_SHUTDOWN_TIMEOUT` | Configure a custom shutdown timeout to wait for the boostrap command to exit. Defaults to 1 minute. | false |
3031

3132
## Coder Template
3233

cli/docker.go

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"sort"
1414
"strconv"
1515
"strings"
16+
"time"
1617

1718
dockertypes "github.com/docker/docker/api/types"
1819
"github.com/docker/docker/api/types/container"
@@ -39,6 +40,10 @@ const (
3940
// EnvBoxContainerName is the name of the inner user container.
4041
EnvBoxPullImageSecretEnvVar = "CODER_IMAGE_PULL_SECRET" //nolint:gosec
4142
EnvBoxContainerName = "CODER_CVM_CONTAINER_NAME"
43+
// We define a custom exit code to distinguish from the generic '1' when envbox exits due to a shutdown timeout.
44+
// Docker claims exit codes 125-127 so we start at 150 to
45+
// ensure we don't collide.
46+
ExitCodeShutdownTimeout = 150
4247
)
4348

4449
const (
@@ -77,8 +82,9 @@ const (
7782
// with UID/GID 1000 will be mapped to `UserNamespaceOffset` + 1000
7883
// on the host. Changing this value will result in improper mappings
7984
// on existing containers.
80-
UserNamespaceOffset = 100000
81-
devDir = "/dev"
85+
UserNamespaceOffset = 100000
86+
devDir = "/dev"
87+
defaultShutdownTimeout = time.Minute
8288
)
8389

8490
var (
@@ -102,6 +108,7 @@ var (
102108
EnvDockerConfig = "CODER_DOCKER_CONFIG"
103109
EnvDebug = "CODER_DEBUG"
104110
EnvDisableIDMappedMount = "CODER_DISABLE_IDMAPPED_MOUNT"
111+
EnvShutdownTimeout = "CODER_SHUTDOWN_TIMEOUT"
105112
)
106113

107114
var envboxPrivateMounts = map[string]struct{}{
@@ -139,6 +146,7 @@ type flags struct {
139146
cpus int
140147
memory int
141148
disableIDMappedMount bool
149+
shutdownTimeout time.Duration
142150

143151
// Test flags.
144152
noStartupLogs bool
@@ -348,6 +356,7 @@ func dockerCmd(ch chan func() error) *cobra.Command {
348356
cliflag.IntVarP(cmd.Flags(), &flags.cpus, "cpus", "", EnvCPUs, 0, "Number of CPUs to allocate inner container. e.g. 2")
349357
cliflag.IntVarP(cmd.Flags(), &flags.memory, "memory", "", EnvMemory, 0, "Max memory to allocate to the inner container in bytes.")
350358
cliflag.BoolVarP(cmd.Flags(), &flags.disableIDMappedMount, "disable-idmapped-mount", "", EnvDisableIDMappedMount, false, "Disable idmapped mounts in sysbox. Note that you may need an alternative (e.g. shiftfs).")
359+
cliflag.DurationVarP(cmd.Flags(), &flags.shutdownTimeout, "shutdown-timeout", "", EnvShutdownTimeout, defaultShutdownTimeout, "Duration after which envbox will be forcefully terminated.")
351360

352361
// Test flags.
353362
cliflag.BoolVarP(cmd.Flags(), &flags.noStartupLogs, "no-startup-log", "", "", false, "Do not log startup logs. Useful for testing.")
@@ -724,27 +733,38 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Docker
724733
return xerrors.Errorf("exec inspect: %w", err)
725734
}
726735

727-
shutdownCh <- func() error {
728-
log.Debug(ctx, "killing container", slog.F("bootstrap_pid", bootstrapPID))
736+
shutdownCh <- killBootstrapCmd(ctx, log, bootstrapPID, bootstrapExec.ID, client, flags.shutdownTimeout)
729737

738+
return nil
739+
}
740+
741+
// KillBootstrapCmd is the command we run when we receive a signal
742+
// to kill the envbox container.
743+
func killBootstrapCmd(ctx context.Context, log slog.Logger, pid int, execID string, client dockerutil.DockerClient, timeout time.Duration) func() error {
744+
return func() error {
745+
log.Debug(ctx, "killing container",
746+
slog.F("bootstrap_pid", pid),
747+
slog.F("timeout", timeout.String()),
748+
)
749+
750+
ctx, cancel := context.WithTimeout(ctx, timeout)
751+
defer cancel()
730752
// The PID returned is the PID _outside_ the container...
731753
//nolint:gosec
732-
out, err := exec.Command("kill", "-TERM", strconv.Itoa(bootstrapPID)).CombinedOutput()
754+
out, err := exec.CommandContext(ctx, "kill", "-TERM", strconv.Itoa(pid)).CombinedOutput()
733755
if err != nil {
734756
return xerrors.Errorf("kill bootstrap process (%s): %w", out, err)
735757
}
736758

737759
log.Debug(ctx, "sent kill signal waiting for process to exit")
738-
err = dockerutil.WaitForExit(ctx, client, bootstrapExec.ID)
760+
err = dockerutil.WaitForExit(ctx, client, execID)
739761
if err != nil {
740762
return xerrors.Errorf("wait for exit: %w", err)
741763
}
742764

743765
log.Debug(ctx, "bootstrap process successfully exited")
744766
return nil
745767
}
746-
747-
return nil
748768
}
749769

750770
//nolint:revive

cmd/envbox/main.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ import (
88
"runtime"
99
"syscall"
1010

11+
"golang.org/x/xerrors"
12+
1113
"cdr.dev/slog"
1214
"cdr.dev/slog/sloggers/slogjson"
1315
"github.com/coder/envbox/cli"
@@ -29,6 +31,9 @@ func main() {
2931
err := fn()
3032
if err != nil {
3133
log.Error(ctx, "shutdown function failed", slog.Error(err))
34+
if xerrors.Is(err, context.DeadlineExceeded) {
35+
os.Exit(cli.ExitCodeShutdownTimeout)
36+
}
3237
os.Exit(1)
3338
}
3439
default:

integration/docker_test.go

Lines changed: 44 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -306,10 +306,8 @@ func TestDocker(t *testing.T) {
306306
require.Error(t, err)
307307

308308
// Simulate a shutdown.
309-
integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second)
310-
311-
err = resource.Close()
312-
require.NoError(t, err)
309+
exitCode := integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second)
310+
require.Equal(t, 0, exitCode)
313311

314312
t.Logf("envbox %q started successfully, recreating...", resource.Container.ID)
315313
// Run the envbox container.
@@ -326,6 +324,34 @@ func TestDocker(t *testing.T) {
326324
})
327325
require.NoError(t, err)
328326
})
327+
328+
t.Run("ShutdownTimeout", func(t *testing.T) {
329+
t.Parallel()
330+
331+
pool, err := dockertest.NewPool("")
332+
require.NoError(t, err)
333+
334+
var (
335+
tmpdir = integrationtest.TmpDir(t)
336+
binds = integrationtest.DefaultBinds(t, tmpdir)
337+
)
338+
339+
envs := []string{fmt.Sprintf("%s=%s", cli.EnvShutdownTimeout, "1s")}
340+
341+
// Run the envbox container.
342+
resource := integrationtest.RunEnvbox(t, pool, &integrationtest.CreateDockerCVMConfig{
343+
Image: integrationtest.UbuntuImage,
344+
Username: "root",
345+
Envs: envs,
346+
Binds: binds,
347+
BootstrapScript: sigtrapForeverScript,
348+
})
349+
350+
// Simulate a shutdown.
351+
exitCode := integrationtest.StopContainer(t, pool, resource.Container.ID, 30*time.Second)
352+
// We expect it to timeout which should result in a special exit code.
353+
require.Equal(t, cli.ExitCodeShutdownTimeout, exitCode)
354+
})
329355
}
330356

331357
func requireSliceNoContains(t *testing.T, ss []string, els ...string) {
@@ -358,6 +384,20 @@ func bindMount(src, dest string, ro bool) string {
358384
return fmt.Sprintf("%s:%s", src, dest)
359385
}
360386

387+
const sigtrapForeverScript = `#!/bin/bash
388+
cleanup() {
389+
echo "Got a signal, going to sleep!" && sleep infinity
390+
exit 0
391+
}
392+
393+
trap 'cleanup' INT TERM
394+
395+
while true; do
396+
echo "Working..."
397+
sleep 1
398+
done
399+
`
400+
361401
const sigtrapScript = `#!/bin/bash
362402
cleanup() {
363403
echo "HANDLING A SIGNAL!" && touch /home/coder/foo && echo "touched file"

integration/integrationtest/docker.go

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,12 @@ func RunEnvbox(t *testing.T, pool *dockertest.Pool, conf *CreateDockerCVMConfig)
9191
host.CPUQuota = int64(conf.CPUs) * int64(dockerutil.DefaultCPUPeriod)
9292
})
9393
require.NoError(t, err)
94-
// t.Cleanup(func() { _ = pool.Purge(resource) })
94+
t.Cleanup(func() {
95+
// Only delete the container if the test passes.
96+
if !t.Failed() {
97+
resource.Close()
98+
}
99+
})
95100

96101
waitForCVM(t, pool, resource)
97102

@@ -264,7 +269,7 @@ func ExecEnvbox(t *testing.T, pool *dockertest.Pool, conf ExecConfig) ([]byte, e
264269
return buf.Bytes(), nil
265270
}
266271

267-
func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Duration) {
272+
func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Duration) int {
268273
t.Helper()
269274

270275
err := pool.Client.KillContainer(docker.KillContainerOptions{
@@ -283,10 +288,11 @@ func StopContainer(t *testing.T, pool *dockertest.Pool, id string, to time.Durat
283288
continue
284289
}
285290

286-
return
291+
return cnt.State.ExitCode
287292
}
288293

289294
t.Fatalf("timed out waiting for container %s to stop", id)
295+
return 1
290296
}
291297

292298
// cmdLineEnvs returns args passed to the /envbox command

0 commit comments

Comments
 (0)