Skip to content

Commit d91e243

Browse files
committed
automatically detect CODER_USR_LIB_DIR
1 parent 66a2ffd commit d91e243

File tree

6 files changed

+88
-35
lines changed

6 files changed

+88
-35
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ The environment variables can be used to configure various aspects of the inner
2020
| `CODER_IMAGE_PULL_SECRET` | The docker credentials to use when pulling the inner container. The recommended way to do this is to create an [Image Pull Secret](https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/#create-a-secret-by-providing-credentials-on-the-command-line) and then reference the secret using an [environment variable](https://kubernetes.io/docs/tasks/inject-data-application/distribute-credentials-secure/#define-container-environment-variables-using-secret-data). See below for example. | false |
2121
| `CODER_DOCKER_BRIDGE_CIDR` | The bridge CIDR to start the Docker daemon with. | false |
2222
| `CODER_MOUNTS` | A list of mounts to mount into the inner container. Mounts default to `rw`. Ex: `CODER_MOUNTS=/home/coder:/home/coder,/var/run/mysecret:/var/run/mysecret:ro` | false |
23-
| `CODER_USR_LIB_DIR` | The mountpoint of the host `/usr/lib` directory. Only required when using GPUs. | false |
23+
| `CODER_USR_LIB_DIR` | The location under which GPU drivers can be found, either if mounted manually from the host or automatically by the container runtime. Only required when using GPUs. If not set, Envbox will try to automatically set this to a sensible value. | false |
2424
| `CODER_ADD_TUN` | If `CODER_ADD_TUN=true` add a TUN device to the inner container. | false |
2525
| `CODER_ADD_FUSE` | If `CODER_ADD_FUSE=true` add a FUSE device to the inner container. | false |
2626
| `CODER_ADD_GPU` | If `CODER_ADD_GPU=true` add detected GPUs and related files to the inner container. Requires setting `CODER_USR_LIB_DIR` and mounting in the hosts `/usr/lib/` directory. | false |

cli/docker.go

Lines changed: 23 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -502,7 +502,27 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
502502

503503
if flags.addGPU {
504504
if flags.hostUsrLibDir == "" {
505-
return xerrors.Errorf("when using GPUs, %q must be specified", EnvUsrLibDir)
505+
// If the user didn't specify CODER_USR_LIB_DIR, try to default to a
506+
// sensible value.
507+
log.Info(ctx, EnvUsrLibDir+" not specified, determining from /etc/os-release (best-effort)")
508+
osReleaseFile, err := fs.Open("/etc/os-release")
509+
if err != nil {
510+
return xerrors.Errorf("open /etc/os-release: %w", err)
511+
}
512+
defer osReleaseFile.Close()
513+
contents, err := io.ReadAll(osReleaseFile)
514+
if err != nil {
515+
return xerrors.Errorf("read /etc/os-release: %w", err)
516+
}
517+
rid := dockerutil.GetOSReleaseID(contents)
518+
found, ok := dockerutil.UsrLibDirs[rid]
519+
if !ok {
520+
// This should actually be impossible as GetOSReleaseID will return
521+
// "linux" as a fallback.
522+
return xerrors.Errorf("developer error: no UsrLibDir for os-release id %q", rid)
523+
}
524+
log.Info(ctx, "automatically set CODER_USR_LIB_DIR", slog.F("path", found))
525+
flags.hostUsrLibDir = found
506526
}
507527

508528
// Unmount GPU drivers in /proc as it causes issues when creating any
@@ -534,6 +554,8 @@ func runDockerCVM(ctx context.Context, log slog.Logger, client dockerutil.Client
534554
slog.F("uid", imgMeta.UID),
535555
slog.F("gid", imgMeta.GID),
536556
slog.F("has_init", imgMeta.HasInit),
557+
slog.F("os_release", imgMeta.OsReleaseID),
558+
slog.F("home_dir", imgMeta.HomeDir),
537559
)
538560

539561
uid, err := strconv.ParseInt(imgMeta.UID, 10, 32)

cli/docker_test.go

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -532,21 +532,6 @@ func TestDocker(t *testing.T) {
532532
require.True(t, called, "create function was not called for inner container")
533533
})
534534

535-
t.Run("GPUNoUsrLibDir", func(t *testing.T) {
536-
t.Parallel()
537-
538-
ctx, cmd := clitest.New(t, "docker",
539-
"--image=ubuntu",
540-
"--username=root",
541-
"--agent-token=hi",
542-
"--add-gpu=true",
543-
)
544-
545-
err := cmd.ExecuteContext(ctx)
546-
require.Error(t, err)
547-
require.ErrorContains(t, err, fmt.Sprintf("when using GPUs, %q must be specified", cli.EnvUsrLibDir))
548-
})
549-
550535
t.Run("GPU", func(t *testing.T) {
551536
t.Parallel()
552537

dockerutil/image.go

Lines changed: 23 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -29,13 +29,13 @@ var usrLibMultiarchDir = map[string]string{
2929
}
3030

3131
// Adapted from https://github.com/NVIDIA/libnvidia-container/blob/v1.15.0/src/nvc_container.c#L152-L165
32-
var usrLibDirs = map[string]string{
32+
var UsrLibDirs = map[string]string{
3333
// Debian-based distros use a multi-arch directory.
3434
"debian": usrLibMultiarchDir[goruntime.GOARCH],
3535
"ubuntu": usrLibMultiarchDir[goruntime.GOARCH],
3636
// Fedora and Redhat use the standard /usr/lib64.
3737
"fedora": "/usr/lib64",
38-
"redhat": "/usr/lib64",
38+
"rhel": "/usr/lib64",
3939
// Fall back to the standard /usr/lib.
4040
"linux": "/usr/lib",
4141
}
@@ -259,18 +259,7 @@ func GetImageMetadata(ctx context.Context, client Client, img, username string)
259259
Args: []string{etcOsRelease},
260260
})
261261
if err == nil {
262-
for _, line := range strings.Split(string(out), "\n") {
263-
if strings.HasPrefix(line, "ID=") {
264-
osReleaseID = strings.TrimPrefix(line, "ID=")
265-
// The value may be quoted.
266-
osReleaseID = strings.Trim(osReleaseID, "\"")
267-
break
268-
}
269-
}
270-
}
271-
if osReleaseID == "" {
272-
// The default value is just "linux" if we can't find the ID.
273-
osReleaseID = "linux"
262+
osReleaseID = GetOSReleaseID(out)
274263
}
275264

276265
return ImageMetadata{
@@ -285,10 +274,28 @@ func GetImageMetadata(ctx context.Context, client Client, img, username string)
285274
// UsrLibDir returns the path to the /usr/lib directory for the given
286275
// operating system determined by the /etc/os-release file.
287276
func (im ImageMetadata) UsrLibDir() string {
288-
if val, ok := usrLibDirs[im.OsReleaseID]; ok {
277+
if val, ok := UsrLibDirs[im.OsReleaseID]; ok {
289278
return val
290279
}
291-
return usrLibDirs["linux"] // fallback
280+
return UsrLibDirs["linux"] // fallback
281+
}
282+
283+
// GetOSReleaseID returns the ID of the operating system from the
284+
// raw contents of /etc/os-release.
285+
func GetOSReleaseID(raw []byte) string {
286+
var osReleaseID string
287+
for _, line := range strings.Split(string(raw), "\n") {
288+
if strings.HasPrefix(line, "ID=") {
289+
osReleaseID = strings.TrimPrefix(line, "ID=")
290+
// The value may be quoted.
291+
osReleaseID = strings.Trim(osReleaseID, "\"")
292+
break
293+
}
294+
}
295+
if osReleaseID == "" {
296+
return "linux"
297+
}
298+
return osReleaseID
292299
}
293300

294301
func DefaultLogImagePullFn(log buildlog.Logger) func(ImagePullEvent) error {

integration/docker_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ import (
2222
func TestDocker(t *testing.T) {
2323
t.Parallel()
2424
if val, ok := os.LookupEnv("CODER_TEST_INTEGRATION"); !ok || val != "1" {
25-
t.Skip("integration tests are skipped unless CODER_TEST_INTEGRATION=true")
25+
t.Skip("integration tests are skipped unless CODER_TEST_INTEGRATION=1")
2626
}
2727

2828
// Dockerd just tests that dockerd can spin up and function correctly.

integration/gpu_test.go

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ import (
1717
func TestDocker_Nvidia(t *testing.T) {
1818
t.Parallel()
1919
if val, ok := os.LookupEnv("CODER_TEST_INTEGRATION"); !ok || val != "1" {
20-
t.Skip("integration tests are skipped unless CODER_TEST_INTEGRATION=true")
20+
t.Skip("integration tests are skipped unless CODER_TEST_INTEGRATION=1")
2121
}
2222
// Only run this test if the nvidia container runtime is detected.
2323
// Check if the nvidia runtime is available using `docker info`.
@@ -44,6 +44,23 @@ func TestDocker_Nvidia(t *testing.T) {
4444
require.NoError(t, err, "failed to run nvidia-smi in the inner container")
4545
})
4646

47+
t.Run("Ubuntu_NoUsrLibDir", func(t *testing.T) {
48+
t.Parallel()
49+
ctx, cancel := context.WithCancel(context.Background())
50+
t.Cleanup(cancel)
51+
52+
// Start the envbox container.
53+
ctID := startEnvboxCmd(ctx, t, integrationtest.UbuntuImage, "root",
54+
"--env", "CODER_ADD_GPU=true",
55+
"--runtime=nvidia",
56+
"--gpus=all",
57+
)
58+
59+
// Assert that we can run nvidia-smi in the inner container.
60+
_, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "nvidia-smi")
61+
require.NoError(t, err, "failed to run nvidia-smi in the inner container")
62+
})
63+
4764
t.Run("Redhat", func(t *testing.T) {
4865
t.Parallel()
4966
ctx, cancel := context.WithCancel(context.Background())
@@ -61,6 +78,23 @@ func TestDocker_Nvidia(t *testing.T) {
6178
_, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "nvidia-smi")
6279
require.NoError(t, err, "failed to run nvidia-smi in the inner container")
6380
})
81+
82+
t.Run("Redhat_NoUsrLibDir", func(t *testing.T) {
83+
t.Parallel()
84+
ctx, cancel := context.WithCancel(context.Background())
85+
t.Cleanup(cancel)
86+
87+
// Start the envbox container.
88+
ctID := startEnvboxCmd(ctx, t, integrationtest.RedhatImage, "root",
89+
"--env", "CODER_ADD_GPU=true",
90+
"--runtime=nvidia",
91+
"--gpus=all",
92+
)
93+
94+
// Assert that we can run nvidia-smi in the inner container.
95+
_, err := execContainerCmd(ctx, t, ctID, "docker", "exec", "workspace_cvm", "nvidia-smi")
96+
require.NoError(t, err, "failed to run nvidia-smi in the inner container")
97+
})
6498
}
6599

66100
// dockerRuntimes returns the list of container runtimes available on the host.
@@ -78,6 +112,11 @@ func dockerRuntimes(t *testing.T) []string {
78112
return strings.Split(raw, "\n")
79113
}
80114

115+
// startEnvboxCmd starts the envbox container with the given arguments.
116+
// Ideally we would use ory/dockertest for this, but it doesn't support
117+
// specifying the runtime. We have alternatively used the docker client library,
118+
// but a nice property of using the docker cli is that if a test fails, you can
119+
// just run the command manually to debug!
81120
func startEnvboxCmd(ctx context.Context, t *testing.T, innerImage, innerUser string, addlArgs ...string) (containerID string) {
82121
t.Helper()
83122

0 commit comments

Comments
 (0)