Skip to content

Commit 6a2c155

Browse files
committed
libcontainer: ability to compile without kmem
Commit fe898e7 (PR #1350) enables kernel memory accounting for all cgroups created by libcontainer, even if a kmem limit is not configured.

Kernel memory accounting is known to be broken in some kernels, specifically the ones from RHEL7 (including RHEL 7.5). Those kernels do not support kernel memory reclaim and are prone to oopses. Unconditionally enabling kmem accounting on such kernels leads to bugs such as:

* #1725
* kubernetes/kubernetes#61937
* moby/moby#29638

This commit provides a way to compile runc without kernel memory setting support. To do so, use something like:

    make BUILDTAGS="seccomp nokmem"

Signed-off-by: Kir Kolyshkin <[email protected]>
1 parent f3ce822 commit 6a2c155

File tree

4 files changed

+69
-46
lines changed

4 files changed

+69
-46
lines changed

Makefile

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD 2>/dev/null)
1212
GIT_BRANCH_CLEAN := $(shell echo $(GIT_BRANCH) | sed -e "s/[^[:alnum:]]/-/g")
1313
RUNC_IMAGE := runc_dev$(if $(GIT_BRANCH_CLEAN),:$(GIT_BRANCH_CLEAN))
1414
PROJECT := github.com/opencontainers/runc
15-
BUILDTAGS := seccomp
15+
BUILDTAGS ?= seccomp
1616
COMMIT_NO := $(shell git rev-parse HEAD 2> /dev/null || true)
1717
COMMIT := $(if $(shell git status --porcelain --untracked-files=no),"${COMMIT_NO}-dirty","${COMMIT_NO}")
1818

libcontainer/cgroups/fs/kmem.go

+55
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
// +build linux,!nokmem
2+
3+
package fs
4+
5+
import (
6+
"fmt"
7+
"io/ioutil"
8+
"os"
9+
"path/filepath"
10+
"strconv"
11+
"syscall" // for Errno type only
12+
13+
"github.com/opencontainers/runc/libcontainer/cgroups"
14+
"golang.org/x/sys/unix"
15+
)
16+
17+
// cgroupKernelMemoryLimit is the name of the file, relative to a cgroup
// directory, to which the kernel memory limit is written.
const cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
18+
19+
func EnableKernelMemoryAccounting(path string) error {
20+
// Check if kernel memory is enabled
21+
// We have to limit the kernel memory here as it won't be accounted at all
22+
// until a limit is set on the cgroup and limit cannot be set once the
23+
// cgroup has children, or if there are already tasks in the cgroup.
24+
for _, i := range []int64{1, -1} {
25+
if err := setKernelMemory(path, i); err != nil {
26+
return err
27+
}
28+
}
29+
return nil
30+
}
31+
32+
func setKernelMemory(path string, kernelMemoryLimit int64) error {
33+
if path == "" {
34+
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
35+
}
36+
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
37+
// kernel memory is not enabled on the system so we should do nothing
38+
return nil
39+
}
40+
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
41+
// Check if the error number returned by the syscall is "EBUSY"
42+
// The EBUSY signal is returned on attempts to write to the
43+
// memory.kmem.limit_in_bytes file if the cgroup has children or
44+
// once tasks have been attached to the cgroup
45+
if pathErr, ok := err.(*os.PathError); ok {
46+
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
47+
if errNo == unix.EBUSY {
48+
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
49+
}
50+
}
51+
}
52+
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
53+
}
54+
return nil
55+
}
+11
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
// +build linux,nokmem
2+
3+
package fs
4+
5+
// EnableKernelMemoryAccounting is the variant used when building with the
// nokmem tag: it does nothing with path and always returns nil, so kernel
// memory accounting is never enabled by this build.
func EnableKernelMemoryAccounting(path string) error {
	// Intentionally a no-op.
	return nil
}
8+
9+
// setKernelMemory is the variant used when building with the nokmem tag: it
// ignores both path and kernelMemoryLimit, writes nothing, and always returns
// nil.
func setKernelMemory(path string, kernelMemoryLimit int64) error {
	// Intentionally a no-op.
	return nil
}

libcontainer/cgroups/fs/memory.go

+2-45
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,18 @@ package fs
55
import (
66
"bufio"
77
"fmt"
8-
"io/ioutil"
98
"os"
109
"path/filepath"
1110
"strconv"
1211
"strings"
13-
"syscall" // only for Errno
1412

1513
"github.com/opencontainers/runc/libcontainer/cgroups"
1614
"github.com/opencontainers/runc/libcontainer/configs"
17-
18-
"golang.org/x/sys/unix"
1915
)
2016

2117
const (
22-
cgroupKernelMemoryLimit = "memory.kmem.limit_in_bytes"
23-
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
24-
cgroupMemoryLimit = "memory.limit_in_bytes"
18+
cgroupMemorySwapLimit = "memory.memsw.limit_in_bytes"
19+
cgroupMemoryLimit = "memory.limit_in_bytes"
2520
)
2621

2722
type MemoryGroup struct {
@@ -67,44 +62,6 @@ func (s *MemoryGroup) Apply(d *cgroupData) (err error) {
6762
return nil
6863
}
6964

70-
func EnableKernelMemoryAccounting(path string) error {
71-
// Check if kernel memory is enabled
72-
// We have to limit the kernel memory here as it won't be accounted at all
73-
// until a limit is set on the cgroup and limit cannot be set once the
74-
// cgroup has children, or if there are already tasks in the cgroup.
75-
for _, i := range []int64{1, -1} {
76-
if err := setKernelMemory(path, i); err != nil {
77-
return err
78-
}
79-
}
80-
return nil
81-
}
82-
83-
func setKernelMemory(path string, kernelMemoryLimit int64) error {
84-
if path == "" {
85-
return fmt.Errorf("no such directory for %s", cgroupKernelMemoryLimit)
86-
}
87-
if !cgroups.PathExists(filepath.Join(path, cgroupKernelMemoryLimit)) {
88-
// kernel memory is not enabled on the system so we should do nothing
89-
return nil
90-
}
91-
if err := ioutil.WriteFile(filepath.Join(path, cgroupKernelMemoryLimit), []byte(strconv.FormatInt(kernelMemoryLimit, 10)), 0700); err != nil {
92-
// Check if the error number returned by the syscall is "EBUSY"
93-
// The EBUSY signal is returned on attempts to write to the
94-
// memory.kmem.limit_in_bytes file if the cgroup has children or
95-
// once tasks have been attached to the cgroup
96-
if pathErr, ok := err.(*os.PathError); ok {
97-
if errNo, ok := pathErr.Err.(syscall.Errno); ok {
98-
if errNo == unix.EBUSY {
99-
return fmt.Errorf("failed to set %s, because either tasks have already joined this cgroup or it has children", cgroupKernelMemoryLimit)
100-
}
101-
}
102-
}
103-
return fmt.Errorf("failed to write %v to %v: %v", kernelMemoryLimit, cgroupKernelMemoryLimit, err)
104-
}
105-
return nil
106-
}
107-
10865
func setMemoryAndSwap(path string, cgroup *configs.Cgroup) error {
10966
// If the memory update is set to -1 we should also
11067
// set swap to -1, it means unlimited memory.

0 commit comments

Comments
 (0)