Skip to content

Commit e55c53c

Browse files
committed
Auto merge of rust-lang#97925 - the8472:cgroupv1, r=joshtriplett
Add cgroupv1 support to available_parallelism. Fixes rust-lang#97549. My dev machine uses cgroup v2 so I was only able to test that code path. So the v1 code path is written only based on documentation. I could use some help testing that it works on a machine with cgroups v1: ``` $ x.py build --stage 1 # quota.rs fn main() { println!("{:?}", std::thread::available_parallelism()); } # assuming stage1 is linked in rustup $ rustc +stage1 quota.rs # spawn a new cgroup scope for the current user $ sudo systemd-run -p CPUQuota="300%" --uid=$(id -u) -tdS # should print Ok(3) $ ./quota ``` If it doesn't work as expected, an strace, the contents of `/proc/self/cgroup` and the structure of `/sys/fs/cgroup` would help.
2 parents 2e43d06 + 2e33c81 commit e55c53c

File tree

3 files changed

+196
-47
lines changed

3 files changed

+196
-47
lines changed

library/std/src/lib.rs

+1
Original file line numberDiff line numberDiff line change
@@ -276,6 +276,7 @@
276276
#![feature(hasher_prefixfree_extras)]
277277
#![feature(hashmap_internals)]
278278
#![feature(int_error_internals)]
279+
#![feature(is_some_with)]
279280
#![feature(maybe_uninit_slice)]
280281
#![feature(maybe_uninit_write_slice)]
281282
#![feature(mixed_integer_ops)]

library/std/src/sys/unix/thread.rs

+189-46
Original file line numberDiff line numberDiff line change
@@ -285,7 +285,7 @@ pub fn available_parallelism() -> io::Result<NonZeroUsize> {
285285
))] {
286286
#[cfg(any(target_os = "android", target_os = "linux"))]
287287
{
288-
let quota = cgroup2_quota().max(1);
288+
let quota = cgroups::quota().max(1);
289289
let mut set: libc::cpu_set_t = unsafe { mem::zeroed() };
290290
unsafe {
291291
if libc::sched_getaffinity(0, mem::size_of::<libc::cpu_set_t>(), &mut set) == 0 {
@@ -379,49 +379,88 @@ pub fn available_parallelism() -> io::Result<NonZeroUsize> {
379379
}
380380
}
381381

382-
/// Returns cgroup CPU quota in core-equivalents, rounded down, or usize::MAX if the quota cannot
383-
/// be determined or is not set.
384382
#[cfg(any(target_os = "android", target_os = "linux"))]
385-
fn cgroup2_quota() -> usize {
383+
mod cgroups {
384+
//! Currently not covered
385+
//! * cgroup v2 in non-standard mountpoints
386+
//! * paths containing control characters or spaces, since those would be escaped in procfs
387+
//! output and we don't unescape
388+
use crate::borrow::Cow;
386389
use crate::ffi::OsString;
387390
use crate::fs::{try_exists, File};
388391
use crate::io::Read;
392+
use crate::io::{BufRead, BufReader};
389393
use crate::os::unix::ffi::OsStringExt;
394+
use crate::path::Path;
390395
use crate::path::PathBuf;
396+
use crate::str::from_utf8;
391397

392-
let mut quota = usize::MAX;
393-
if cfg!(miri) {
394-
// Attempting to open a file fails under default flags due to isolation.
395-
// And Miri does not have parallelism anyway.
396-
return quota;
397-
}
398-
399-
let _: Option<()> = try {
400-
let mut buf = Vec::with_capacity(128);
401-
// find our place in the cgroup hierarchy
402-
File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
403-
let cgroup_path = buf
404-
.split(|&c| c == b'\n')
405-
.filter_map(|line| {
406-
let mut fields = line.splitn(3, |&c| c == b':');
407-
// expect cgroupv2 which has an empty 2nd field
408-
if fields.nth(1) != Some(b"") {
409-
return None;
410-
}
411-
let path = fields.last()?;
412-
// skip leading slash
413-
Some(path[1..].to_owned())
414-
})
415-
.next()?;
416-
let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));
398+
#[derive(PartialEq)]
399+
enum Cgroup {
400+
V1,
401+
V2,
402+
}
403+
404+
/// Returns cgroup CPU quota in core-equivalents, rounded down, or usize::MAX if the quota cannot
405+
/// be determined or is not set.
406+
pub(super) fn quota() -> usize {
407+
let mut quota = usize::MAX;
408+
if cfg!(miri) {
409+
// Attempting to open a file fails under default flags due to isolation.
410+
// And Miri does not have parallelism anyway.
411+
return quota;
412+
}
413+
414+
let _: Option<()> = try {
415+
let mut buf = Vec::with_capacity(128);
416+
// find our place in the cgroup hierarchy
417+
File::open("/proc/self/cgroup").ok()?.read_to_end(&mut buf).ok()?;
418+
let (cgroup_path, version) =
419+
buf.split(|&c| c == b'\n').fold(None, |previous, line| {
420+
let mut fields = line.splitn(3, |&c| c == b':');
421+
// 2nd field is a list of controllers for v1 or empty for v2
422+
let version = match fields.nth(1) {
423+
Some(b"") => Cgroup::V2,
424+
Some(controllers)
425+
if from_utf8(controllers)
426+
.is_ok_and(|c| c.split(",").any(|c| c == "cpu")) =>
427+
{
428+
Cgroup::V1
429+
}
430+
_ => return previous,
431+
};
432+
433+
// already-found v1 trumps v2 since it explicitly specifies its controllers
434+
if previous.is_some() && version == Cgroup::V2 {
435+
return previous;
436+
}
437+
438+
let path = fields.last()?;
439+
// skip leading slash
440+
Some((path[1..].to_owned(), version))
441+
})?;
442+
let cgroup_path = PathBuf::from(OsString::from_vec(cgroup_path));
443+
444+
quota = match version {
445+
Cgroup::V1 => quota_v1(cgroup_path),
446+
Cgroup::V2 => quota_v2(cgroup_path),
447+
};
448+
};
449+
450+
quota
451+
}
452+
453+
fn quota_v2(group_path: PathBuf) -> usize {
454+
let mut quota = usize::MAX;
417455

418456
let mut path = PathBuf::with_capacity(128);
419457
let mut read_buf = String::with_capacity(20);
420458

459+
// standard mount location defined in file-hierarchy(7) manpage
421460
let cgroup_mount = "/sys/fs/cgroup";
422461

423462
path.push(cgroup_mount);
424-
path.push(&cgroup_path);
463+
path.push(&group_path);
425464

426465
path.push("cgroup.controllers");
427466

@@ -432,30 +471,134 @@ fn cgroup2_quota() -> usize {
432471

433472
path.pop();
434473

435-
while path.starts_with(cgroup_mount) {
436-
path.push("cpu.max");
474+
let _: Option<()> = try {
475+
while path.starts_with(cgroup_mount) {
476+
path.push("cpu.max");
477+
478+
read_buf.clear();
479+
480+
if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
481+
let raw_quota = read_buf.lines().next()?;
482+
let mut raw_quota = raw_quota.split(' ');
483+
let limit = raw_quota.next()?;
484+
let period = raw_quota.next()?;
485+
match (limit.parse::<usize>(), period.parse::<usize>()) {
486+
(Ok(limit), Ok(period)) => {
487+
quota = quota.min(limit / period);
488+
}
489+
_ => {}
490+
}
491+
}
437492

438-
read_buf.clear();
493+
path.pop(); // pop filename
494+
path.pop(); // pop dir
495+
}
496+
};
439497

440-
if File::open(&path).and_then(|mut f| f.read_to_string(&mut read_buf)).is_ok() {
441-
let raw_quota = read_buf.lines().next()?;
442-
let mut raw_quota = raw_quota.split(' ');
443-
let limit = raw_quota.next()?;
444-
let period = raw_quota.next()?;
445-
match (limit.parse::<usize>(), period.parse::<usize>()) {
446-
(Ok(limit), Ok(period)) => {
447-
quota = quota.min(limit / period);
448-
}
498+
quota
499+
}
500+
501+
fn quota_v1(group_path: PathBuf) -> usize {
502+
let mut quota = usize::MAX;
503+
let mut path = PathBuf::with_capacity(128);
504+
let mut read_buf = String::with_capacity(20);
505+
506+
// Hardcode commonly used locations mentioned in the cgroups(7) manpage
507+
// if that doesn't work scan mountinfo and adjust `group_path` for bind-mounts
508+
let mounts: &[fn(&Path) -> Option<(_, &Path)>] = &[
509+
|p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu"), p)),
510+
|p| Some((Cow::Borrowed("/sys/fs/cgroup/cpu,cpuacct"), p)),
511+
// this can be expensive on systems with tons of mountpoints
512+
// but we only get to this point when /proc/self/cgroup explicitly indicated
513+
// this process belongs to a cpu-controller cgroup v1 and the defaults didn't work
514+
find_mountpoint,
515+
];
516+
517+
for mount in mounts {
518+
let Some((mount, group_path)) = mount(&group_path) else { continue };
519+
520+
path.clear();
521+
path.push(mount.as_ref());
522+
path.push(&group_path);
523+
524+
// skip if we guessed the mount incorrectly
525+
if matches!(try_exists(&path), Err(_) | Ok(false)) {
526+
continue;
527+
}
528+
529+
while path.starts_with(mount.as_ref()) {
530+
let mut parse_file = |name| {
531+
path.push(name);
532+
read_buf.clear();
533+
534+
let f = File::open(&path);
535+
path.pop(); // restore buffer before any early returns
536+
f.ok()?.read_to_string(&mut read_buf).ok()?;
537+
let parsed = read_buf.trim().parse::<usize>().ok()?;
538+
539+
Some(parsed)
540+
};
541+
542+
let limit = parse_file("cpu.cfs_quota_us");
543+
let period = parse_file("cpu.cfs_period_us");
544+
545+
match (limit, period) {
546+
(Some(limit), Some(period)) => quota = quota.min(limit / period),
449547
_ => {}
450548
}
549+
550+
path.pop();
451551
}
452552

453-
path.pop(); // pop filename
454-
path.pop(); // pop dir
553+
// we passed the try_exists above so we should have traversed the correct hierarchy
554+
// when reaching this line
555+
break;
455556
}
456-
};
457557

458-
quota
558+
quota
559+
}
560+
561+
/// Scan mountinfo for cgroup v1 mountpoint with a cpu controller
562+
///
563+
/// If the cgroupfs is a bind mount then `group_path` is adjusted to skip
564+
/// over the already-included prefix
565+
fn find_mountpoint(group_path: &Path) -> Option<(Cow<'static, str>, &Path)> {
566+
let mut reader = BufReader::new(File::open("/proc/self/mountinfo").ok()?);
567+
let mut line = String::with_capacity(256);
568+
loop {
569+
line.clear();
570+
if reader.read_line(&mut line).ok()? == 0 {
571+
break;
572+
}
573+
574+
let line = line.trim();
575+
let mut items = line.split(' ');
576+
577+
let sub_path = items.nth(3)?;
578+
let mount_point = items.next()?;
579+
let mount_opts = items.next_back()?;
580+
let filesystem_type = items.nth_back(1)?;
581+
582+
if filesystem_type != "cgroup" || !mount_opts.split(',').any(|opt| opt == "cpu") {
583+
// not a cgroup / not a cpu-controller
584+
continue;
585+
}
586+
587+
let sub_path = Path::new(sub_path).strip_prefix("/").ok()?;
588+
589+
if !group_path.starts_with(sub_path) {
590+
// this is a bind-mount and the bound subdirectory
591+
// does not contain the cgroup this process belongs to
592+
continue;
593+
}
594+
595+
let trimmed_group_path = group_path.strip_prefix(sub_path).ok()?;
596+
597+
return Some((Cow::Owned(mount_point.to_owned()), trimmed_group_path));
598+
}
599+
600+
None
601+
}
459602
}
460603

461604
#[cfg(all(

library/std/src/thread/mod.rs

+6-1
Original file line numberDiff line numberDiff line change
@@ -1577,10 +1577,15 @@ fn _assert_sync_and_send() {
15771577
///
15781578
/// On Linux:
15791579
/// - It may overcount the amount of parallelism available when limited by a
1580-
/// process-wide affinity mask or cgroup quotas and cgroup2 fs or `sched_getaffinity()` can't be
1580+
/// process-wide affinity mask or cgroup quotas and `sched_getaffinity()` or cgroup fs can't be
15811581
/// queried, e.g. due to sandboxing.
15821582
/// - It may undercount the amount of parallelism if the current thread's affinity mask
15831583
/// does not reflect the process' cpuset, e.g. due to pinned threads.
1584+
/// - If the process is in a cgroup v1 cpu controller, this may need to
1585+
/// scan mountpoints to find the corresponding cgroup v1 controller,
1586+
/// which may take time on systems with large numbers of mountpoints.
1587+
/// (This does not apply to cgroup v2, or to processes not in a
1588+
/// cgroup.)
15841589
///
15851590
/// On all targets:
15861591
/// - It may overcount the amount of parallelism available when running in a VM

0 commit comments

Comments
 (0)