Skip to content

Commit 1efd804

Browse files
committed
Auto merge of rust-lang#81126 - oxalica:retain-early-drop, r=m-ou-se
Optimize Vec::retain Use `copy_non_overlapping` instead of `swap` to reduce memory writes, like what we've done in rust-lang#44355 and `String::retain`. rust-lang#48065 already tried to do this optimization but it is reverted in rust-lang#67300 due to bad codegen of `DrainFilter::drop`. This PR re-implement the drop-then-move approach. I did a [benchmark](https://gist.github.com/oxalica/3360eec9376f22533fcecff02798b698) on small-no-drop, small-need-drop, large-no-drop elements with different predicate functions. It turns out that the new implementation is >20% faster in average for almost all cases. Only 2/24 cases are slower by 3% and 5%. See the link above for more detail. I think regression in may-panic cases is due to drop-guard preventing some optimization. If it's permitted to leak elements when predicate function of element's `drop` panic, the new implementation should be almost always faster than current one. I'm not sure if we should leak on panic, since there is indeed an issue (rust-lang#52267) complains about it before.
2 parents 9ce7268 + 2a11c57 commit 1efd804

File tree

2 files changed

+125
-11
lines changed

2 files changed

+125
-11
lines changed

library/alloc/src/vec/mod.rs

+64-11
Original file line numberDiff line numberDiff line change
@@ -1399,22 +1399,75 @@ impl<T, A: Allocator> Vec<T, A> {
13991399
where
14001400
F: FnMut(&T) -> bool,
14011401
{
1402-
let len = self.len();
1403-
let mut del = 0;
1404-
{
1405-
let v = &mut **self;
1402+
let original_len = self.len();
1403+
// Avoid double drop if the drop guard is not executed,
1404+
// since we may make some holes during the process.
1405+
unsafe { self.set_len(0) };
1406+
1407+
// Vec: [Kept, Kept, Hole, Hole, Hole, Hole, Unchecked, Unchecked]
1408+
// |<- processed len ->| ^- next to check
1409+
// |<- deleted cnt ->|
1410+
// |<- original_len ->|
1411+
// Kept: Elements which predicate returns true on.
1412+
// Hole: Moved or dropped element slot.
1413+
// Unchecked: Unchecked valid elements.
1414+
//
1415+
// This drop guard will be invoked when predicate or `drop` of element panicked.
1416+
// It shifts unchecked elements to cover holes and `set_len` to the correct length.
1417+
// In cases when predicate and `drop` never panick, it will be optimized out.
1418+
struct BackshiftOnDrop<'a, T, A: Allocator> {
1419+
v: &'a mut Vec<T, A>,
1420+
processed_len: usize,
1421+
deleted_cnt: usize,
1422+
original_len: usize,
1423+
}
14061424

1407-
for i in 0..len {
1408-
if !f(&v[i]) {
1409-
del += 1;
1410-
} else if del > 0 {
1411-
v.swap(i - del, i);
1425+
impl<T, A: Allocator> Drop for BackshiftOnDrop<'_, T, A> {
1426+
fn drop(&mut self) {
1427+
if self.deleted_cnt > 0 {
1428+
// SAFETY: Trailing unchecked items must be valid since we never touch them.
1429+
unsafe {
1430+
ptr::copy(
1431+
self.v.as_ptr().add(self.processed_len),
1432+
self.v.as_mut_ptr().add(self.processed_len - self.deleted_cnt),
1433+
self.original_len - self.processed_len,
1434+
);
1435+
}
1436+
}
1437+
// SAFETY: After filling holes, all items are in contiguous memory.
1438+
unsafe {
1439+
self.v.set_len(self.original_len - self.deleted_cnt);
14121440
}
14131441
}
14141442
}
1415-
if del > 0 {
1416-
self.truncate(len - del);
1443+
1444+
let mut g = BackshiftOnDrop { v: self, processed_len: 0, deleted_cnt: 0, original_len };
1445+
1446+
while g.processed_len < original_len {
1447+
// SAFETY: Unchecked element must be valid.
1448+
let cur = unsafe { &mut *g.v.as_mut_ptr().add(g.processed_len) };
1449+
if !f(cur) {
1450+
// Advance early to avoid double drop if `drop_in_place` panicked.
1451+
g.processed_len += 1;
1452+
g.deleted_cnt += 1;
1453+
// SAFETY: We never touch this element again after dropped.
1454+
unsafe { ptr::drop_in_place(cur) };
1455+
// We already advanced the counter.
1456+
continue;
1457+
}
1458+
if g.deleted_cnt > 0 {
1459+
// SAFETY: `deleted_cnt` > 0, so the hole slot must not overlap with current element.
1460+
// We use copy for move, and never touch this element again.
1461+
unsafe {
1462+
let hole_slot = g.v.as_mut_ptr().add(g.processed_len - g.deleted_cnt);
1463+
ptr::copy_nonoverlapping(cur, hole_slot, 1);
1464+
}
1465+
}
1466+
g.processed_len += 1;
14171467
}
1468+
1469+
// All item are processed. This can be optimized to `set_len` by LLVM.
1470+
drop(g);
14181471
}
14191472

14201473
/// Removes all but the first of consecutive elements in the vector that resolve to the same

library/alloc/tests/vec.rs

+61
Original file line numberDiff line numberDiff line change
@@ -287,6 +287,67 @@ fn test_retain() {
287287
assert_eq!(vec, [2, 4]);
288288
}
289289

290+
#[test]
291+
fn test_retain_pred_panic_with_hole() {
292+
let v = (0..5).map(Rc::new).collect::<Vec<_>>();
293+
catch_unwind(AssertUnwindSafe(|| {
294+
let mut v = v.clone();
295+
v.retain(|r| match **r {
296+
0 => true,
297+
1 => false,
298+
2 => true,
299+
_ => panic!(),
300+
});
301+
}))
302+
.unwrap_err();
303+
// Everything is dropped when predicate panicked.
304+
assert!(v.iter().all(|r| Rc::strong_count(r) == 1));
305+
}
306+
307+
#[test]
308+
fn test_retain_pred_panic_no_hole() {
309+
let v = (0..5).map(Rc::new).collect::<Vec<_>>();
310+
catch_unwind(AssertUnwindSafe(|| {
311+
let mut v = v.clone();
312+
v.retain(|r| match **r {
313+
0 | 1 | 2 => true,
314+
_ => panic!(),
315+
});
316+
}))
317+
.unwrap_err();
318+
// Everything is dropped when predicate panicked.
319+
assert!(v.iter().all(|r| Rc::strong_count(r) == 1));
320+
}
321+
322+
#[test]
323+
fn test_retain_drop_panic() {
324+
struct Wrap(Rc<i32>);
325+
326+
impl Drop for Wrap {
327+
fn drop(&mut self) {
328+
if *self.0 == 3 {
329+
panic!();
330+
}
331+
}
332+
}
333+
334+
let v = (0..5).map(|x| Rc::new(x)).collect::<Vec<_>>();
335+
catch_unwind(AssertUnwindSafe(|| {
336+
let mut v = v.iter().map(|r| Wrap(r.clone())).collect::<Vec<_>>();
337+
v.retain(|w| match *w.0 {
338+
0 => true,
339+
1 => false,
340+
2 => true,
341+
3 => false, // Drop panic.
342+
_ => true,
343+
});
344+
}))
345+
.unwrap_err();
346+
// Other elements are dropped when `drop` of one element panicked.
347+
// The panicked wrapper also has its Rc dropped.
348+
assert!(v.iter().all(|r| Rc::strong_count(r) == 1));
349+
}
350+
290351
#[test]
291352
fn test_dedup() {
292353
fn case(a: Vec<i32>, b: Vec<i32>) {

0 commit comments

Comments
 (0)