Skip to content

Commit 717e3aa

Browse files
committed
Use simpler branchy swap logic in tiny merge sort
Avoids the code duplication issue and results in smaller binary size, which after all is the purpose of the feature.
1 parent ae57bdf commit 717e3aa

File tree

2 files changed

+13
-42
lines changed

2 files changed

+13
-42
lines changed

Diff for: core/src/slice/sort/shared/smallsort.rs

+5
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,11 @@ where
378378

379379
/// Swap two values in the slice pointed to by `v_base` at the position `a_pos` and `b_pos` if the
380380
/// value at position `b_pos` is less than the one at position `a_pos`.
381+
///
382+
/// Purposefully not marked `#[inline]`, despite us wanting it to be inlined for integers like
383+
/// types. `is_less` could be a huge function and we want to give the compiler an option to
384+
/// not inline this function. For the same reasons that this function is very perf critical
385+
/// it should be in the same module as the functions that use it.
381386
unsafe fn swap_if_less<T, F>(v_base: *mut T, a_pos: usize, b_pos: usize, is_less: &mut F)
382387
where
383388
F: FnMut(&T, &T) -> bool,

Diff for: core/src/slice/sort/stable/tiny.rs

+8-42
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Binary-size optimized mergesort inspired by https://github.com/voultapher/tiny-sort-rs.
22
3-
use crate::mem::{ManuallyDrop, MaybeUninit};
3+
use crate::mem::MaybeUninit;
44
use crate::ptr;
55
use crate::slice::sort::stable::merge;
66

@@ -27,49 +27,15 @@ pub fn mergesort<T, F: FnMut(&T, &T) -> bool>(
2727

2828
merge::merge(v, scratch, mid, is_less);
2929
} else if len == 2 {
30-
// Branchless swap the two elements. This reduces the recursion depth and improves
31-
// perf significantly at a small binary-size cost. Trades ~10% perf boost for integers
32-
// for ~50 bytes in the binary.
33-
3430
// SAFETY: We checked the len, the pointers we create are valid and don't overlap.
3531
unsafe {
36-
swap_if_less(v.as_mut_ptr(), 0, 1, is_less);
37-
}
38-
}
39-
}
40-
41-
/// Swap two values in the slice pointed to by `v_base` at the position `a_pos` and `b_pos` if the
42-
/// value at position `b_pos` is less than the one at position `a_pos`.
43-
unsafe fn swap_if_less<T, F>(v_base: *mut T, a_pos: usize, b_pos: usize, is_less: &mut F)
44-
where
45-
F: FnMut(&T, &T) -> bool,
46-
{
47-
// SAFETY: the caller must guarantee that `a` and `b` each added to `v_base` yield valid
48-
// pointers into `v_base`, and are properly aligned, and part of the same allocation.
49-
unsafe {
50-
let v_a = v_base.add(a_pos);
51-
let v_b = v_base.add(b_pos);
32+
let v_base = v.as_mut_ptr();
33+
let v_a = v_base;
34+
let v_b = v_base.add(1);
5235

53-
// PANIC SAFETY: if is_less panics, no scratch memory was created and the slice should still be
54-
// in a well defined state, without duplicates.
55-
56-
// Important to only swap if it is more and not if it is equal. is_less should return false for
57-
// equal, so we don't swap.
58-
let should_swap = is_less(&*v_b, &*v_a);
59-
60-
// This is a branchless version of swap if.
61-
// The equivalent code with a branch would be:
62-
//
63-
// if should_swap {
64-
// ptr::swap(left, right, 1);
65-
// }
66-
67-
// The goal is to generate cmov instructions here.
68-
let left_swap = if should_swap { v_b } else { v_a };
69-
let right_swap = if should_swap { v_a } else { v_b };
70-
71-
let right_swap_tmp = ManuallyDrop::new(ptr::read(right_swap));
72-
ptr::copy(left_swap, v_a, 1);
73-
ptr::copy_nonoverlapping(&*right_swap_tmp, v_b, 1);
36+
if is_less(&*v_b, &*v_a) {
37+
ptr::swap_nonoverlapping(v_a, v_b, 1);
38+
}
39+
}
7440
}
7541
}

0 commit comments

Comments
 (0)