Skip to content

Commit fe76e14

Browse files
committed
Auto merge of #111646 - Voultapher:restore-branchless-code-gen-for-merge, r=cuviper
Use code with reliable branchless code-gen for slice::sort merge. The recent LLVM 16 update changed code-gen for the merge function in the slice::sort implementation so that it is no longer branchless. This change improves performance by 30% for random patterns, restoring it to the level seen with LLVM 15. Fixes #111559
2 parents 77f4f82 + 42655ff commit fe76e14

File tree

1 file changed

+12
-26
lines changed

1 file changed

+12
-26
lines changed

Diff for: library/core/src/slice/sort.rs

+12-26
Original file line numberDiff line numberDiff line change
@@ -1085,12 +1085,12 @@ where
10851085

10861086
// SAFETY: left and right must be valid and part of v; the same holds for out.
10871087
unsafe {
1088-
let to_copy = if is_less(&*right, &**left) {
1089-
get_and_increment(&mut right)
1090-
} else {
1091-
get_and_increment(left)
1092-
};
1093-
ptr::copy_nonoverlapping(to_copy, get_and_increment(out), 1);
1088+
let is_l = is_less(&*right, &**left);
1089+
let to_copy = if is_l { right } else { *left };
1090+
ptr::copy_nonoverlapping(to_copy, *out, 1);
1091+
*out = out.add(1);
1092+
right = right.add(is_l as usize);
1093+
*left = left.add(!is_l as usize);
10941094
}
10951095
}
10961096
} else {
@@ -1113,32 +1113,18 @@ where
11131113

11141114
// SAFETY: left and right must be valid and part of v; the same holds for out.
11151115
unsafe {
1116-
let to_copy = if is_less(&*right.sub(1), &*left.sub(1)) {
1117-
decrement_and_get(left)
1118-
} else {
1119-
decrement_and_get(right)
1120-
};
1121-
ptr::copy_nonoverlapping(to_copy, decrement_and_get(&mut out), 1);
1116+
let is_l = is_less(&*right.sub(1), &*left.sub(1));
1117+
*left = left.sub(is_l as usize);
1118+
*right = right.sub(!is_l as usize);
1119+
let to_copy = if is_l { *left } else { *right };
1120+
out = out.sub(1);
1121+
ptr::copy_nonoverlapping(to_copy, out, 1);
11221122
}
11231123
}
11241124
}
11251125
// Finally, `hole` gets dropped. If the shorter run was not fully consumed, whatever remains of
11261126
// it will now be copied into the hole in `v`.
11271127

1128-
unsafe fn get_and_increment<T>(ptr: &mut *mut T) -> *mut T {
1129-
let old = *ptr;
1130-
1131-
// SAFETY: ptr.add(1) must still be a valid pointer and part of `v`.
1132-
*ptr = unsafe { ptr.add(1) };
1133-
old
1134-
}
1135-
1136-
unsafe fn decrement_and_get<T>(ptr: &mut *mut T) -> *mut T {
1137-
// SAFETY: ptr.sub(1) must still be a valid pointer and part of `v`.
1138-
*ptr = unsafe { ptr.sub(1) };
1139-
*ptr
1140-
}
1141-
11421128
// When dropped, copies the range `start..end` into `dest..`.
11431129
struct MergeHole<T> {
11441130
start: *mut T,

0 commit comments

Comments
 (0)