Skip to content
This repository was archived by the owner on May 28, 2025. It is now read-only.

Commit a7aafd7

Browse files
committed
Add codegen test to ensure the pattern used in convert_while_ascii gets vectorized
1 parent b03d939 commit a7aafd7

File tree

2 files changed

+53
-4
lines changed

2 files changed

+53
-4
lines changed

library/alloc/src/str.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ pub unsafe fn from_boxed_utf8_unchecked(v: Box<[u8]>) -> Box<str> {
613613
#[cfg(not(test))]
614614
#[cfg(not(no_global_oom_handling))]
615615
fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
616-
// process the input in chunks to enable auto-vectorization
616+
// Process the input in chunks to enable auto-vectorization
617617
const USIZE_SIZE: usize = mem::size_of::<usize>();
618618
const MAGIC_UNROLL: usize = 2;
619619
const N: usize = USIZE_SIZE * MAGIC_UNROLL;
@@ -635,9 +635,10 @@ fn convert_while_ascii(s: &str, convert: fn(&u8) -> u8) -> (String, &str) {
635635
is_ascii[j] = chunk[j] <= 127;
636636
}
637637

638-
// auto-vectorization for this check is a bit fragile,
639-
// sum and comparing against the chunk size gives the best result,
640-
// specifically a pmovmsk instruction on x86.
638+
// Auto-vectorization for this check is a bit fragile; summing and comparing against the chunk
639+
// size gives the best result, specifically a pmovmskb instruction on x86.
640+
// There is a codegen test in `issue-123712-str-to-lower-autovectorization.rs` which should
641+
// be updated when this method is changed.
641642
if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
642643
break;
643644
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
//@ compile-flags: -Copt-level=3
2+
#![crate_type = "lib"]
3+
4+
/// Ensure that the ascii-prefix loop for `str::to_lowercase` and `str::to_uppercase` uses vector
5+
/// instructions. Since these methods do not get inlined, the relevant code is duplicated here and
6+
/// should be updated when the implementation changes.
7+
// CHECK-LABEL: @lower_while_ascii
8+
// CHECK: [[A:%[0-9]]] = load <16 x i8>
9+
// CHECK-NEXT: [[B:%[0-9]]] = icmp slt <16 x i8> [[A]], zeroinitializer
10+
// CHECK-NEXT: [[C:%[0-9]]] = bitcast <16 x i1> [[B]] to i16
11+
#[no_mangle]
12+
// Writes the ASCII-lowercased form of the leading all-ASCII chunks of `input` into `output`,
// `N` bytes at a time, and returns how many bytes were converted (always a multiple of `N`).
// Conversion stops at the first `N`-byte chunk that contains a non-ASCII byte, or once fewer
// than `N` bytes of `input` remain; the unprocessed tail of `output` is left untouched.
//
// Panics if `output` is shorter than `input` (the re-slice of `output` below is bounds-checked).
//
// NOTE(review): the CHECK lines for this function expect `<16 x i8>` vectors, which equals `N`
// only where `usize` is 8 bytes; presumably this test is meant for 64-bit targets -- confirm.
pub fn lower_while_ascii(mut input: &[u8], mut output: &mut [u8]) -> usize {
13+
// Process the input in fixed-size chunks to enable auto-vectorization.
14+
const USIZE_SIZE: usize = core::mem::size_of::<usize>();
15+
const MAGIC_UNROLL: usize = 2;
16+
// Chunk length in bytes: two machine words.
const N: usize = USIZE_SIZE * MAGIC_UNROLL;
17+
18+
// Truncate `output` to the length of `input` so both slices stay the same length for the
// rest of the function; this is what makes the unchecked accesses below in-bounds.
output = &mut output[..input.len()];
19+
20+
let mut ascii_prefix_len = 0_usize;
21+
// Per-byte ASCII flags for the current chunk, overwritten on every iteration.
let mut is_ascii = [false; N];
22+
23+
while input.len() >= N {
24+
// SAFETY: the loop condition guarantees `input.len() >= N`.
let chunk = unsafe { input.get_unchecked(..N) };
25+
// SAFETY: `output` was truncated to `input.len()` before the loop and both slices are
// advanced by `N` together, so `output.len() == input.len() >= N` here.
let out_chunk = unsafe { output.get_unchecked_mut(..N) };
26+
27+
for j in 0..N {
28+
// A byte is ASCII iff its value is at most 127 (high bit clear).
is_ascii[j] = chunk[j] <= 127;
29+
}
30+
31+
// Auto-vectorization for this check is a bit fragile;
32+
// summing the flags and comparing against the chunk size gives the best result,
33+
// specifically a pmovmsk instruction on x86.
// The sum is at most `N` (16 when `usize` is 8 bytes), so the `u8` accumulator does not
// overflow for such sizes.
34+
if is_ascii.iter().map(|x| *x as u8).sum::<u8>() as usize != N {
35+
// At least one byte in this chunk is non-ASCII: stop without writing the chunk.
break;
36+
}
37+
38+
for j in 0..N {
39+
out_chunk[j] = chunk[j].to_ascii_lowercase();
40+
}
41+
42+
ascii_prefix_len += N;
43+
// SAFETY: `input.len() >= N` was checked by the loop condition.
input = unsafe { input.get_unchecked(N..) };
44+
// SAFETY: `output` has the same length as `input` had at the top of this iteration,
// which is at least `N`.
output = unsafe { output.get_unchecked_mut(N..) };
45+
}
46+
47+
ascii_prefix_len
48+
}

0 commit comments

Comments
 (0)