Skip to content

Commit 3d2c4ff

Browse files
committed
Optimize pointer alignment in utf8 validation
This uses (and reuses) the u8 array's inherent block alignment when checking whether the current index is block aligned.

I initially thought that this would just move the expensive `align_offset` call out of the while loop and replace it with a subtraction and bitwise AND. But it appears this optimizes much better, too...

before: https://rust.godbolt.org/z/WIPvWl
after: https://rust.godbolt.org/z/-jBPoW

https://github.com/jridgewell/faster-from_utf8/tree/pointer-alignment

```
test from_utf8_2_bytes_fast      ... bench:         310 ns/iter (+/- 42) = 1290 MB/s
test from_utf8_2_bytes_regular   ... bench:         309 ns/iter (+/- 24) = 1294 MB/s
test from_utf8_3_bytes_fast      ... bench:       1,027 ns/iter (+/- 62) = 1168 MB/s
test from_utf8_3_bytes_regular   ... bench:       1,513 ns/iter (+/- 611) = 793 MB/s
test from_utf8_4_bytes_fast      ... bench:       1,788 ns/iter (+/- 26) = 1342 MB/s
test from_utf8_4_bytes_regular   ... bench:       1,907 ns/iter (+/- 181) = 1258 MB/s
test from_utf8_all_bytes_fast    ... bench:       3,463 ns/iter (+/- 97) = 1155 MB/s
test from_utf8_all_bytes_regular ... bench:       4,083 ns/iter (+/- 89) = 979 MB/s
test from_utf8_ascii_fast        ... bench:          88 ns/iter (+/- 4) = 28988 MB/s
test from_utf8_ascii_regular     ... bench:          88 ns/iter (+/- 8) = 28988 MB/s
test from_utf8_cyr_fast          ... bench:       7,707 ns/iter (+/- 531) = 665 MB/s
test from_utf8_cyr_regular       ... bench:       8,202 ns/iter (+/- 135) = 625 MB/s
test from_utf8_enwik8_fast       ... bench:   1,135,756 ns/iter (+/- 84,450) = 8804 MB/s
test from_utf8_enwik8_regular    ... bench:   1,145,468 ns/iter (+/- 79,601) = 8730 MB/s
test from_utf8_jawik10_fast      ... bench:  12,723,844 ns/iter (+/- 473,247) = 785 MB/s
test from_utf8_jawik10_regular   ... bench:  13,384,596 ns/iter (+/- 666,997) = 747 MB/s
test from_utf8_mixed_fast        ... bench:       2,321 ns/iter (+/- 123) = 2081 MB/s
test from_utf8_mixed_regular     ... bench:       2,702 ns/iter (+/- 408) = 1788 MB/s
test from_utf8_mostlyasc_fast    ... bench:         249 ns/iter (+/- 10) = 14666 MB/s
test from_utf8_mostlyasc_regular ... bench:         276 ns/iter (+/- 5) = 13231 MB/s
```
1 parent 4137901 commit 3d2c4ff

File tree

1 file changed

+3
-6
lines changed

1 file changed

+3
-6
lines changed

src/libcore/str/mod.rs

+3-6
Original file line numberDiff line numberDiff line change
@@ -1417,6 +1417,7 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
14171417
let usize_bytes = mem::size_of::<usize>();
14181418
let ascii_block_size = 2 * usize_bytes;
14191419
let blocks_end = if len >= ascii_block_size { len - ascii_block_size + 1 } else { 0 };
1420+
let align = v.as_ptr().align_offset(usize_bytes);
14201421

14211422
while index < len {
14221423
let old_offset = index;
@@ -1496,12 +1497,8 @@ fn run_utf8_validation(v: &[u8]) -> Result<(), Utf8Error> {
14961497
// Ascii case, try to skip forward quickly.
14971498
// When the pointer is aligned, read 2 words of data per iteration
14981499
// until we find a word containing a non-ascii byte.
1499-
let ptr = v.as_ptr();
1500-
let align = unsafe {
1501-
// the offset is safe, because `index` is guaranteed inbounds
1502-
ptr.add(index).align_offset(usize_bytes)
1503-
};
1504-
if align == 0 {
1500+
if align.wrapping_sub(index) % usize_bytes == 0 {
1501+
let ptr = v.as_ptr();
15051502
while index < blocks_end {
15061503
unsafe {
15071504
let block = ptr.add(index) as *const usize;

0 commit comments

Comments
 (0)