3
3
use core:: ascii:: EscapeDefault ;
4
4
5
5
use crate :: fmt:: { self , Write } ;
6
+ #[ cfg( not( all( target_arch = "x86_64" , target_feature = "sse2" ) ) ) ]
6
7
use crate :: intrinsics:: const_eval_select;
7
- use crate :: { ascii, iter, mem , ops} ;
8
+ use crate :: { ascii, iter, ops} ;
8
9
9
10
#[ cfg( not( test) ) ]
10
11
impl [ u8 ] {
@@ -308,14 +309,6 @@ impl<'a> fmt::Debug for EscapeAscii<'a> {
308
309
}
309
310
}
310
311
311
- /// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
312
- /// from `../str/mod.rs`, which does something similar for utf8 validation.
313
- #[ inline]
314
- const fn contains_nonascii ( v : usize ) -> bool {
315
- const NONASCII_MASK : usize = usize:: repeat_u8 ( 0x80 ) ;
316
- ( NONASCII_MASK & v) != 0
317
- }
318
-
319
312
/// ASCII test *without* the chunk-at-a-time optimizations.
320
313
///
321
314
/// This is carefully structured to produce nice small code -- it's smaller in
@@ -346,6 +339,7 @@ pub const fn is_ascii_simple(mut bytes: &[u8]) -> bool {
346
339
///
347
340
/// If any of these loads produces something for which `contains_nonascii`
348
341
/// (above) returns true, then we know the answer is false.
342
+ #[ cfg( not( all( target_arch = "x86_64" , target_feature = "sse2" ) ) ) ]
349
343
#[ inline]
350
344
#[ rustc_allow_const_fn_unstable( const_eval_select) ] // fallback impl has same behavior
351
345
const fn is_ascii ( s : & [ u8 ] ) -> bool {
@@ -356,7 +350,14 @@ const fn is_ascii(s: &[u8]) -> bool {
356
350
if const {
357
351
is_ascii_simple( s)
358
352
} else {
359
- const USIZE_SIZE : usize = mem:: size_of:: <usize >( ) ;
353
+ /// Returns `true` if any byte in the word `v` is nonascii (>= 128). Snarfed
354
+ /// from `../str/mod.rs`, which does something similar for utf8 validation.
355
+ const fn contains_nonascii( v: usize ) -> bool {
356
+ const NONASCII_MASK : usize = usize :: repeat_u8( 0x80 ) ;
357
+ ( NONASCII_MASK & v) != 0
358
+ }
359
+
360
+ const USIZE_SIZE : usize = size_of:: <usize >( ) ;
360
361
361
362
let len = s. len( ) ;
362
363
let align_offset = s. as_ptr( ) . align_offset( USIZE_SIZE ) ;
@@ -366,7 +367,7 @@ const fn is_ascii(s: &[u8]) -> bool {
366
367
//
367
368
// We also do this for architectures where `size_of::<usize>()` isn't
368
369
// sufficient alignment for `usize`, because it's a weird edge case.
369
- if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < mem :: align_of:: <usize >( ) {
370
+ if len < USIZE_SIZE || len < align_offset || USIZE_SIZE < align_of:: <usize >( ) {
370
371
return is_ascii_simple( s) ;
371
372
}
372
373
@@ -400,7 +401,7 @@ const fn is_ascii(s: &[u8]) -> bool {
400
401
// have alignment information it should have given a `usize::MAX` for
401
402
// `align_offset` earlier, sending things through the scalar path instead of
402
403
// this one, so this check should pass if it's reachable.
403
- debug_assert!( word_ptr. is_aligned_to( mem :: align_of:: <usize >( ) ) ) ;
404
+ debug_assert!( word_ptr. is_aligned_to( align_of:: <usize >( ) ) ) ;
404
405
405
406
// Read subsequent words until the last aligned word, excluding the last
406
407
// aligned word by itself to be done in tail check later, to ensure that
@@ -435,3 +436,48 @@ const fn is_ascii(s: &[u8]) -> bool {
435
436
}
436
437
)
437
438
}
439
+
440
/// ASCII test optimized to use the `pmovmskb` instruction available on `x86-64`
/// platforms.
///
/// Returns `true` when every byte of `bytes` is an ASCII byte (less than
/// `0x80`). An empty slice is reported as ASCII.
///
/// Other platforms are not likely to benefit from this code structure, so they
/// use SWAR techniques to test for ASCII in `usize`-sized chunks.
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
#[inline]
const fn is_ascii(bytes: &[u8]) -> bool {
    // Process chunks of 32 bytes at a time in the fast path to enable
    // auto-vectorization and use of `pmovmskb`. Two 128-bit vector registers
    // can be OR'd together and then the resulting vector can be tested for
    // non-ASCII bytes.
    const CHUNK_SIZE: usize = 32;

    let mut i = 0;

    // Plain `while` loops with indexing are used because this is a `const fn`.
    while i + CHUNK_SIZE <= bytes.len() {
        let chunk_end = i + CHUNK_SIZE;

        // Get LLVM to produce a `pmovmskb` instruction on x86-64 which
        // creates a mask from the most significant bit of each byte.
        // ASCII bytes are less than 128 (0x80), so their most significant
        // bit is unset.
        //
        // The count is at most `CHUNK_SIZE` (32), so it always fits in a `u8`.
        let mut count = 0;
        while i < chunk_end {
            count += bytes[i].is_ascii() as u8;
            i += 1;
        }

        // All bytes should be <= 127 so count is equal to chunk size.
        if count != CHUNK_SIZE as u8 {
            return false;
        }
    }

    // Process the remaining `bytes.len() % CHUNK_SIZE` bytes. The result is
    // accumulated with `&=` instead of returning early on the first non-ASCII
    // byte.
    let mut is_ascii = true;
    while i < bytes.len() {
        is_ascii &= bytes[i].is_ascii();
        i += 1;
    }

    is_ascii
}
0 commit comments