Skip to content

Commit 38a83b8

Browse files
committed
Restructure unsafe code and flesh out SAFETY comments
1 parent 35e97a6 commit 38a83b8

File tree

1 file changed

+39
-11
lines changed

1 file changed

+39
-11
lines changed

core/src/defaults/lexer.rs

+39-11
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,7 @@ unsafe fn find_identifier_end_avx2(input: &str, mut offset: usize) -> usize {
569569

570570
type Chunk = __m256i;
571571

572+
// SAFETY: callers must ensure avx2 intrinsics are supported.
572573
unsafe fn range_mask(x: Chunk, range: RangeInclusive<u8>) -> Chunk {
573574
unsafe {
574575
let lower = _mm256_cmpgt_epi8(_mm256_set1_epi8(*range.end() as i8 + 1), x);
@@ -577,19 +578,36 @@ unsafe fn find_identifier_end_avx2(input: &str, mut offset: usize) -> usize {
577578
}
578579
}
579580

581+
// SAFETY: callers must ensure avx2 intrinsics are supported.
580582
unsafe fn any_non_ascii(chunk: Chunk) -> bool {
581583
unsafe { _mm256_testz_si256(_mm256_set1_epi8(i8::MIN), chunk) == 0 }
582584
}
583585

584586
while (offset + size_of::<Chunk>()) <= input.len() {
585-
// SAFETY: requires that a 32-byte load from `input.as_ptr() + offset` does not touch uninitialised memory.
586-
// The above length check guarantees this.
587+
// SAFETY: `ptr::const_ptr::add` requires that
588+
// * the input ptr and the offset ptr are in-bounds for the same object.
589+
// * the resulting ptr doesn't overflow an isize
590+
//
591+
// The input ptr is valid by because it comes from a rust reference, and the length check
592+
// above guarantees that the offset ptr is in bounds and points into the same allocation.
593+
let chunk_ptr: *const Chunk = unsafe {
594+
// the `loadu` variant of this intrinsic doesn't require aligned addresses
595+
#[allow(clippy::cast_ptr_alignment)]
596+
input.as_ptr().add(offset).cast::<Chunk>()
597+
};
598+
599+
// SAFETY: this load intrinsic requires that
600+
// * `chunk_ptr` can be safely dereferenced
601+
// * avx2 intrinsics are supported
602+
//
603+
// The above length check guarantees that the ptr produced by the above offset is no more
604+
// than `size_of::<Chunk>` from the end of the allocation, and requirement for avx2 is
605+
// forwarded to the caller.
606+
let chunk: Chunk = unsafe { _mm256_loadu_si256(chunk_ptr) };
607+
608+
// SAFETY: requires that avx2 intrinsics are supported
609+
// Requirement forwarded to the caller.
587610
let ident_mask = unsafe {
588-
let chunk = _mm256_loadu_si256(
589-
// the `loadu` variant of this intrinsic doesn't require aligned addresses
590-
#[allow(clippy::cast_ptr_alignment)]
591-
input.as_ptr().add(offset).cast::<Chunk>(),
592-
);
593611
if any_non_ascii(chunk) {
594612
break;
595613
};
@@ -647,13 +665,23 @@ fn find_identifier_end_x86_64(input: &str, offset: usize) -> usize {
647665
}
648666
};
649667
FN.store(fun as Fn, Ordering::Relaxed);
668+
// SAFETY: we just checked that the required intrinsics are supported.
650669
unsafe { fun(input, offset) }
651670
}
652671

653-
unsafe {
654-
let fun = FN.load(Ordering::Relaxed);
655-
core::mem::transmute::<Fn, RealFn>(fun)(input, offset)
656-
}
672+
let fun = FN.load(Ordering::Relaxed);
673+
674+
// SAFETY: requires that
675+
// * `fun` is a valid instance of both types
676+
// * the safety requirements of the unsafe `fun` instance to be upheld
677+
//
678+
// We only ever store instances of `RealFn` inside the `FN` atomic, so it is valid to
679+
// interpret it as a `RealFn`, and any `RealFn` (i.e. a valid function pointer) is safe to
680+
// interpret as `*mut ()` (indeed a safe `as` cast above is enough).
681+
//
682+
// The safety requirements of `fun` are checked above before the store in `FN`, and the
683+
// initial value inside `FN` has no safety requirements.
684+
unsafe { core::mem::transmute::<Fn, RealFn>(fun)(input, offset) }
657685
}
658686

659687
fn find_identifier_end(input: &str, offset: usize) -> usize {

0 commit comments

Comments
 (0)