#245: Explain unsafe contracts of core::simd

workingjubilee · web-flow · commit 78a18c3433c1 · 2022-02-10T09:32:44.000-08:00
* Explain unsafe contracts of core::simd

This permeates the module with remarks on safety for pub methods,
layout of the Simd type, correct use of intrinsics, et cetera.
This is mostly to help others curious about how core::simd works,
including other Rust contributors, `unsafe` library authors,
and eventually ourselves.
diff --git a/crates/core_simd/src/intrinsics.rs b/crates/core_simd/src/intrinsics.rs
@@ -2,16 +2,31 @@
 //! crate.
 //!
 //! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
+//!
+//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
+//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
+//!   poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
+//!   poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
+//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
+//!   this means that division by poison or undef is like division by zero, which means it inflicts...
+//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
+//!   LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
+//!   The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
+//!   and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
+//!
+//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
+//!
+//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
 
 /// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
-/// simply lowered to the matching LLVM instructions by the compiler.  The associated instruction
-/// is documented alongside each intrinsic.
+/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
+/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
 extern "platform-intrinsic" {
     /// add/fadd
     pub(crate) fn simd_add<T>(x: T, y: T) -> T;
 
     /// sub/fsub
-    pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
 
     /// mul/fmul
     pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
@@ -20,19 +35,22 @@ extern "platform-intrinsic" {
     /// ints and uints: {s,u}div incur UB if division by zero occurs.
     /// ints: sdiv is UB for int::MIN / -1.
     /// floats: fdiv is never UB, but may create NaNs or infinities.
-    pub(crate) fn simd_div<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
 
     /// urem/srem/frem
     /// ints and uints: {s,u}rem incur UB if division by zero occurs.
     /// ints: srem is UB for int::MIN / -1.
     /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
-    pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
 
     /// shl
-    pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
+    /// for (u)ints. poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
 
-    /// lshr/ashr
-    pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
+    /// ints: ashr
+    /// uints: lshr
+    /// poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
 
     /// and
     pub(crate) fn simd_and<T>(x: T, y: T) -> T;
@@ -44,6 +62,9 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
 
     /// fptoui/fptosi/uitofp/sitofp
+    /// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5
+    /// but the truncated value must fit in the target type or the result is poison.
+    /// use `simd_as` instead for a cast that performs a saturating conversion.
     pub(crate) fn simd_cast<T, U>(x: T) -> U;
     /// follows Rust's `T as U` semantics, including saturating float casts
     /// which amounts to the same as `simd_cast` for many cases
@@ -63,6 +84,7 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
     pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
 
+    // these return Simd<int, N> with the same BITS size as the inputs
     pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
     pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
     pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@@ -71,19 +93,31 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
 
     // shufflevector
+    // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
     pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
 
+    /// llvm.masked.gather
+    /// like a loop of pointer reads
+    /// val: vector of values to select if a lane is masked
+    /// ptr: vector of pointers to read from
+    /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
+    /// note, the LLVM intrinsic accepts a mask vector of <N x i1>
+    /// FIXME: review this if/when we fix up our mask story in general?
     pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
+    /// llvm.masked.scatter
+    /// like gather, but more spicy, as it writes instead of reads
     pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
 
     // {s,u}add.sat
     pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
 
     // {s,u}sub.sat
-    pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
 
     // reductions
+    // llvm.vector.reduce.{add,fadd}
     pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
+    // llvm.vector.reduce.{mul,fmul}
     pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
     #[allow(unused)]
     pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
@@ -100,7 +134,10 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
 
     // select
-    pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
+    // first argument is a vector of integers, -1 (all bits 1) is "true"
+    // logically equivalent to (yes & m) | (no & (m^-1),
+    // but you can use it on floats.
+    pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
     #[allow(unused)]
-    pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
+    pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
 }
diff --git a/crates/core_simd/src/lib.rs b/crates/core_simd/src/lib.rs
@@ -3,6 +3,7 @@
     const_fn_trait_bound,
     convert_float_to_int,
     decl_macro,
+    intra_doc_pointers,
     platform_intrinsics,
     repr_simd,
     simd_ffi,
diff --git a/crates/core_simd/src/round.rs b/crates/core_simd/src/round.rs
@@ -19,6 +19,11 @@ macro_rules! implement {
             /// * Not be NaN
             /// * Not be infinite
             /// * Be representable in the return type, after truncating off its fractional part
+            ///
+            /// If these requirements are infeasible or costly, consider using the safe function [cast],
+            /// which saturates on conversion.
+            ///
+            /// [cast]: Simd::cast
             #[inline]
             pub unsafe fn to_int_unchecked<I>(self) -> Simd<I, LANES>
             where
diff --git a/crates/core_simd/src/select.rs b/crates/core_simd/src/select.rs
@@ -11,6 +11,7 @@ where
     /// For each lane in the mask, choose the corresponding lane from `true_values` if
     /// that lane mask is true, and `false_values` if that lane mask is false.
     ///
+    /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "std")] use core_simd::{Simd, Mask};
@@ -31,6 +32,8 @@ where
     where
         U: SimdElement<Mask = T>,
     {
+        // Safety: The mask has been cast to a vector of integers,
+        // and the operands to select between are vectors of the same type and length.
         unsafe { intrinsics::simd_select(self.to_int(), true_values, false_values) }
     }
 
@@ -39,6 +42,7 @@ where
     /// For each lane in the mask, choose the corresponding lane from `true_values` if
     /// that lane mask is true, and `false_values` if that lane mask is false.
     ///
+    /// # Examples
     /// ```
     /// # #![feature(portable_simd)]
     /// # #[cfg(feature = "std")] use core_simd::Mask;
diff --git a/crates/core_simd/src/vector.rs b/crates/core_simd/src/vector.rs
@@ -44,6 +44,47 @@ use crate::simd::{LaneCount, Mask, MaskElement, SupportedLaneCount};
 ///
 /// [`Wrapping<T>`]: core::num::Wrapping
 ///
+/// # Layout
+/// `Simd<T, N>` has a layout similar to `[T; N]` (identical "shapes"), but with a greater alignment.
+/// `[T; N]` is aligned to `T`, but `Simd<T, N>` will have an alignment based on both `T` and `N`.
+/// It is thus sound to [`transmute`] `Simd<T, N>` to `[T; N]`, and will typically optimize to zero cost,
+/// but the reverse transmutation is more likely to require a copy the compiler cannot simply elide.
+///
+/// # ABI "Features"
+/// Due to Rust's safety guarantees, `Simd<T, N>` is currently passed to and from functions via memory, not SIMD registers,
+/// except as an optimization. `#[inline]` hints are recommended on functions that accept `Simd<T, N>` or return it.
+/// The need for this may be corrected in the future.
+///
+/// # Safe SIMD with Unsafe Rust
+///
+/// Operations with `Simd` are typically safe, but there are many reasons to want to combine SIMD with `unsafe` code.
+/// Care must be taken to respect differences between `Simd` and other types it may be transformed into or derived from.
+/// In particular, the layout of `Simd<T, N>` may be similar to `[T; N]`, and may allow some transmutations,
+/// but references to `[T; N]` are not interchangeable with those to `Simd<T, N>`.
+/// Thus, when using `unsafe` Rust to read and write `Simd<T, N>` through [raw pointers], it is a good idea to first try with
+/// [`read_unaligned`] and [`write_unaligned`]. This is because:
+/// - [`read`] and [`write`] require full alignment (in this case, `Simd<T, N>`'s alignment)
+/// - the likely source for reading or destination for writing `Simd<T, N>` is [`[T]`](slice) and similar types, aligned to `T`
+/// - combining these actions would violate the `unsafe` contract and explode the program into a puff of **undefined behavior**
+/// - the compiler can implicitly adjust layouts to make unaligned reads or writes fully aligned if it sees the optimization
+/// - most contemporary processors suffer no performance penalty for "unaligned" reads and writes that are aligned at runtime
+///
+/// By imposing less obligations, unaligned functions are less likely to make the program unsound,
+/// and may be just as fast as stricter alternatives.
+/// When trying to guarantee alignment, [`[T]::as_simd`][as_simd] is an option for converting `[T]` to `[Simd<T, N>]`,
+/// and allows soundly operating on an aligned SIMD body, but it may cost more time when handling the scalar head and tail.
+/// If these are not sufficient, then it is most ideal to design data structures to be already aligned
+/// to the `Simd<T, N>` you wish to use before using `unsafe` Rust to read or write.
+/// More conventional ways to compensate for these facts, like materializing `Simd` to or from an array first,
+/// are handled by safe methods like [`Simd::from_array`] and [`Simd::from_slice`].
+///
+/// [`transmute`]: core::mem::transmute
+/// [raw pointers]: pointer
+/// [`read_unaligned`]: pointer::read_unaligned
+/// [`write_unaligned`]: pointer::write_unaligned
+/// [`read`]: pointer::read
+/// [`write`]: pointer::write
+/// [as_simd]: slice::as_simd
 #[repr(simd)]
 pub struct Simd<T, const LANES: usize>([T; LANES])
 where
@@ -133,6 +174,7 @@ where
     #[inline]
     #[cfg(not(bootstrap))]
     pub fn cast<U: SimdElement>(self) -> Simd<U, LANES> {
+        // Safety: The input argument is a vector of a known SIMD type.
         unsafe { intrinsics::simd_as(self) }
     }