model-checking
diff --git a/‎portable-simd/crates/core_simd/examples/spectral_norm.rs
+77 b/‎portable-simd/crates/core_simd/examples/spectral_norm.rs
+77
diff --git a/‎portable-simd/crates/core_simd/src/comparisons.rs
+12 b/‎portable-simd/crates/core_simd/src/comparisons.rs
+12
diff --git a/‎portable-simd/crates/core_simd/src/intrinsics.rs
+57-11 b/‎portable-simd/crates/core_simd/src/intrinsics.rs
+57-11
diff --git a/‎portable-simd/crates/core_simd/src/lib.rs
+2 b/‎portable-simd/crates/core_simd/src/lib.rs
+2
diff --git a/‎portable-simd/crates/core_simd/src/masks.rs
+13-18 b/‎portable-simd/crates/core_simd/src/masks.rs
+13-18
@@ -0,0 +1,77 @@
+#![feature(portable_simd)]
+
+use core_simd::simd::*;
+
+fn a(i: usize, j: usize) -> f64 {
+    ((i + j) * (i + j + 1) / 2 + i + 1) as f64
+}
+
+fn mult_av(v: &[f64], out: &mut [f64]) {
+    assert!(v.len() == out.len());
+    assert!(v.len() % 2 == 0);
+
+    for (i, out) in out.iter_mut().enumerate() {
+        let mut sum = f64x2::splat(0.0);
+
+        let mut j = 0;
+        while j < v.len() {
+            let b = f64x2::from_slice(&v[j..]);
+            let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
+            sum += b / a;
+            j += 2
+        }
+        *out = sum.horizontal_sum();
+    }
+}
+
+fn mult_atv(v: &[f64], out: &mut [f64]) {
+    assert!(v.len() == out.len());
+    assert!(v.len() % 2 == 0);
+
+    for (i, out) in out.iter_mut().enumerate() {
+        let mut sum = f64x2::splat(0.0);
+
+        let mut j = 0;
+        while j < v.len() {
+            let b = f64x2::from_slice(&v[j..]);
+            let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
+            sum += b / a;
+            j += 2
+        }
+        *out = sum.horizontal_sum();
+    }
+}
+
+fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
+    mult_av(v, tmp);
+    mult_atv(tmp, out);
+}
+
+pub fn spectral_norm(n: usize) -> f64 {
+    assert!(n % 2 == 0, "only even lengths are accepted");
+
+    let mut u = vec![1.0; n];
+    let mut v = u.clone();
+    let mut tmp = u.clone();
+
+    for _ in 0..10 {
+        mult_atav(&u, &mut v, &mut tmp);
+        mult_atav(&v, &mut u, &mut tmp);
+    }
+    (dot(&u, &v) / dot(&v, &v)).sqrt()
+}
+
+fn dot(x: &[f64], y: &[f64]) -> f64 {
+    // This is auto-vectorized:
+    x.iter().zip(y).map(|(&x, &y)| x * y).sum()
+}
+
+#[cfg(test)]
+#[test]
+fn test() {
+    assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
+}
+
+fn main() {
+    // Empty main to make cargo happy
+}
@@ -10,13 +10,17 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
     }
 
     /// Test if each lane is not equal to the corresponding lane in `other`.
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
     }
 }
@@ -30,27 +34,35 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
     }
 
     /// Test if each lane is greater than the corresponding lane in `other`.
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
     }
 
     /// Test if each lane is less than or equal to the corresponding lane in `other`.
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
     }
 
     /// Test if each lane is greater than or equal to the corresponding lane in `other`.
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
+        // Safety: `self` is a vector, and the result of the comparison
+        // is always a valid mask.
         unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
     }
 }
@@ -2,31 +2,55 @@
 //! crate.
 //!
 //! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
+//!
+//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
+//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
+//!   poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
+//!   poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
+//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
+//!   this means that division by poison or undef is like division by zero, which means it inflicts...
+//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
+//!   LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
+//!   The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
+//!   and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
+//!
+//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
+//!
+//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
 
 /// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
-/// simply lowered to the matching LLVM instructions by the compiler.  The associated instruction
-/// is documented alongside each intrinsic.
+/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
+/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
 extern "platform-intrinsic" {
     /// add/fadd
     pub(crate) fn simd_add<T>(x: T, y: T) -> T;
 
     /// sub/fsub
-    pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
 
     /// mul/fmul
     pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
 
     /// udiv/sdiv/fdiv
-    pub(crate) fn simd_div<T>(x: T, y: T) -> T;
+    /// ints and uints: {s,u}div incur UB if division by zero occurs.
+    /// ints: sdiv is UB for int::MIN / -1.
+    /// floats: fdiv is never UB, but may create NaNs or infinities.
+    pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
 
     /// urem/srem/frem
-    pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
+    /// ints and uints: {s,u}rem incur UB if division by zero occurs.
+    /// ints: srem is UB for int::MIN / -1.
+    /// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
+    pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
 
     /// shl
-    pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
+    /// for (u)ints. poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
 
-    /// lshr/ashr
-    pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
+    /// ints: ashr
+    /// uints: lshr
+    /// poison if rhs >= lhs::BITS
+    pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
 
     /// and
     pub(crate) fn simd_and<T>(x: T, y: T) -> T;
@@ -38,12 +62,18 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
 
     /// fptoui/fptosi/uitofp/sitofp
+    /// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5
+    /// but the truncated value must fit in the target type or the result is poison.
+    /// use `simd_as` instead for a cast that performs a saturating conversion.
     pub(crate) fn simd_cast<T, U>(x: T) -> U;
     /// follows Rust's `T as U` semantics, including saturating float casts
     /// which amounts to the same as `simd_cast` for many cases
     pub(crate) fn simd_as<T, U>(x: T) -> U;
 
     /// neg/fneg
+    /// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
+    /// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
+    /// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
     pub(crate) fn simd_neg<T>(x: T) -> T;
 
     /// fabs
@@ -53,6 +83,7 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
     pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
 
+    // these return Simd<int, N> with the same BITS size as the inputs
     pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
     pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
     pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@@ -61,19 +92,31 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
 
     // shufflevector
+    // idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
     pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
 
+    /// llvm.masked.gather
+    /// like a loop of pointer reads
+    /// val: vector of values to select if a lane is masked
+    /// ptr: vector of pointers to read from
+    /// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
+    /// note, the LLVM intrinsic accepts a mask vector of <N x i1>
+    /// FIXME: review this if/when we fix up our mask story in general?
     pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
+    /// llvm.masked.scatter
+    /// like gather, but more spicy, as it writes instead of reads
     pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
 
     // {s,u}add.sat
     pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
 
     // {s,u}sub.sat
-    pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
+    pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
 
     // reductions
+    // llvm.vector.reduce.{add,fadd}
     pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
+    // llvm.vector.reduce.{mul,fmul}
     pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
     #[allow(unused)]
     pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
@@ -90,7 +133,10 @@ extern "platform-intrinsic" {
     pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
 
     // select
-    pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
+    // first argument is a vector of integers, -1 (all bits 1) is "true"
+    // logically equivalent to (yes & m) | (no & (m^-1),
+    // but you can use it on floats.
+    pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
     #[allow(unused)]
-    pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
+    pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
 }
@@ -1,7 +1,9 @@
 #![cfg_attr(not(feature = "std"), no_std)]
 #![feature(
     const_fn_trait_bound,
+    convert_float_to_int,
     decl_macro,
+    intra_doc_pointers,
     platform_intrinsics,
     repr_simd,
     simd_ffi,
 
@@ -12,8 +12,10 @@
 )]
 mod mask_impl;
 
-use crate::simd::intrinsics;
-use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
+mod to_bitmask;
+pub use to_bitmask::ToBitMask;
+
+use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount};
 use core::cmp::Ordering;
 use core::{fmt, mem};
 
@@ -42,6 +44,9 @@ mod sealed {
 use sealed::Sealed;
 
 /// Marker trait for types that may be used as SIMD mask elements.
+///
+/// # Safety
+/// Type must be a signed integer.
 pub unsafe trait MaskElement: SimdElement + Sealed {}
 
 macro_rules! impl_element {
@@ -149,6 +154,7 @@ where
     #[inline]
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
+        // Safety: the caller must confirm this invariant
         unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
     }
 
@@ -161,6 +167,7 @@ where
     #[must_use = "method returns a new mask and does not mutate the original value"]
     pub fn from_int(value: Simd<T, LANES>) -> Self {
         assert!(T::valid(value), "all values must be either 0 or -1",);
+        // Safety: the validity has been checked
         unsafe { Self::from_int_unchecked(value) }
     }
 
@@ -179,6 +186,7 @@ where
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]
     pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
+        // Safety: the caller must confirm this invariant
         unsafe { self.0.test_unchecked(lane) }
     }
 
@@ -190,6 +198,7 @@ where
     #[must_use = "method returns a new bool and does not mutate the original value"]
     pub fn test(&self, lane: usize) -> bool {
         assert!(lane < LANES, "lane index out of range");
+        // Safety: the lane index has been checked
         unsafe { self.test_unchecked(lane) }
     }
 
@@ -199,6 +208,7 @@ where
     /// `lane` must be less than `LANES`.
     #[inline]
     pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
+        // Safety: the caller must confirm this invariant
         unsafe {
             self.0.set_unchecked(lane, value);
         }
@@ -211,27 +221,12 @@ where
     #[inline]
     pub fn set(&mut self, lane: usize, value: bool) {
         assert!(lane < LANES, "lane index out of range");
+        // Safety: the lane index has been checked
         unsafe {
             self.set_unchecked(lane, value);
         }
     }
 
-    /// Convert this mask to a bitmask, with one bit set per lane.
-    #[cfg(feature = "generic_const_exprs")]
-    #[inline]
-    #[must_use = "method returns a new array and does not mutate the original value"]
-    pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
-        self.0.to_bitmask()
-    }
-
-    /// Convert a bitmask to a mask.
-    #[cfg(feature = "generic_const_exprs")]
-    #[inline]
-    #[must_use = "method returns a new mask and does not mutate the original value"]
-    pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
-        Self(mask_impl::Mask::from_bitmask(bitmask))
-    }
-
     /// Returns true if any lane is set, or false otherwise.
     #[inline]
     #[must_use = "method returns a new bool and does not mutate the original value"]