Skip to content

Commit b9ce766

Browse files
1 parent 3606566 commit b9ce766

21 files changed

+438
-123
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#![feature(portable_simd)]
2+
3+
use core_simd::simd::*;
4+
5+
/// Returns the value `(i + j) * (i + j + 1) / 2 + i + 1` as an `f64`.
///
/// `mult_av` and `mult_atv` divide by this value, so it acts as the reciprocal
/// of the matrix entry at row `i`, column `j`.
///
/// The product `(i + j) * (i + j + 1)` is always even. The even factor is
/// halved *before* multiplying so the intermediate value never exceeds the
/// final triangular number; multiplying first can overflow `usize` even when
/// the result itself fits.
fn a(i: usize, j: usize) -> f64 {
    let s = i + j;
    // Exactly one of `s` and `s + 1` is even, so one of these divisions is exact.
    let triangular = if s % 2 == 0 {
        (s / 2) * (s + 1)
    } else {
        s * ((s + 1) / 2)
    };
    (triangular + i + 1) as f64
}
8+
9+
fn mult_av(v: &[f64], out: &mut [f64]) {
10+
assert!(v.len() == out.len());
11+
assert!(v.len() % 2 == 0);
12+
13+
for (i, out) in out.iter_mut().enumerate() {
14+
let mut sum = f64x2::splat(0.0);
15+
16+
let mut j = 0;
17+
while j < v.len() {
18+
let b = f64x2::from_slice(&v[j..]);
19+
let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
20+
sum += b / a;
21+
j += 2
22+
}
23+
*out = sum.horizontal_sum();
24+
}
25+
}
26+
27+
fn mult_atv(v: &[f64], out: &mut [f64]) {
28+
assert!(v.len() == out.len());
29+
assert!(v.len() % 2 == 0);
30+
31+
for (i, out) in out.iter_mut().enumerate() {
32+
let mut sum = f64x2::splat(0.0);
33+
34+
let mut j = 0;
35+
while j < v.len() {
36+
let b = f64x2::from_slice(&v[j..]);
37+
let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
38+
sum += b / a;
39+
j += 2
40+
}
41+
*out = sum.horizontal_sum();
42+
}
43+
}
44+
45+
/// Applies `mult_av` followed by `mult_atv`.
///
/// `tmp` is scratch space for the intermediate result of `mult_av(v, ..)`;
/// the final result is written to `out`. All three slices must satisfy the
/// length requirements asserted by `mult_av` and `mult_atv`.
fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
    // First pass: tmp <- mult_av(v)
    mult_av(v, tmp);
    // Second pass: out <- mult_atv(tmp)
    mult_atv(tmp, out);
}
49+
50+
pub fn spectral_norm(n: usize) -> f64 {
51+
assert!(n % 2 == 0, "only even lengths are accepted");
52+
53+
let mut u = vec![1.0; n];
54+
let mut v = u.clone();
55+
let mut tmp = u.clone();
56+
57+
for _ in 0..10 {
58+
mult_atav(&u, &mut v, &mut tmp);
59+
mult_atav(&v, &mut u, &mut tmp);
60+
}
61+
(dot(&u, &v) / dot(&v, &v)).sqrt()
62+
}
63+
64+
/// Returns the sum of the element-wise products of `x` and `y`.
///
/// Elements are paired positionally; if the slices differ in length, the
/// extra tail of the longer one is ignored.
fn dot(x: &[f64], y: &[f64]) -> f64 {
    // No explicit SIMD here: the original author notes that the compiler
    // auto-vectorizes this iterator chain.
    let products = x.iter().zip(y.iter()).map(|(lhs, rhs)| lhs * rhs);
    products.sum()
}
68+
69+
/// Checks `spectral_norm(100)` against a reference value, compared to nine
/// decimal places.
#[cfg(test)]
#[test]
fn test() {
    let rendered = format!("{:.9}", spectral_norm(100));
    assert_eq!(rendered, "1.274219991");
}
74+
75+
/// Intentionally empty entry point.
fn main() {
    // Nothing to run here; `main` exists only so that cargo accepts this
    // file as a binary target.
}

portable-simd/crates/core_simd/src/comparisons.rs

+12
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,17 @@ where
1010
#[inline]
1111
#[must_use = "method returns a new mask and does not mutate the original value"]
1212
pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
13+
// Safety: `self` is a vector, and the result of the comparison
14+
// is always a valid mask.
1315
unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
1416
}
1517

1618
/// Test if each lane is not equal to the corresponding lane in `other`.
1719
#[inline]
1820
#[must_use = "method returns a new mask and does not mutate the original value"]
1921
pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
22+
// Safety: `self` is a vector, and the result of the comparison
23+
// is always a valid mask.
2024
unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
2125
}
2226
}
@@ -30,27 +34,35 @@ where
3034
#[inline]
3135
#[must_use = "method returns a new mask and does not mutate the original value"]
3236
pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
37+
// Safety: `self` is a vector, and the result of the comparison
38+
// is always a valid mask.
3339
unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
3440
}
3541

3642
/// Test if each lane is greater than the corresponding lane in `other`.
3743
#[inline]
3844
#[must_use = "method returns a new mask and does not mutate the original value"]
3945
pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
46+
// Safety: `self` is a vector, and the result of the comparison
47+
// is always a valid mask.
4048
unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
4149
}
4250

4351
/// Test if each lane is less than or equal to the corresponding lane in `other`.
4452
#[inline]
4553
#[must_use = "method returns a new mask and does not mutate the original value"]
4654
pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
55+
// Safety: `self` is a vector, and the result of the comparison
56+
// is always a valid mask.
4757
unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
4858
}
4959

5060
/// Test if each lane is greater than or equal to the corresponding lane in `other`.
5161
#[inline]
5262
#[must_use = "method returns a new mask and does not mutate the original value"]
5363
pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
64+
// Safety: `self` is a vector, and the result of the comparison
65+
// is always a valid mask.
5466
unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
5567
}
5668
}

portable-simd/crates/core_simd/src/intrinsics.rs

+57-11
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,55 @@
22
//! crate.
33
//!
44
//! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
5+
//!
6+
//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
7+
//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
8+
//! poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
9+
//! poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
10+
//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
11+
//! this means that division by poison or undef is like division by zero, which means it inflicts...
12+
//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
13+
//! LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
14+
//! The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
15+
//! and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
16+
//!
17+
//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
18+
//!
19+
//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
520
621
/// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
7-
/// simply lowered to the matching LLVM instructions by the compiler. The associated instruction
8-
/// is documented alongside each intrinsic.
22+
/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
23+
/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
924
extern "platform-intrinsic" {
1025
/// add/fadd
1126
pub(crate) fn simd_add<T>(x: T, y: T) -> T;
1227

1328
/// sub/fsub
14-
pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
29+
pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
1530

1631
/// mul/fmul
1732
pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
1833

1934
/// udiv/sdiv/fdiv
20-
pub(crate) fn simd_div<T>(x: T, y: T) -> T;
35+
/// ints and uints: {s,u}div incur UB if division by zero occurs.
36+
/// ints: sdiv is UB for int::MIN / -1.
37+
/// floats: fdiv is never UB, but may create NaNs or infinities.
38+
pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
2139

2240
/// urem/srem/frem
23-
pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
41+
/// ints and uints: {s,u}rem incur UB if division by zero occurs.
42+
/// ints: srem is UB for int::MIN % -1.
43+
/// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
44+
pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
2445

2546
/// shl
26-
pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
47+
/// for (u)ints. poison if rhs >= lhs::BITS
48+
pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
2749

28-
/// lshr/ashr
29-
pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
50+
/// ints: ashr
51+
/// uints: lshr
52+
/// poison if rhs >= lhs::BITS
53+
pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
3054

3155
/// and
3256
pub(crate) fn simd_and<T>(x: T, y: T) -> T;
@@ -38,13 +62,19 @@ extern "platform-intrinsic" {
3862
pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
3963

4064
/// fptoui/fptosi/uitofp/sitofp
65+
/// casting floats to integers is truncating, so it is safe to convert values like e.g. 1.5
66+
/// but the truncated value must fit in the target type or the result is poison.
67+
/// use `simd_as` instead for a cast that performs a saturating conversion.
4168
pub(crate) fn simd_cast<T, U>(x: T) -> U;
4269
/// follows Rust's `T as U` semantics, including saturating float casts
4370
/// which amounts to the same as `simd_cast` for many cases
4471
#[cfg(not(bootstrap))]
4572
pub(crate) fn simd_as<T, U>(x: T) -> U;
4673

4774
/// neg/fneg
75+
/// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
76+
/// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
77+
/// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
4878
pub(crate) fn simd_neg<T>(x: T) -> T;
4979

5080
/// fabs
@@ -54,6 +84,7 @@ extern "platform-intrinsic" {
5484
pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
5585
pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
5686

87+
// these return Simd<int, N> with the same BITS size as the inputs
5788
pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
5889
pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
5990
pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@@ -62,19 +93,31 @@ extern "platform-intrinsic" {
6293
pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
6394

6495
// shufflevector
96+
// idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
6597
pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
6698

99+
/// llvm.masked.gather
100+
/// like a loop of pointer reads
101+
/// val: vector of values to select if a lane is masked
102+
/// ptr: vector of pointers to read from
103+
/// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
104+
/// note, the LLVM intrinsic accepts a mask vector of <N x i1>
105+
/// FIXME: review this if/when we fix up our mask story in general?
67106
pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
107+
/// llvm.masked.scatter
108+
/// like gather, but more spicy, as it writes instead of reads
68109
pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
69110

70111
// {s,u}add.sat
71112
pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
72113

73114
// {s,u}sub.sat
74-
pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
115+
pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
75116

76117
// reductions
118+
// llvm.vector.reduce.{add,fadd}
77119
pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
120+
// llvm.vector.reduce.{mul,fmul}
78121
pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
79122
#[allow(unused)]
80123
pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
@@ -91,7 +134,10 @@ extern "platform-intrinsic" {
91134
pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
92135

93136
// select
94-
pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
137+
// first argument is a vector of integers, -1 (all bits 1) is "true"
138+
// logically equivalent to (yes & m) | (no & (m^-1)),
139+
// but you can use it on floats.
140+
pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
95141
#[allow(unused)]
96-
pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
142+
pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
97143
}

portable-simd/crates/core_simd/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#![cfg_attr(not(feature = "std"), no_std)]
22
#![feature(
33
const_fn_trait_bound,
4+
convert_float_to_int,
45
decl_macro,
6+
intra_doc_pointers,
57
platform_intrinsics,
68
repr_simd,
79
simd_ffi,

portable-simd/crates/core_simd/src/masks.rs

+13-18
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@
1212
)]
1313
mod mask_impl;
1414

15-
use crate::simd::intrinsics;
16-
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
15+
mod to_bitmask;
16+
pub use to_bitmask::ToBitMask;
17+
18+
use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount};
1719
use core::cmp::Ordering;
1820
use core::{fmt, mem};
1921

@@ -42,6 +44,9 @@ mod sealed {
4244
use sealed::Sealed;
4345

4446
/// Marker trait for types that may be used as SIMD mask elements.
47+
///
48+
/// # Safety
49+
/// Type must be a signed integer.
4550
pub unsafe trait MaskElement: SimdElement + Sealed {}
4651

4752
macro_rules! impl_element {
@@ -149,6 +154,7 @@ where
149154
#[inline]
150155
#[must_use = "method returns a new mask and does not mutate the original value"]
151156
pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
157+
// Safety: the caller must confirm this invariant
152158
unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
153159
}
154160

@@ -161,6 +167,7 @@ where
161167
#[must_use = "method returns a new mask and does not mutate the original value"]
162168
pub fn from_int(value: Simd<T, LANES>) -> Self {
163169
assert!(T::valid(value), "all values must be either 0 or -1",);
170+
// Safety: the validity has been checked
164171
unsafe { Self::from_int_unchecked(value) }
165172
}
166173

@@ -179,6 +186,7 @@ where
179186
#[inline]
180187
#[must_use = "method returns a new bool and does not mutate the original value"]
181188
pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
189+
// Safety: the caller must confirm this invariant
182190
unsafe { self.0.test_unchecked(lane) }
183191
}
184192

@@ -190,6 +198,7 @@ where
190198
#[must_use = "method returns a new bool and does not mutate the original value"]
191199
pub fn test(&self, lane: usize) -> bool {
192200
assert!(lane < LANES, "lane index out of range");
201+
// Safety: the lane index has been checked
193202
unsafe { self.test_unchecked(lane) }
194203
}
195204

@@ -199,6 +208,7 @@ where
199208
/// `lane` must be less than `LANES`.
200209
#[inline]
201210
pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
211+
// Safety: the caller must confirm this invariant
202212
unsafe {
203213
self.0.set_unchecked(lane, value);
204214
}
@@ -211,27 +221,12 @@ where
211221
#[inline]
212222
pub fn set(&mut self, lane: usize, value: bool) {
213223
assert!(lane < LANES, "lane index out of range");
224+
// Safety: the lane index has been checked
214225
unsafe {
215226
self.set_unchecked(lane, value);
216227
}
217228
}
218229

219-
/// Convert this mask to a bitmask, with one bit set per lane.
220-
#[cfg(feature = "generic_const_exprs")]
221-
#[inline]
222-
#[must_use = "method returns a new array and does not mutate the original value"]
223-
pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
224-
self.0.to_bitmask()
225-
}
226-
227-
/// Convert a bitmask to a mask.
228-
#[cfg(feature = "generic_const_exprs")]
229-
#[inline]
230-
#[must_use = "method returns a new mask and does not mutate the original value"]
231-
pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
232-
Self(mask_impl::Mask::from_bitmask(bitmask))
233-
}
234-
235230
/// Returns true if any lane is set, or false otherwise.
236231
#[inline]
237232
#[must_use = "method returns a new bool and does not mutate the original value"]

0 commit comments

Comments
 (0)