Skip to content

Commit f701e0d

Browse files
authored
Rollup merge of rust-lang#94452 - workingjubilee:sync-simd-bitmasks, r=workingjubilee
Sync portable-simd for bitmasks &c. In the ideal case, where everything works easily and nothing has to be rearranged, it is as simple as: - `git subtree pull -P library/portable-simd https://github.com/rust-lang/portable-simd - ${branch}` - write the commit message - `python x.py test --stage 1` to make sure it runs - `git push` to your PR-to-rustc branch If anything borks up this flow, you can fix it with sufficient git wizardry but you are usually better off going back to the source, fixing it, and starting over, before you open the PR. r? `@calebzulawski`
2 parents 44421bb + b9ce766 commit f701e0d

21 files changed

+438
-123
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#![feature(portable_simd)]
2+
3+
use core_simd::simd::*;
4+
5+
fn a(i: usize, j: usize) -> f64 {
6+
((i + j) * (i + j + 1) / 2 + i + 1) as f64
7+
}
8+
9+
fn mult_av(v: &[f64], out: &mut [f64]) {
10+
assert!(v.len() == out.len());
11+
assert!(v.len() % 2 == 0);
12+
13+
for (i, out) in out.iter_mut().enumerate() {
14+
let mut sum = f64x2::splat(0.0);
15+
16+
let mut j = 0;
17+
while j < v.len() {
18+
let b = f64x2::from_slice(&v[j..]);
19+
let a = f64x2::from_array([a(i, j), a(i, j + 1)]);
20+
sum += b / a;
21+
j += 2
22+
}
23+
*out = sum.horizontal_sum();
24+
}
25+
}
26+
27+
fn mult_atv(v: &[f64], out: &mut [f64]) {
28+
assert!(v.len() == out.len());
29+
assert!(v.len() % 2 == 0);
30+
31+
for (i, out) in out.iter_mut().enumerate() {
32+
let mut sum = f64x2::splat(0.0);
33+
34+
let mut j = 0;
35+
while j < v.len() {
36+
let b = f64x2::from_slice(&v[j..]);
37+
let a = f64x2::from_array([a(j, i), a(j + 1, i)]);
38+
sum += b / a;
39+
j += 2
40+
}
41+
*out = sum.horizontal_sum();
42+
}
43+
}
44+
45+
fn mult_atav(v: &[f64], out: &mut [f64], tmp: &mut [f64]) {
46+
mult_av(v, tmp);
47+
mult_atv(tmp, out);
48+
}
49+
50+
pub fn spectral_norm(n: usize) -> f64 {
51+
assert!(n % 2 == 0, "only even lengths are accepted");
52+
53+
let mut u = vec![1.0; n];
54+
let mut v = u.clone();
55+
let mut tmp = u.clone();
56+
57+
for _ in 0..10 {
58+
mult_atav(&u, &mut v, &mut tmp);
59+
mult_atav(&v, &mut u, &mut tmp);
60+
}
61+
(dot(&u, &v) / dot(&v, &v)).sqrt()
62+
}
63+
64+
fn dot(x: &[f64], y: &[f64]) -> f64 {
65+
// This is auto-vectorized:
66+
x.iter().zip(y).map(|(&x, &y)| x * y).sum()
67+
}
68+
69+
#[cfg(test)]
70+
#[test]
71+
fn test() {
72+
assert_eq!(&format!("{:.9}", spectral_norm(100)), "1.274219991");
73+
}
74+
75+
fn main() {
76+
// Empty main to make cargo happy
77+
}

portable-simd/crates/core_simd/src/comparisons.rs

+12
Original file line numberDiff line numberDiff line change
@@ -10,13 +10,17 @@ where
1010
#[inline]
1111
#[must_use = "method returns a new mask and does not mutate the original value"]
1212
pub fn lanes_eq(self, other: Self) -> Mask<T::Mask, LANES> {
13+
// Safety: `self` is a vector, and the result of the comparison
14+
// is always a valid mask.
1315
unsafe { Mask::from_int_unchecked(intrinsics::simd_eq(self, other)) }
1416
}
1517

1618
/// Test if each lane is not equal to the corresponding lane in `other`.
1719
#[inline]
1820
#[must_use = "method returns a new mask and does not mutate the original value"]
1921
pub fn lanes_ne(self, other: Self) -> Mask<T::Mask, LANES> {
22+
// Safety: `self` is a vector, and the result of the comparison
23+
// is always a valid mask.
2024
unsafe { Mask::from_int_unchecked(intrinsics::simd_ne(self, other)) }
2125
}
2226
}
@@ -30,27 +34,35 @@ where
3034
#[inline]
3135
#[must_use = "method returns a new mask and does not mutate the original value"]
3236
pub fn lanes_lt(self, other: Self) -> Mask<T::Mask, LANES> {
37+
// Safety: `self` is a vector, and the result of the comparison
38+
// is always a valid mask.
3339
unsafe { Mask::from_int_unchecked(intrinsics::simd_lt(self, other)) }
3440
}
3541

3642
/// Test if each lane is greater than the corresponding lane in `other`.
3743
#[inline]
3844
#[must_use = "method returns a new mask and does not mutate the original value"]
3945
pub fn lanes_gt(self, other: Self) -> Mask<T::Mask, LANES> {
46+
// Safety: `self` is a vector, and the result of the comparison
47+
// is always a valid mask.
4048
unsafe { Mask::from_int_unchecked(intrinsics::simd_gt(self, other)) }
4149
}
4250

4351
/// Test if each lane is less than or equal to the corresponding lane in `other`.
4452
#[inline]
4553
#[must_use = "method returns a new mask and does not mutate the original value"]
4654
pub fn lanes_le(self, other: Self) -> Mask<T::Mask, LANES> {
55+
// Safety: `self` is a vector, and the result of the comparison
56+
// is always a valid mask.
4757
unsafe { Mask::from_int_unchecked(intrinsics::simd_le(self, other)) }
4858
}
4959

5060
/// Test if each lane is greater than or equal to the corresponding lane in `other`.
5161
#[inline]
5262
#[must_use = "method returns a new mask and does not mutate the original value"]
5363
pub fn lanes_ge(self, other: Self) -> Mask<T::Mask, LANES> {
64+
// Safety: `self` is a vector, and the result of the comparison
65+
// is always a valid mask.
5466
unsafe { Mask::from_int_unchecked(intrinsics::simd_ge(self, other)) }
5567
}
5668
}

portable-simd/crates/core_simd/src/intrinsics.rs

+57-11
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,55 @@
22
//! crate.
33
//!
44
//! The LLVM assembly language is documented here: <https://llvm.org/docs/LangRef.html>
5+
//!
6+
//! A quick glossary of jargon that may appear in this module, mostly paraphrasing LLVM's LangRef:
7+
//! - poison: "undefined behavior as a value". specifically, it is like uninit memory (such as padding bytes). it is "safe" to create poison, BUT
8+
//! poison MUST NOT be observed from safe code, as operations on poison return poison, like NaN. unlike NaN, which has defined comparisons,
9+
//! poison is neither true nor false, and LLVM may also convert it to undef (at which point it is both). so, it can't be conditioned on, either.
10+
//! - undef: "a value that is every value". functionally like poison, insofar as Rust is concerned. poison may become this. note:
11+
//! this means that division by poison or undef is like division by zero, which means it inflicts...
12+
//! - "UB": poison and undef cover most of what people call "UB". "UB" means this operation immediately invalidates the program:
13+
//! LLVM is allowed to lower it to `ud2` or other opcodes that may cause an illegal instruction exception, and this is the "good end".
14+
//! The "bad end" is that LLVM may reverse time to the moment control flow diverged on a path towards undefined behavior,
15+
//! and destroy the other branch, potentially deleting safe code and violating Rust's `unsafe` contract.
16+
//!
17+
//! Note that according to LLVM, vectors are not arrays, but they are equivalent when stored to and loaded from memory.
18+
//!
19+
//! Unless stated otherwise, all intrinsics for binary operations require SIMD vectors of equal types and lengths.
520
621
/// These intrinsics aren't linked directly from LLVM and are mostly undocumented, however they are
7-
/// simply lowered to the matching LLVM instructions by the compiler. The associated instruction
8-
/// is documented alongside each intrinsic.
22+
/// mostly lowered to the matching LLVM instructions by the compiler in a fairly straightforward manner.
23+
/// The associated LLVM instruction or intrinsic is documented alongside each Rust intrinsic function.
924
extern "platform-intrinsic" {
1025
/// add/fadd
1126
pub(crate) fn simd_add<T>(x: T, y: T) -> T;
1227

1328
/// sub/fsub
14-
pub(crate) fn simd_sub<T>(x: T, y: T) -> T;
29+
pub(crate) fn simd_sub<T>(lhs: T, rhs: T) -> T;
1530

1631
/// mul/fmul
1732
pub(crate) fn simd_mul<T>(x: T, y: T) -> T;
1833

1934
/// udiv/sdiv/fdiv
20-
pub(crate) fn simd_div<T>(x: T, y: T) -> T;
35+
/// ints and uints: {s,u}div incur UB if division by zero occurs.
36+
/// ints: sdiv is UB for int::MIN / -1.
37+
/// floats: fdiv is never UB, but may create NaNs or infinities.
38+
pub(crate) fn simd_div<T>(lhs: T, rhs: T) -> T;
2139

2240
/// urem/srem/frem
23-
pub(crate) fn simd_rem<T>(x: T, y: T) -> T;
41+
/// ints and uints: {s,u}rem incur UB if division by zero occurs.
42+
/// ints: srem is UB for int::MIN % -1.
43+
/// floats: frem is equivalent to libm::fmod in the "default" floating point environment, sans errno.
44+
pub(crate) fn simd_rem<T>(lhs: T, rhs: T) -> T;
2445

2546
/// shl
26-
pub(crate) fn simd_shl<T>(x: T, y: T) -> T;
47+
/// for (u)ints. poison if rhs >= lhs::BITS
48+
pub(crate) fn simd_shl<T>(lhs: T, rhs: T) -> T;
2749

28-
/// lshr/ashr
29-
pub(crate) fn simd_shr<T>(x: T, y: T) -> T;
50+
/// ints: ashr
51+
/// uints: lshr
52+
/// poison if rhs >= lhs::BITS
53+
pub(crate) fn simd_shr<T>(lhs: T, rhs: T) -> T;
3054

3155
/// and
3256
pub(crate) fn simd_and<T>(x: T, y: T) -> T;
@@ -38,12 +62,18 @@ extern "platform-intrinsic" {
3862
pub(crate) fn simd_xor<T>(x: T, y: T) -> T;
3963

4064
/// fptoui/fptosi/uitofp/sitofp
65+
/// casting floats to integers is truncating, so it is safe to convert values like 1.5,
66+
/// but the truncated value must fit in the target type or the result is poison.
67+
/// use `simd_as` instead for a cast that performs a saturating conversion.
4168
pub(crate) fn simd_cast<T, U>(x: T) -> U;
4269
/// follows Rust's `T as U` semantics, including saturating float casts
4370
/// which amounts to the same as `simd_cast` for many cases
4471
pub(crate) fn simd_as<T, U>(x: T) -> U;
4572

4673
/// neg/fneg
74+
/// ints: ultimately becomes a call to cg_ssa's BuilderMethods::neg. cg_llvm equates this to `simd_sub(Simd::splat(0), x)`.
75+
/// floats: LLVM's fneg, which changes the floating point sign bit. Some arches have instructions for it.
76+
/// Rust panics for Neg::neg(int::MIN) due to overflow, but it is not UB in LLVM without `nsw`.
4777
pub(crate) fn simd_neg<T>(x: T) -> T;
4878

4979
/// fabs
@@ -53,6 +83,7 @@ extern "platform-intrinsic" {
5383
pub(crate) fn simd_fmin<T>(x: T, y: T) -> T;
5484
pub(crate) fn simd_fmax<T>(x: T, y: T) -> T;
5585

86+
// these return Simd<int, N> with the same BITS size as the inputs
5687
pub(crate) fn simd_eq<T, U>(x: T, y: T) -> U;
5788
pub(crate) fn simd_ne<T, U>(x: T, y: T) -> U;
5889
pub(crate) fn simd_lt<T, U>(x: T, y: T) -> U;
@@ -61,19 +92,31 @@ extern "platform-intrinsic" {
6192
pub(crate) fn simd_ge<T, U>(x: T, y: T) -> U;
6293

6394
// shufflevector
95+
// idx: LLVM calls it a "shuffle mask vector constant", a vector of i32s
6496
pub(crate) fn simd_shuffle<T, U, V>(x: T, y: T, idx: U) -> V;
6597

98+
/// llvm.masked.gather
99+
/// like a loop of pointer reads
100+
/// val: vector of values to select if a lane is masked
101+
/// ptr: vector of pointers to read from
102+
/// mask: a "wide" mask of integers, selects as if simd_select(mask, read(ptr), val)
103+
/// note, the LLVM intrinsic accepts a mask vector of <N x i1>
104+
/// FIXME: review this if/when we fix up our mask story in general?
66105
pub(crate) fn simd_gather<T, U, V>(val: T, ptr: U, mask: V) -> T;
106+
/// llvm.masked.scatter
107+
/// like gather, but more spicy, as it writes instead of reads
67108
pub(crate) fn simd_scatter<T, U, V>(val: T, ptr: U, mask: V);
68109

69110
// {s,u}add.sat
70111
pub(crate) fn simd_saturating_add<T>(x: T, y: T) -> T;
71112

72113
// {s,u}sub.sat
73-
pub(crate) fn simd_saturating_sub<T>(x: T, y: T) -> T;
114+
pub(crate) fn simd_saturating_sub<T>(lhs: T, rhs: T) -> T;
74115

75116
// reductions
117+
// llvm.vector.reduce.{add,fadd}
76118
pub(crate) fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
119+
// llvm.vector.reduce.{mul,fmul}
77120
pub(crate) fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
78121
#[allow(unused)]
79122
pub(crate) fn simd_reduce_all<T>(x: T) -> bool;
@@ -90,7 +133,10 @@ extern "platform-intrinsic" {
90133
pub(crate) fn simd_bitmask<T, U>(x: T) -> U;
91134

92135
// select
93-
pub(crate) fn simd_select<M, T>(m: M, a: T, b: T) -> T;
136+
// first argument is a vector of integers, -1 (all bits 1) is "true"
137+
// logically equivalent to (yes & m) | (no & (m^-1)),
138+
// but you can use it on floats.
139+
pub(crate) fn simd_select<M, T>(m: M, yes: T, no: T) -> T;
94140
#[allow(unused)]
95-
pub(crate) fn simd_select_bitmask<M, T>(m: M, a: T, b: T) -> T;
141+
pub(crate) fn simd_select_bitmask<M, T>(m: M, yes: T, no: T) -> T;
96142
}

portable-simd/crates/core_simd/src/lib.rs

+2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
#![cfg_attr(not(feature = "std"), no_std)]
22
#![feature(
33
const_fn_trait_bound,
4+
convert_float_to_int,
45
decl_macro,
6+
intra_doc_pointers,
57
platform_intrinsics,
68
repr_simd,
79
simd_ffi,

portable-simd/crates/core_simd/src/masks.rs

+13-18
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,10 @@
1212
)]
1313
mod mask_impl;
1414

15-
use crate::simd::intrinsics;
16-
use crate::simd::{LaneCount, Simd, SimdElement, SupportedLaneCount};
15+
mod to_bitmask;
16+
pub use to_bitmask::ToBitMask;
17+
18+
use crate::simd::{intrinsics, LaneCount, Simd, SimdElement, SupportedLaneCount};
1719
use core::cmp::Ordering;
1820
use core::{fmt, mem};
1921

@@ -42,6 +44,9 @@ mod sealed {
4244
use sealed::Sealed;
4345

4446
/// Marker trait for types that may be used as SIMD mask elements.
47+
///
48+
/// # Safety
49+
/// Type must be a signed integer.
4550
pub unsafe trait MaskElement: SimdElement + Sealed {}
4651

4752
macro_rules! impl_element {
@@ -149,6 +154,7 @@ where
149154
#[inline]
150155
#[must_use = "method returns a new mask and does not mutate the original value"]
151156
pub unsafe fn from_int_unchecked(value: Simd<T, LANES>) -> Self {
157+
// Safety: the caller must confirm this invariant
152158
unsafe { Self(mask_impl::Mask::from_int_unchecked(value)) }
153159
}
154160

@@ -161,6 +167,7 @@ where
161167
#[must_use = "method returns a new mask and does not mutate the original value"]
162168
pub fn from_int(value: Simd<T, LANES>) -> Self {
163169
assert!(T::valid(value), "all values must be either 0 or -1",);
170+
// Safety: the validity has been checked
164171
unsafe { Self::from_int_unchecked(value) }
165172
}
166173

@@ -179,6 +186,7 @@ where
179186
#[inline]
180187
#[must_use = "method returns a new bool and does not mutate the original value"]
181188
pub unsafe fn test_unchecked(&self, lane: usize) -> bool {
189+
// Safety: the caller must confirm this invariant
182190
unsafe { self.0.test_unchecked(lane) }
183191
}
184192

@@ -190,6 +198,7 @@ where
190198
#[must_use = "method returns a new bool and does not mutate the original value"]
191199
pub fn test(&self, lane: usize) -> bool {
192200
assert!(lane < LANES, "lane index out of range");
201+
// Safety: the lane index has been checked
193202
unsafe { self.test_unchecked(lane) }
194203
}
195204

@@ -199,6 +208,7 @@ where
199208
/// `lane` must be less than `LANES`.
200209
#[inline]
201210
pub unsafe fn set_unchecked(&mut self, lane: usize, value: bool) {
211+
// Safety: the caller must confirm this invariant
202212
unsafe {
203213
self.0.set_unchecked(lane, value);
204214
}
@@ -211,27 +221,12 @@ where
211221
#[inline]
212222
pub fn set(&mut self, lane: usize, value: bool) {
213223
assert!(lane < LANES, "lane index out of range");
224+
// Safety: the lane index has been checked
214225
unsafe {
215226
self.set_unchecked(lane, value);
216227
}
217228
}
218229

219-
/// Convert this mask to a bitmask, with one bit set per lane.
220-
#[cfg(feature = "generic_const_exprs")]
221-
#[inline]
222-
#[must_use = "method returns a new array and does not mutate the original value"]
223-
pub fn to_bitmask(self) -> [u8; LaneCount::<LANES>::BITMASK_LEN] {
224-
self.0.to_bitmask()
225-
}
226-
227-
/// Convert a bitmask to a mask.
228-
#[cfg(feature = "generic_const_exprs")]
229-
#[inline]
230-
#[must_use = "method returns a new mask and does not mutate the original value"]
231-
pub fn from_bitmask(bitmask: [u8; LaneCount::<LANES>::BITMASK_LEN]) -> Self {
232-
Self(mask_impl::Mask::from_bitmask(bitmask))
233-
}
234-
235230
/// Returns true if any lane is set, or false otherwise.
236231
#[inline]
237232
#[must_use = "method returns a new bool and does not mutate the original value"]

0 commit comments

Comments
 (0)