Skip to content

Commit a456149

Browse files
committed
Auto merge of rust-lang#3086 - eduardosm:x86-sse3-intrinsics, r=RalfJung
Implement SSE3 and SSSE3 intrinsics
2 parents 935ced5 + 2c13713 commit a456149

File tree

4 files changed

+737
-0
lines changed

4 files changed

+737
-0
lines changed

src/tools/miri/src/shims/x86/mod.rs

+53
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ use shims::foreign_items::EmulateByNameResult;
99

1010
mod sse;
1111
mod sse2;
12+
mod sse3;
13+
mod ssse3;
1214

1315
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
1416
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
@@ -88,6 +90,16 @@ pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
8890
this, link_name, abi, args, dest,
8991
);
9092
}
93+
name if name.starts_with("sse3.") => {
94+
return sse3::EvalContextExt::emulate_x86_sse3_intrinsic(
95+
this, link_name, abi, args, dest,
96+
);
97+
}
98+
name if name.starts_with("ssse3.") => {
99+
return ssse3::EvalContextExt::emulate_x86_ssse3_intrinsic(
100+
this, link_name, abi, args, dest,
101+
);
102+
}
91103
_ => return Ok(EmulateByNameResult::NotSupported),
92104
}
93105
Ok(EmulateByNameResult::NeedsJumping)
@@ -286,3 +298,44 @@ fn bin_op_simd_float_all<'tcx, F: rustc_apfloat::Float>(
286298

287299
Ok(())
288300
}
301+
302+
/// Horizontaly performs `which` operation on adjacent values of
303+
/// `left` and `right` SIMD vectors and stores the result in `dest`.
304+
fn horizontal_bin_op<'tcx>(
305+
this: &mut crate::MiriInterpCx<'_, 'tcx>,
306+
which: mir::BinOp,
307+
saturating: bool,
308+
left: &OpTy<'tcx, Provenance>,
309+
right: &OpTy<'tcx, Provenance>,
310+
dest: &PlaceTy<'tcx, Provenance>,
311+
) -> InterpResult<'tcx, ()> {
312+
let (left, left_len) = this.operand_to_simd(left)?;
313+
let (right, right_len) = this.operand_to_simd(right)?;
314+
let (dest, dest_len) = this.place_to_simd(dest)?;
315+
316+
assert_eq!(dest_len, left_len);
317+
assert_eq!(dest_len, right_len);
318+
assert_eq!(dest_len % 2, 0);
319+
320+
let middle = dest_len / 2;
321+
for i in 0..dest_len {
322+
// `i` is the index in `dest`
323+
// `j` is the index of the 2-item chunk in `src`
324+
let (j, src) =
325+
if i < middle { (i, &left) } else { (i.checked_sub(middle).unwrap(), &right) };
326+
// `base_i` is the index of the first item of the 2-item chunk in `src`
327+
let base_i = j.checked_mul(2).unwrap();
328+
let lhs = this.read_immediate(&this.project_index(src, base_i)?)?;
329+
let rhs = this.read_immediate(&this.project_index(src, base_i.checked_add(1).unwrap())?)?;
330+
331+
let res = if saturating {
332+
Immediate::from(this.saturating_arith(which, &lhs, &rhs)?)
333+
} else {
334+
*this.wrapping_binary_op(which, &lhs, &rhs)?
335+
};
336+
337+
this.write_immediate(res, &this.project_index(&dest, i)?)?;
338+
}
339+
340+
Ok(())
341+
}

src/tools/miri/src/shims/x86/sse3.rs

+90
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
use rustc_middle::mir;
2+
use rustc_span::Symbol;
3+
use rustc_target::abi::Align;
4+
use rustc_target::spec::abi::Abi;
5+
6+
use super::horizontal_bin_op;
7+
use crate::*;
8+
use shims::foreign_items::EmulateByNameResult;
9+
10+
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
11+
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
12+
crate::MiriInterpCxExt<'mir, 'tcx>
13+
{
14+
fn emulate_x86_sse3_intrinsic(
15+
&mut self,
16+
link_name: Symbol,
17+
abi: Abi,
18+
args: &[OpTy<'tcx, Provenance>],
19+
dest: &PlaceTy<'tcx, Provenance>,
20+
) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> {
21+
let this = self.eval_context_mut();
22+
// Prefix should have already been checked.
23+
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse3.").unwrap();
24+
25+
match unprefixed_name {
26+
// Used to implement the _mm_addsub_ps and _mm_addsub_pd functions.
27+
// Alternatingly add and subtract floating point (f32 or f64) from
28+
// `left` and `right`
29+
"addsub.ps" | "addsub.pd" => {
30+
let [left, right] =
31+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
32+
33+
let (left, left_len) = this.operand_to_simd(left)?;
34+
let (right, right_len) = this.operand_to_simd(right)?;
35+
let (dest, dest_len) = this.place_to_simd(dest)?;
36+
37+
assert_eq!(dest_len, left_len);
38+
assert_eq!(dest_len, right_len);
39+
40+
for i in 0..dest_len {
41+
let left = this.read_immediate(&this.project_index(&left, i)?)?;
42+
let right = this.read_immediate(&this.project_index(&right, i)?)?;
43+
let dest = this.project_index(&dest, i)?;
44+
45+
// Even elements are subtracted and odd elements are added.
46+
let op = if i % 2 == 0 { mir::BinOp::Sub } else { mir::BinOp::Add };
47+
let res = this.wrapping_binary_op(op, &left, &right)?;
48+
49+
this.write_immediate(*res, &dest)?;
50+
}
51+
}
52+
// Used to implement the _mm_h{add,sub}_p{s,d} functions.
53+
// Horizontally add/subtract adjacent floating point values
54+
// in `left` and `right`.
55+
"hadd.ps" | "hadd.pd" | "hsub.ps" | "hsub.pd" => {
56+
let [left, right] =
57+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
58+
59+
let which = match unprefixed_name {
60+
"hadd.ps" | "hadd.pd" => mir::BinOp::Add,
61+
"hsub.ps" | "hsub.pd" => mir::BinOp::Sub,
62+
_ => unreachable!(),
63+
};
64+
65+
horizontal_bin_op(this, which, /*saturating*/ false, left, right, dest)?;
66+
}
67+
// Used to implement the _mm_lddqu_si128 function.
68+
// Reads a 128-bit vector from an unaligned pointer. This intrinsic
69+
// is expected to perform better than a regular unaligned read when
70+
// the data crosses a cache line, but for Miri this is just a regular
71+
// unaligned read.
72+
"ldu.dq" => {
73+
let [src_ptr] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
74+
let src_ptr = this.read_pointer(src_ptr)?;
75+
let dest = dest.force_mplace(this)?;
76+
77+
this.mem_copy(
78+
src_ptr,
79+
Align::ONE,
80+
dest.ptr(),
81+
Align::ONE,
82+
dest.layout.size,
83+
/*nonoverlapping*/ true,
84+
)?;
85+
}
86+
_ => return Ok(EmulateByNameResult::NotSupported),
87+
}
88+
Ok(EmulateByNameResult::NeedsJumping)
89+
}
90+
}

src/tools/miri/src/shims/x86/ssse3.rs

+199
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,199 @@
1+
use rustc_middle::mir;
2+
use rustc_span::Symbol;
3+
use rustc_target::spec::abi::Abi;
4+
5+
use super::horizontal_bin_op;
6+
use crate::*;
7+
use shims::foreign_items::EmulateByNameResult;
8+
9+
impl<'mir, 'tcx: 'mir> EvalContextExt<'mir, 'tcx> for crate::MiriInterpCx<'mir, 'tcx> {}
10+
pub(super) trait EvalContextExt<'mir, 'tcx: 'mir>:
11+
crate::MiriInterpCxExt<'mir, 'tcx>
12+
{
13+
fn emulate_x86_ssse3_intrinsic(
14+
&mut self,
15+
link_name: Symbol,
16+
abi: Abi,
17+
args: &[OpTy<'tcx, Provenance>],
18+
dest: &PlaceTy<'tcx, Provenance>,
19+
) -> InterpResult<'tcx, EmulateByNameResult<'mir, 'tcx>> {
20+
let this = self.eval_context_mut();
21+
// Prefix should have already been checked.
22+
let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.ssse3.").unwrap();
23+
24+
match unprefixed_name {
25+
// Used to implement the _mm_abs_epi{8,16,32} functions.
26+
// Calculates the absolute value of packed 8/16/32-bit integers.
27+
"pabs.b.128" | "pabs.w.128" | "pabs.d.128" => {
28+
let [op] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
29+
30+
let (op, op_len) = this.operand_to_simd(op)?;
31+
let (dest, dest_len) = this.place_to_simd(dest)?;
32+
33+
assert_eq!(op_len, dest_len);
34+
35+
for i in 0..dest_len {
36+
let op = this.read_scalar(&this.project_index(&op, i)?)?;
37+
let dest = this.project_index(&dest, i)?;
38+
39+
// Converting to a host "i128" works since the input is always signed.
40+
let res = op.to_int(dest.layout.size)?.unsigned_abs();
41+
42+
this.write_scalar(Scalar::from_uint(res, dest.layout.size), &dest)?;
43+
}
44+
}
45+
// Used to implement the _mm_shuffle_epi8 intrinsic.
46+
// Shuffles bytes from `left` using `right` as pattern.
47+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_shuffle_epi8
48+
"pshuf.b.128" => {
49+
let [left, right] =
50+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
51+
52+
let (left, left_len) = this.operand_to_simd(left)?;
53+
let (right, right_len) = this.operand_to_simd(right)?;
54+
let (dest, dest_len) = this.place_to_simd(dest)?;
55+
56+
assert_eq!(dest_len, left_len);
57+
assert_eq!(dest_len, right_len);
58+
59+
for i in 0..dest_len {
60+
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_u8()?;
61+
let dest = this.project_index(&dest, i)?;
62+
63+
let res = if right & 0x80 == 0 {
64+
let j = right % 16; // index wraps around
65+
this.read_scalar(&this.project_index(&left, j.into())?)?
66+
} else {
67+
// If the highest bit in `right` is 1, write zero.
68+
Scalar::from_u8(0)
69+
};
70+
71+
this.write_scalar(res, &dest)?;
72+
}
73+
}
74+
// Used to implement the _mm_h{add,adds,sub}_epi{16,32} functions.
75+
// Horizontally add / add with saturation / subtract adjacent 16/32-bit
76+
// integer values in `left` and `right`.
77+
"phadd.w.128" | "phadd.sw.128" | "phadd.d.128" | "phsub.w.128" | "phsub.sw.128"
78+
| "phsub.d.128" => {
79+
let [left, right] =
80+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
81+
82+
let (which, saturating) = match unprefixed_name {
83+
"phadd.w.128" | "phadd.d.128" => (mir::BinOp::Add, false),
84+
"phadd.sw.128" => (mir::BinOp::Add, true),
85+
"phsub.w.128" | "phsub.d.128" => (mir::BinOp::Sub, false),
86+
"phsub.sw.128" => (mir::BinOp::Sub, true),
87+
_ => unreachable!(),
88+
};
89+
90+
horizontal_bin_op(this, which, saturating, left, right, dest)?;
91+
}
92+
// Used to implement the _mm_maddubs_epi16 function.
93+
// Multiplies packed 8-bit unsigned integers from `left` and packed
94+
// signed 8-bit integers from `right` into 16-bit signed integers. Then,
95+
// the saturating sum of the products with indices `2*i` and `2*i+1`
96+
// produces the output at index `i`.
97+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_maddubs_epi16
98+
"pmadd.ub.sw.128" => {
99+
let [left, right] =
100+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
101+
102+
let (left, left_len) = this.operand_to_simd(left)?;
103+
let (right, right_len) = this.operand_to_simd(right)?;
104+
let (dest, dest_len) = this.place_to_simd(dest)?;
105+
106+
assert_eq!(left_len, right_len);
107+
assert_eq!(dest_len.checked_mul(2).unwrap(), left_len);
108+
109+
for i in 0..dest_len {
110+
let j1 = i.checked_mul(2).unwrap();
111+
let left1 = this.read_scalar(&this.project_index(&left, j1)?)?.to_u8()?;
112+
let right1 = this.read_scalar(&this.project_index(&right, j1)?)?.to_i8()?;
113+
114+
let j2 = j1.checked_add(1).unwrap();
115+
let left2 = this.read_scalar(&this.project_index(&left, j2)?)?.to_u8()?;
116+
let right2 = this.read_scalar(&this.project_index(&right, j2)?)?.to_i8()?;
117+
118+
let dest = this.project_index(&dest, i)?;
119+
120+
// Multiplication of a u8 and an i8 into an i16 cannot overflow.
121+
let mul1 = i16::from(left1).checked_mul(right1.into()).unwrap();
122+
let mul2 = i16::from(left2).checked_mul(right2.into()).unwrap();
123+
let res = mul1.saturating_add(mul2);
124+
125+
this.write_scalar(Scalar::from_i16(res), &dest)?;
126+
}
127+
}
128+
// Used to implement the _mm_mulhrs_epi16 function.
129+
// Multiplies packed 16-bit signed integer values, truncates the 32-bit
130+
// product to the 18 most significant bits by right-shifting, and then
131+
// divides the 18-bit value by 2 (rounding to nearest) by first adding
132+
// 1 and then taking the bits `1..=16`.
133+
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16
134+
"pmul.hr.sw.128" => {
135+
let [left, right] =
136+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
137+
138+
let (left, left_len) = this.operand_to_simd(left)?;
139+
let (right, right_len) = this.operand_to_simd(right)?;
140+
let (dest, dest_len) = this.place_to_simd(dest)?;
141+
142+
assert_eq!(dest_len, left_len);
143+
assert_eq!(dest_len, right_len);
144+
145+
for i in 0..dest_len {
146+
let left = this.read_scalar(&this.project_index(&left, i)?)?.to_i16()?;
147+
let right = this.read_scalar(&this.project_index(&right, i)?)?.to_i16()?;
148+
let dest = this.project_index(&dest, i)?;
149+
150+
let res = (i32::from(left).checked_mul(right.into()).unwrap() >> 14)
151+
.checked_add(1)
152+
.unwrap()
153+
>> 1;
154+
155+
// The result of this operation can overflow a signed 16-bit integer.
156+
// When `left` and `right` are -0x8000, the result is 0x8000.
157+
#[allow(clippy::cast_possible_truncation)]
158+
let res = res as i16;
159+
160+
this.write_scalar(Scalar::from_i16(res), &dest)?;
161+
}
162+
}
163+
// Used to implement the _mm_sign_epi{8,16,32} functions.
164+
// Negates elements from `left` when the corresponding element in
165+
// `right` is negative. If an element from `right` is zero, zero
166+
// is writen to the corresponding output element.
167+
// Basically, we multiply `left` with `right.signum()`.
168+
"psign.b.128" | "psign.w.128" | "psign.d.128" => {
169+
let [left, right] =
170+
this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
171+
172+
let (left, left_len) = this.operand_to_simd(left)?;
173+
let (right, right_len) = this.operand_to_simd(right)?;
174+
let (dest, dest_len) = this.place_to_simd(dest)?;
175+
176+
assert_eq!(dest_len, left_len);
177+
assert_eq!(dest_len, right_len);
178+
179+
for i in 0..dest_len {
180+
let dest = this.project_index(&dest, i)?;
181+
let left = this.read_immediate(&this.project_index(&left, i)?)?;
182+
let right = this
183+
.read_scalar(&this.project_index(&right, i)?)?
184+
.to_int(dest.layout.size)?;
185+
186+
let res = this.wrapping_binary_op(
187+
mir::BinOp::Mul,
188+
&left,
189+
&ImmTy::from_int(right.signum(), dest.layout),
190+
)?;
191+
192+
this.write_immediate(*res, &dest)?;
193+
}
194+
}
195+
_ => return Ok(EmulateByNameResult::NotSupported),
196+
}
197+
Ok(EmulateByNameResult::NeedsJumping)
198+
}
199+
}

0 commit comments

Comments
 (0)