Skip to content

Commit 913612c

Browse files
authored
Merge pull request rust-lang#4193 from bjorn3/arm64_vpmaxq_u8
Implement vpmaxq_u8 on aarch64
2 parents a3dd764 + 4303a14 commit 913612c

File tree

4 files changed

+124
-13
lines changed

4 files changed

+124
-13
lines changed

src/tools/miri/src/shims/aarch64.rs

+78
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
use rustc_middle::mir::BinOp;
2+
use rustc_middle::ty::Ty;
3+
use rustc_span::Symbol;
4+
use rustc_target::callconv::{Conv, FnAbi};
5+
6+
use crate::*;
7+
8+
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
9+
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
10+
fn emulate_aarch64_intrinsic(
11+
&mut self,
12+
link_name: Symbol,
13+
abi: &FnAbi<'tcx, Ty<'tcx>>,
14+
args: &[OpTy<'tcx>],
15+
dest: &MPlaceTy<'tcx>,
16+
) -> InterpResult<'tcx, EmulateItemResult> {
17+
let this = self.eval_context_mut();
18+
// Prefix should have already been checked.
19+
let unprefixed_name = link_name.as_str().strip_prefix("llvm.aarch64.").unwrap();
20+
match unprefixed_name {
21+
"isb" => {
22+
let [arg] = this.check_shim(abi, Conv::C, link_name, args)?;
23+
let arg = this.read_scalar(arg)?.to_i32()?;
24+
match arg {
25+
// SY ("full system scope")
26+
15 => {
27+
this.yield_active_thread();
28+
}
29+
_ => {
30+
throw_unsup_format!("unsupported llvm.aarch64.isb argument {}", arg);
31+
}
32+
}
33+
}
34+
35+
// Used to implement the vpmaxq_u8 function.
36+
// Computes the maximum of adjacent pairs; the first half of the output is produced from the
37+
// `left` input, the second half of the output from the `right` input.
38+
// https://developer.arm.com/architectures/instruction-sets/intrinsics/vpmaxq_u8
39+
"neon.umaxp.v16i8" => {
40+
let [left, right] = this.check_shim(abi, Conv::C, link_name, args)?;
41+
42+
let (left, left_len) = this.project_to_simd(left)?;
43+
let (right, right_len) = this.project_to_simd(right)?;
44+
let (dest, lane_count) = this.project_to_simd(dest)?;
45+
assert_eq!(left_len, right_len);
46+
assert_eq!(lane_count, left_len);
47+
48+
for lane_idx in 0..lane_count {
49+
let src = if lane_idx < (lane_count / 2) { &left } else { &right };
50+
let src_idx = lane_idx.strict_rem(lane_count / 2);
51+
52+
let lhs_lane =
53+
this.read_immediate(&this.project_index(src, src_idx.strict_mul(2))?)?;
54+
let rhs_lane = this.read_immediate(
55+
&this.project_index(src, src_idx.strict_mul(2).strict_add(1))?,
56+
)?;
57+
58+
// Compute `if lhs > rhs { lhs } else { rhs }`, i.e., `max`.
59+
let res_lane = if this
60+
.binary_op(BinOp::Gt, &lhs_lane, &rhs_lane)?
61+
.to_scalar()
62+
.to_bool()?
63+
{
64+
lhs_lane
65+
} else {
66+
rhs_lane
67+
};
68+
69+
let dest = this.project_index(&dest, lane_idx)?;
70+
this.write_immediate(*res_lane, &dest)?;
71+
}
72+
}
73+
74+
_ => return interp_ok(EmulateItemResult::NotSupported),
75+
}
76+
interp_ok(EmulateItemResult::NeedsReturn)
77+
}
78+
}

src/tools/miri/src/shims/foreign_items.rs

+5-13
Original file line numberDiff line numberDiff line change
@@ -981,20 +981,12 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> {
981981
this, link_name, abi, args, dest,
982982
);
983983
}
984-
// FIXME: Move these to an `arm` submodule.
985-
"llvm.aarch64.isb" if this.tcx.sess.target.arch == "aarch64" => {
986-
let [arg] = this.check_shim(abi, Conv::C, link_name, args)?;
987-
let arg = this.read_scalar(arg)?.to_i32()?;
988-
match arg {
989-
// SY ("full system scope")
990-
15 => {
991-
this.yield_active_thread();
992-
}
993-
_ => {
994-
throw_unsup_format!("unsupported llvm.aarch64.isb argument {}", arg);
995-
}
996-
}
984+
name if name.starts_with("llvm.aarch64.") && this.tcx.sess.target.arch == "aarch64" => {
985+
return shims::aarch64::EvalContextExt::emulate_aarch64_intrinsic(
986+
this, link_name, abi, args, dest,
987+
);
997988
}
989+
// FIXME: Move this to an `arm` submodule.
998990
"llvm.arm.hint" if this.tcx.sess.target.arch == "arm" => {
999991
let [arg] = this.check_shim(abi, Conv::C, link_name, args)?;
1000992
let arg = this.read_scalar(arg)?.to_i32()?;

src/tools/miri/src/shims/mod.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#![warn(clippy::arithmetic_side_effects)]
22

3+
mod aarch64;
34
mod alloc;
45
mod backtrace;
56
mod files;
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
// We're testing aarch64 target specific features
2+
//@only-target: aarch64
3+
//@compile-flags: -C target-feature=+neon
4+
5+
use std::arch::aarch64::*;
6+
use std::arch::is_aarch64_feature_detected;
7+
8+
fn main() {
9+
assert!(is_aarch64_feature_detected!("neon"));
10+
11+
unsafe {
12+
test_neon();
13+
}
14+
}
15+
16+
#[target_feature(enable = "neon")]
17+
unsafe fn test_neon() {
18+
// Adapted from library/stdarch/crates/core_arch/src/aarch64/neon/mod.rs
19+
unsafe fn test_vpmaxq_u8() {
20+
let a = vld1q_u8([1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8].as_ptr());
21+
let b = vld1q_u8([0, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9].as_ptr());
22+
let e = [2, 4, 6, 8, 2, 4, 6, 8, 3, 5, 7, 9, 3, 5, 7, 9];
23+
let mut r = [0; 16];
24+
vst1q_u8(r.as_mut_ptr(), vpmaxq_u8(a, b));
25+
assert_eq!(r, e);
26+
}
27+
test_vpmaxq_u8();
28+
29+
unsafe fn test_vpmaxq_u8_is_unsigned() {
30+
let a = vld1q_u8(
31+
[255, 0, 253, 252, 251, 250, 249, 248, 255, 254, 253, 252, 251, 250, 249, 248].as_ptr(),
32+
);
33+
let b = vld1q_u8([254, 3, 2, 5, 4, 7, 6, 9, 0, 3, 2, 5, 4, 7, 6, 9].as_ptr());
34+
let e = [255, 253, 251, 249, 255, 253, 251, 249, 254, 5, 7, 9, 3, 5, 7, 9];
35+
let mut r = [0; 16];
36+
vst1q_u8(r.as_mut_ptr(), vpmaxq_u8(a, b));
37+
assert_eq!(r, e);
38+
}
39+
test_vpmaxq_u8_is_unsigned();
40+
}

0 commit comments

Comments
 (0)