Skip to content

Commit b004312

Browse files
committed
Implement arm64 vaddlvq_u8 and vld1q_u8_x4 vendor intrinsics
This is required for using the bytecount crate on arm64.
1 parent ed91b73 commit b004312

File tree

1 file changed

+24
-0
lines changed

1 file changed

+24
-0
lines changed

src/intrinsics/llvm_aarch64.rs

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -17,6 +17,14 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
1717
fx.bcx.ins().fence();
1818
}
1919

20+
"llvm.aarch64.neon.ld1x4.v16i8.p0i8" => {
21+
intrinsic_args!(fx, args => (ptr); intrinsic);
22+
23+
let ptr = ptr.load_scalar(fx);
24+
let val = CPlace::for_ptr(Pointer::new(ptr), ret.layout()).to_cvalue(fx);
25+
ret.write_cvalue(fx, val);
26+
}
27+
2028
_ if intrinsic.starts_with("llvm.aarch64.neon.abs.v") => {
2129
intrinsic_args!(fx, args => (a); intrinsic);
2230

@@ -115,6 +123,22 @@ pub(crate) fn codegen_aarch64_llvm_intrinsic_call<'tcx>(
115123
);
116124
}
117125

126+
"llvm.aarch64.neon.uaddlv.i32.v16i8" => {
127+
intrinsic_args!(fx, args => (v); intrinsic);
128+
129+
let mut res_val = fx.bcx.ins().iconst(types::I16, 0);
130+
for lane_idx in 0..16 {
131+
let lane = v.value_lane(fx, lane_idx).load_scalar(fx);
132+
let lane = fx.bcx.ins().uextend(types::I16, lane);
133+
res_val = fx.bcx.ins().iadd(res_val, lane);
134+
}
135+
let res = CValue::by_val(
136+
fx.bcx.ins().uextend(types::I32, res_val),
137+
fx.layout_of(fx.tcx.types.u32),
138+
);
139+
ret.write_cvalue(fx, res);
140+
}
141+
118142
_ if intrinsic.starts_with("llvm.aarch64.neon.faddv.f32.v") => {
119143
intrinsic_args!(fx, args => (v); intrinsic);
120144

0 commit comments

Comments
 (0)