Skip to content

Commit e5ba1e8

Browse files
committed
Implement the LLVM x86 SSE2 intrinsics (pavg.b, pavg.w, psra.w) necessary for rav1e
Fixes #1399
1 parent ed8c515 commit e5ba1e8

File tree

1 file changed

+34
-0
lines changed

1 file changed

+34
-0
lines changed

src/intrinsics/llvm_x86.rs

+34
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,40 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
310310
let val = CValue::by_val_pair(cb_out, c, layout);
311311
ret.write_cvalue(fx, val);
312312
}
313+
"llvm.x86.sse2.pavg.b" | "llvm.x86.sse2.pavg.w" => {
314+
intrinsic_args!(fx, args => (a, b); intrinsic);
315+
316+
// FIXME: use vector instructions when possible instead of this lane-by-lane scalar lowering
317+
simd_pair_for_each_lane(
318+
fx,
319+
a,
320+
b,
321+
ret,
322+
&|fx, _lane_ty, _res_lane_ty, a_lane, b_lane| {
323+
// Unsigned rounded average: (a + b + 1) >> 1, computed at double lane width so the sum cannot overflow
324+
let lane_ty = fx.bcx.func.dfg.value_type(a_lane);
325+
let a_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), a_lane);
326+
let b_lane = fx.bcx.ins().uextend(lane_ty.double_width().unwrap(), b_lane);
327+
let sum = fx.bcx.ins().iadd(a_lane, b_lane);
328+
let num_plus_one = fx.bcx.ins().iadd_imm(sum, 1);
329+
let res = fx.bcx.ins().ushr_imm(num_plus_one, 1);
330+
fx.bcx.ins().ireduce(lane_ty, res)
331+
},
332+
);
333+
}
334+
"llvm.x86.sse2.psra.w" => {
335+
intrinsic_args!(fx, args => (a, count); intrinsic);
336+
337+
let count_lane = count.force_stack(fx).0.load(fx, types::I64, MemFlags::trusted());
338+
let lane_ty = fx.clif_type(a.layout().ty.simd_size_and_type(fx.tcx).1).unwrap();
339+
let max_count = fx.bcx.ins().iconst(types::I64, i64::from(lane_ty.bits() - 1));
340+
let saturated_count = fx.bcx.ins().umin(count_lane, max_count);
341+
342+
// FIXME: use vector instructions when possible; for now each lane is shifted individually by the same saturated count
343+
simd_for_each_lane(fx, a, ret, &|fx, _lane_ty, _res_lane_ty, a_lane| {
344+
fx.bcx.ins().sshr(a_lane, saturated_count)
345+
});
346+
}
313347
_ => {
314348
fx.tcx
315349
.sess

0 commit comments

Comments
 (0)