@@ -310,6 +310,40 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
310
310
let val = CValue :: by_val_pair ( cb_out, c, layout) ;
311
311
ret. write_cvalue ( fx, val) ;
312
312
}
313
+ "llvm.x86.sse2.pavg.b" | "llvm.x86.sse2.pavg.w" => {
314
+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
315
+ // Unsigned rounding average of each (a, b) lane pair; both intrinsic names share this lowering. The helper below is handed `ret` for the result lanes (helper body not visible here).
316
+ // FIXME use vector instructions when possible (for now every lane pair is lowered with scalar ops in the closure)
317
+ simd_pair_for_each_lane (
318
+ fx,
319
+ a,
320
+ b,
321
+ ret,
322
+ & |fx, _lane_ty, _res_lane_ty, a_lane, b_lane| {
323
+ // (a + b + 1) >> 1, evaluated in a lane type twice as wide so neither the sum nor the +1 can wrap
324
+ let lane_ty = fx. bcx . func . dfg . value_type ( a_lane) ; // lane type is read from the value; the `_lane_ty` argument is unused
325
+ let a_lane = fx. bcx . ins ( ) . uextend ( lane_ty. double_width ( ) . unwrap ( ) , a_lane) ; // zero-extend: lanes are treated as unsigned
326
+ let b_lane = fx. bcx . ins ( ) . uextend ( lane_ty. double_width ( ) . unwrap ( ) , b_lane) ;
327
+ let sum = fx. bcx . ins ( ) . iadd ( a_lane, b_lane) ;
328
+ let num_plus_one = fx. bcx . ins ( ) . iadd_imm ( sum, 1 ) ; // the +1 makes the halving below round up instead of truncating
329
+ let res = fx. bcx . ins ( ) . ushr_imm ( num_plus_one, 1 ) ; // logical shift right by one == unsigned divide by two
330
+ fx. bcx . ins ( ) . ireduce ( lane_ty, res) // narrow the result back to the original lane type
331
+ } ,
332
+ ) ;
333
+ }
334
+ "llvm.x86.sse2.psra.w" => {
335
+ intrinsic_args ! ( fx, args => ( a, count) ; intrinsic) ;
336
+ // Signed (arithmetic) right shift of every lane of `a` by one shared shift amount derived from `count`.
337
+ let count_lane = count. force_stack ( fx) . 0 . load ( fx, types:: I64 , MemFlags :: trusted ( ) ) ; // read a single i64 from the start of `count` after forcing it to the stack
338
+ let lane_ty = fx. clif_type ( a. layout ( ) . ty . simd_size_and_type ( fx. tcx ) . 1 ) . unwrap ( ) ; // Cranelift type for the element type of `a`
339
+ let max_count = fx. bcx . ins ( ) . iconst ( types:: I64 , i64:: from ( lane_ty. bits ( ) - 1 ) ) ; // largest shift that stays inside one lane: lane bits - 1
340
+ let saturated_count = fx. bcx . ins ( ) . umin ( count_lane, max_count) ; // unsigned min: any larger count is clamped to max_count
341
+ // An arithmetic shift by lane bits - 1 leaves only copies of the sign bit, so clamped (oversized) counts yield a sign fill.
342
+ // FIXME use vector instructions when possible
343
+ simd_for_each_lane ( fx, a, ret, & |fx, _lane_ty, _res_lane_ty, a_lane| {
344
+ fx. bcx . ins ( ) . sshr ( a_lane, saturated_count)
345
+ } ) ;
346
+ }
313
347
_ => {
314
348
fx. tcx
315
349
. sess
0 commit comments