@@ -344,6 +344,109 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
                fx.bcx.ins().sshr(a_lane, saturated_count)
            });
        }
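+        // llvm.x86.sse2.psad.bw (psadbw / _mm_sad_epu8): for each group of 8 unsigned
+        // byte lanes, sum the absolute differences of `a` and `b` into one u64 lane.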
+        "llvm.x86.sse2.psad.bw" => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(lane_ty, fx.tcx.types.u8);
+            assert_eq!(ret_lane_ty, fx.tcx.types.u64);
+            assert_eq!(lane_count, ret_lane_count * 8);
+
+            let ret_lane_layout = fx.layout_of(fx.tcx.types.u64);
+            for out_lane_idx in 0..lane_count / 8 {
+                let mut lane_diff_acc = fx.bcx.ins().iconst(types::I64, 0);
+
+                // Sum the absolute differences of all 8 byte lanes in this group.
+                for lane_idx in out_lane_idx * 8..out_lane_idx * 8 + 8 {
+                    let a_lane = a.value_lane(fx, lane_idx).load_scalar(fx);
+                    let b_lane = b.value_lane(fx, lane_idx).load_scalar(fx);
+
+                    // Widen to I16 so that a byte difference larger than 127 does not
+                    // wrap before the absolute value is taken.
+                    let a_lane = fx.bcx.ins().uextend(types::I16, a_lane);
+                    let b_lane = fx.bcx.ins().uextend(types::I16, b_lane);
+
+                    let lane_diff = fx.bcx.ins().isub(a_lane, b_lane);
+                    let abs_lane_diff = fx.bcx.ins().iabs(lane_diff);
+                    let abs_lane_diff = fx.bcx.ins().uextend(types::I64, abs_lane_diff);
+                    lane_diff_acc = fx.bcx.ins().iadd(lane_diff_acc, abs_lane_diff);
+                }
+
+                let res_lane = CValue::by_val(lane_diff_acc, ret_lane_layout);
+
+                ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
+            }
+        }
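+        // llvm.x86.ssse3.pmadd.ub.sw.128 (pmaddubsw / _mm_maddubs_epi16): multiply
+        // unsigned bytes from `a` with signed bytes from `b`, then add adjacent pairs
+        // of products with signed saturation into i16 lanes.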
+        "llvm.x86.ssse3.pmadd.ub.sw.128" => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(lane_ty, fx.tcx.types.u8);
+            assert_eq!(ret_lane_ty, fx.tcx.types.i16);
+            assert_eq!(lane_count, ret_lane_count * 2);
+
+            let ret_lane_layout = fx.layout_of(fx.tcx.types.i16);
+            for out_lane_idx in 0..lane_count / 2 {
+                let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
+                let a_lane0 = fx.bcx.ins().uextend(types::I16, a_lane0);
+                let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
+                let b_lane0 = fx.bcx.ins().sextend(types::I16, b_lane0);
+
+                let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
+                let a_lane1 = fx.bcx.ins().uextend(types::I16, a_lane1);
+                let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
+                let b_lane1 = fx.bcx.ins().sextend(types::I16, b_lane1);
+
+                let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
+                let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);
+
+                let (val, has_overflow) = fx.bcx.ins().sadd_overflow(mul0, mul1);
+
+                // Signed overflow is only possible when both products have the same
+                // sign, so the sign of `mul1` selects the saturation bound.
+                let rhs_ge_zero = fx.bcx.ins().icmp_imm(IntCC::SignedGreaterThanOrEqual, mul1, 0);
+
+                let min = fx.bcx.ins().iconst(types::I16, i64::from(i16::MIN as u16));
+                let max = fx.bcx.ins().iconst(types::I16, i64::from(i16::MAX as u16));
+
+                let sat_val = fx.bcx.ins().select(rhs_ge_zero, max, min);
+                let res_lane = fx.bcx.ins().select(has_overflow, sat_val, val);
+
+                let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+                ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
+            }
+        }
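+        // llvm.x86.sse2.pmadd.wd (pmaddwd / _mm_madd_epi16): multiply signed 16-bit
+        // lanes of `a` and `b` and add adjacent pairs of products into i32 lanes.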
+        "llvm.x86.sse2.pmadd.wd" => {
+            intrinsic_args!(fx, args => (a, b); intrinsic);
+
+            assert_eq!(a.layout(), b.layout());
+            let layout = a.layout();
+
+            let (lane_count, lane_ty) = layout.ty.simd_size_and_type(fx.tcx);
+            let (ret_lane_count, ret_lane_ty) = ret.layout().ty.simd_size_and_type(fx.tcx);
+            assert_eq!(lane_ty, fx.tcx.types.i16);
+            assert_eq!(ret_lane_ty, fx.tcx.types.i32);
+            assert_eq!(lane_count, ret_lane_count * 2);
+
+            let ret_lane_layout = fx.layout_of(fx.tcx.types.i32);
+            for out_lane_idx in 0..lane_count / 2 {
+                // Both operands are signed i16 lanes, so sign-extend both before
+                // multiplying.
+                let a_lane0 = a.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
+                let a_lane0 = fx.bcx.ins().sextend(types::I32, a_lane0);
+                let b_lane0 = b.value_lane(fx, out_lane_idx * 2).load_scalar(fx);
+                let b_lane0 = fx.bcx.ins().sextend(types::I32, b_lane0);
+
+                let a_lane1 = a.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
+                let a_lane1 = fx.bcx.ins().sextend(types::I32, a_lane1);
+                let b_lane1 = b.value_lane(fx, out_lane_idx * 2 + 1).load_scalar(fx);
+                let b_lane1 = fx.bcx.ins().sextend(types::I32, b_lane1);
+
+                let mul0: Value = fx.bcx.ins().imul(a_lane0, b_lane0);
+                let mul1 = fx.bcx.ins().imul(a_lane1, b_lane1);
+
+                let res_lane = fx.bcx.ins().iadd(mul0, mul1);
+                let res_lane = CValue::by_val(res_lane, ret_lane_layout);
+
+                ret.place_lane(fx, out_lane_idx).write_cvalue(fx, res_lane);
+            }
+        }
        _ => {
            fx.tcx
                .sess