@@ -719,66 +719,41 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
719
719
}
720
720
721
721
"llvm.x86.pclmulqdq" => {
722
- // FIXME use inline asm
723
722
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_clmulepi64_si128&ig_expand=772
724
- intrinsic_args ! ( fx, args => ( a, b, imm8 ) ; intrinsic) ;
723
+ intrinsic_args ! ( fx, args => ( a, b, _imm8 ) ; intrinsic) ;
725
724
726
- assert_eq ! ( a. layout( ) , b. layout( ) ) ;
727
- let layout = a. layout ( ) ;
728
-
729
- let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
730
- let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
731
- assert_eq ! ( lane_ty, fx. tcx. types. i64 ) ;
732
- assert_eq ! ( ret_lane_ty, fx. tcx. types. i64 ) ;
733
- assert_eq ! ( lane_count, 2 ) ;
734
- assert_eq ! ( ret_lane_count, 2 ) ;
735
-
736
- let imm8 = imm8. load_scalar ( fx) ;
737
-
738
- let control0 = fx. bcx . ins ( ) . band_imm ( imm8, 0b0000_0001 ) ;
739
- let a_lane0 = a. value_lane ( fx, 0 ) . load_scalar ( fx) ;
740
- let a_lane1 = a. value_lane ( fx, 1 ) . load_scalar ( fx) ;
741
- let temp1 = fx. bcx . ins ( ) . select ( control0, a_lane1, a_lane0) ;
725
+ let a = a. load_scalar ( fx) ;
726
+ let b = b. load_scalar ( fx) ;
742
727
743
- let control4 = fx. bcx . ins ( ) . band_imm ( imm8, 0b0001_0000 ) ;
744
- let b_lane0 = b. value_lane ( fx, 0 ) . load_scalar ( fx) ;
745
- let b_lane1 = b. value_lane ( fx, 1 ) . load_scalar ( fx) ;
746
- let temp2 = fx. bcx . ins ( ) . select ( control4, b_lane1, b_lane0) ;
728
+ let imm8 = if let Some ( imm8) = crate :: constant:: mir_operand_get_const_val ( fx, & args[ 2 ] )
729
+ {
730
+ imm8
731
+ } else {
732
+ fx. tcx . sess . span_fatal (
733
+ span,
734
+ "Index argument for `_mm_clmulepi64_si128` is not a constant" ,
735
+ ) ;
736
+ } ;
747
737
748
- fn extract_bit ( fx : & mut FunctionCx < ' _ , ' _ , ' _ > , val : Value , bit : i64 ) -> Value {
749
- let tmp = fx. bcx . ins ( ) . ushr_imm ( val, bit) ;
750
- fx. bcx . ins ( ) . band_imm ( tmp, 1 )
751
- }
738
+ let imm8 = imm8. try_to_u8 ( ) . unwrap_or_else ( |_| panic ! ( "kind not scalar: {:?}" , imm8) ) ;
752
739
753
- let mut res1 = fx. bcx . ins ( ) . iconst ( types:: I64 , 0 ) ;
754
- for i in 0 ..=63 {
755
- let x = extract_bit ( fx, temp1, 0 ) ;
756
- let y = extract_bit ( fx, temp2, i) ;
757
- let mut temp = fx. bcx . ins ( ) . band ( x, y) ;
758
- for j in 1 ..=i {
759
- let x = extract_bit ( fx, temp1, j) ;
760
- let y = extract_bit ( fx, temp2, i - j) ;
761
- let z = fx. bcx . ins ( ) . band ( x, y) ;
762
- temp = fx. bcx . ins ( ) . bxor ( temp, z) ;
763
- }
764
- let temp = fx. bcx . ins ( ) . ishl_imm ( temp, i) ;
765
- res1 = fx. bcx . ins ( ) . bor ( res1, temp) ;
766
- }
767
- ret. place_lane ( fx, 0 ) . to_ptr ( ) . store ( fx, res1, MemFlags :: trusted ( ) ) ;
768
-
769
- let mut res2 = fx. bcx . ins ( ) . iconst ( types:: I64 , 0 ) ;
770
- for i in 64 ..=127 {
771
- let mut temp = fx. bcx . ins ( ) . iconst ( types:: I64 , 0 ) ;
772
- for j in i - 63 ..=63 {
773
- let x = extract_bit ( fx, temp1, j) ;
774
- let y = extract_bit ( fx, temp2, i - j) ;
775
- let z = fx. bcx . ins ( ) . band ( x, y) ;
776
- temp = fx. bcx . ins ( ) . bxor ( temp, z) ;
777
- }
778
- let temp = fx. bcx . ins ( ) . ishl_imm ( temp, i) ;
779
- res2 = fx. bcx . ins ( ) . bor ( res2, temp) ;
780
- }
781
- ret. place_lane ( fx, 1 ) . to_ptr ( ) . store ( fx, res2, MemFlags :: trusted ( ) ) ;
740
+ codegen_inline_asm_inner (
741
+ fx,
742
+ & [ InlineAsmTemplatePiece :: String ( format ! ( "pclmulqdq xmm0, xmm1, {imm8}" ) ) ] ,
743
+ & [
744
+ CInlineAsmOperand :: InOut {
745
+ reg : InlineAsmRegOrRegClass :: Reg ( InlineAsmReg :: X86 ( X86InlineAsmReg :: xmm0) ) ,
746
+ _late : true ,
747
+ in_value : a,
748
+ out_place : Some ( ret) ,
749
+ } ,
750
+ CInlineAsmOperand :: In {
751
+ reg : InlineAsmRegOrRegClass :: Reg ( InlineAsmReg :: X86 ( X86InlineAsmReg :: xmm1) ) ,
752
+ value : b,
753
+ } ,
754
+ ] ,
755
+ InlineAsmOptions :: NOSTACK | InlineAsmOptions :: PURE | InlineAsmOptions :: NOMEM ,
756
+ ) ;
782
757
}
783
758
784
759
"llvm.x86.aesni.aeskeygenassist" => {
0 commit comments