@@ -735,6 +735,117 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
             }
         }
 
+ "llvm.x86.fma.vfmaddsub.ps"
739
+ | "llvm.x86.fma.vfmaddsub.pd"
740
+ | "llvm.x86.fma.vfmaddsub.ps.256"
741
+ | "llvm.x86.fma.vfmaddsub.pd.256" => {
742
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_ps&ig_expand=3205
743
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmaddsub_pd&ig_expand=3181
744
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_ps&ig_expand=3209
745
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmaddsub_pd&ig_expand=3185
746
+ intrinsic_args ! ( fx, args => ( a, b, c) ; intrinsic) ;
747
+
748
+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
749
+ assert_eq ! ( a. layout( ) , c. layout( ) ) ;
750
+ let layout = a. layout ( ) ;
751
+
752
+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
753
+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
754
+ assert ! ( lane_ty. is_floating_point( ) ) ;
755
+ assert ! ( ret_lane_ty. is_floating_point( ) ) ;
756
+ assert_eq ! ( lane_count, ret_lane_count) ;
757
+ let ret_lane_layout = fx. layout_of ( ret_lane_ty) ;
758
+
759
+ for idx in 0 ..lane_count {
760
+ let a_lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
761
+ let b_lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
762
+ let c_lane = c. value_lane ( fx, idx) . load_scalar ( fx) ;
763
+
764
+ let mul = fx. bcx . ins ( ) . fmul ( a_lane, b_lane) ;
765
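+                // fmaddsub alternates per lane (per the Intel docs linked
+                // above): even lanes compute a*b - c, odd lanes a*b + c.
+                // Note: this is lowered as a separate fmul followed by
+                // fadd/fsub, so the product is rounded before the add; the
+                // result can differ in the last ulp from true fused
+                // multiply-add hardware.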
+                let res = if idx & 1 == 0 {
+                    fx.bcx.ins().fsub(mul, c_lane)
+                } else {
+                    fx.bcx.ins().fadd(mul, c_lane)
+                };
+
+                let res_lane = CValue::by_val(res, ret_lane_layout);
+                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
+            }
+        }
+
+ "llvm.x86.fma.vfmsubadd.ps"
777
+ | "llvm.x86.fma.vfmsubadd.pd"
778
+ | "llvm.x86.fma.vfmsubadd.ps.256"
779
+ | "llvm.x86.fma.vfmsubadd.pd.256" => {
780
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_ps&ig_expand=3325
781
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fmsubadd_pd&ig_expand=3301
782
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_ps&ig_expand=3329
783
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fmsubadd_pd&ig_expand=3305
784
+ intrinsic_args ! ( fx, args => ( a, b, c) ; intrinsic) ;
785
+
786
+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
787
+ assert_eq ! ( a. layout( ) , c. layout( ) ) ;
788
+ let layout = a. layout ( ) ;
789
+
790
+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
791
+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
792
+ assert ! ( lane_ty. is_floating_point( ) ) ;
793
+ assert ! ( ret_lane_ty. is_floating_point( ) ) ;
794
+ assert_eq ! ( lane_count, ret_lane_count) ;
795
+ let ret_lane_layout = fx. layout_of ( ret_lane_ty) ;
796
+
797
+ for idx in 0 ..lane_count {
798
+ let a_lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
799
+ let b_lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
800
+ let c_lane = c. value_lane ( fx, idx) . load_scalar ( fx) ;
801
+
802
+ let mul = fx. bcx . ins ( ) . fmul ( a_lane, b_lane) ;
803
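+                // fmsubadd is the mirror image of fmaddsub: even lanes
+                // compute a*b + c, odd lanes a*b - c.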
+                let res = if idx & 1 == 0 {
+                    fx.bcx.ins().fadd(mul, c_lane)
+                } else {
+                    fx.bcx.ins().fsub(mul, c_lane)
+                };
+
+                let res_lane = CValue::by_val(res, ret_lane_layout);
+                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
+            }
+        }
+
+ "llvm.x86.fma.vfnmadd.ps"
815
+ | "llvm.x86.fma.vfnmadd.pd"
816
+ | "llvm.x86.fma.vfnmadd.ps.256"
817
+ | "llvm.x86.fma.vfnmadd.pd.256" => {
818
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_ps&ig_expand=3391
819
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_fnmadd_pd&ig_expand=3367
820
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_ps&ig_expand=3395
821
+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_fnmadd_pd&ig_expand=3371
822
+ intrinsic_args ! ( fx, args => ( a, b, c) ; intrinsic) ;
823
+
824
+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
825
+ assert_eq ! ( a. layout( ) , c. layout( ) ) ;
826
+ let layout = a. layout ( ) ;
827
+
828
+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
829
+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
830
+ assert ! ( lane_ty. is_floating_point( ) ) ;
831
+ assert ! ( ret_lane_ty. is_floating_point( ) ) ;
832
+ assert_eq ! ( lane_count, ret_lane_count) ;
833
+ let ret_lane_layout = fx. layout_of ( ret_lane_ty) ;
834
+
835
+ for idx in 0 ..lane_count {
836
+ let a_lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
837
+ let b_lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
838
+ let c_lane = c. value_lane ( fx, idx) . load_scalar ( fx) ;
839
+
840
+ let mul = fx. bcx . ins ( ) . fmul ( a_lane, b_lane) ;
841
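+                // fnmadd negates the product: every lane computes -(a*b) + c,
+                // with no even/odd alternation.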
+                let neg_mul = fx.bcx.ins().fneg(mul);
+                let res = fx.bcx.ins().fadd(neg_mul, c_lane);
+
+                let res_lane = CValue::by_val(res, ret_lane_layout);
+                ret.place_lane(fx, idx).write_cvalue(fx, res_lane);
+            }
+        }
+
"llvm.x86.sse42.pcmpestri128" => {
739
850
// https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_cmpestri&ig_expand=939
740
851
intrinsic_args ! ( fx, args => ( a, la, b, lb, _imm8) ; intrinsic) ;