@@ -73,6 +73,10 @@ unsafe extern "unadjusted" {
73
73
#[ link_name = "llvm.umin.v8i16" ] fn vmnlh ( a : vector_unsigned_short , b : vector_unsigned_short ) -> vector_unsigned_short ;
74
74
#[ link_name = "llvm.umin.v4i32" ] fn vmnlf ( a : vector_unsigned_int , b : vector_unsigned_int ) -> vector_unsigned_int ;
75
75
#[ link_name = "llvm.umin.v2i64" ] fn vmnlg ( a : vector_unsigned_long_long , b : vector_unsigned_long_long ) -> vector_unsigned_long_long ;
76
+
77
+ #[ link_name = "llvm.s390.vfisb" ] fn vfisb ( a : vector_float , b : i32 , c : i32 ) -> vector_float ;
78
+ #[ link_name = "llvm.s390.vfidb" ] fn vfidb ( a : vector_double , b : i32 , c : i32 ) -> vector_double ;
79
+
76
80
}
77
81
78
82
// Invokes `impl_from!` (defined outside this excerpt) once with the ten listed SIMD lane types.
impl_from ! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -629,6 +633,71 @@ mod sealed {
629
633
}
630
634
631
635
// Invokes `impl_vec_trait!` (defined outside this excerpt) with the `VectorOrc` / `vec_orc` names;
// presumably this generates the `VectorOrc` impls — confirm against the macro definition.
impl_vec_trait ! { [ VectorOrc vec_orc] + 2 c ( orc) }
636
+
637
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
638
+ pub trait VectorRound : Sized {
639
+ unsafe fn vec_round_impl < const N : i32 , const MODE : i32 > ( self ) -> Self ;
640
+
641
+ #[ inline]
642
+ #[ target_feature( enable = "vector" ) ]
643
+ unsafe fn vec_roundc ( self ) -> Self {
644
+ self . vec_round_impl :: < 4 , 0 > ( )
645
+ }
646
+
647
+ #[ inline]
648
+ #[ target_feature( enable = "vector" ) ]
649
+ unsafe fn vec_round ( self ) -> Self {
650
+ // NOTE: simd_round resoles ties by rounding away from zero,
651
+ // while the vec_round function rounds towards zero
652
+ self . vec_round_impl :: < 4 , 4 > ( )
653
+ }
654
+
655
+ // NOTE: vec_roundz (vec_round_impl::<4, 5>) is the same as vec_trunc
656
+ #[ inline]
657
+ #[ target_feature( enable = "vector" ) ]
658
+ unsafe fn vec_trunc ( self ) -> Self {
659
+ simd_trunc ( self )
660
+ }
661
+
662
+ // NOTE: vec_roundp (vec_round_impl::<4, 6>) is the same as vec_ceil
663
+ #[ inline]
664
+ #[ target_feature( enable = "vector" ) ]
665
+ unsafe fn vec_ceil ( self ) -> Self {
666
+ simd_ceil ( self )
667
+ }
668
+
669
+ // NOTE: vec_roundm (vec_round_impl::<4, 7>) is the same as vec_floor
670
+ #[ inline]
671
+ #[ target_feature( enable = "vector" ) ]
672
+ unsafe fn vec_floor ( self ) -> Self {
673
+ simd_floor ( self )
674
+ }
675
+
676
+ #[ inline]
677
+ #[ target_feature( enable = "vector" ) ]
678
+ unsafe fn vec_rint ( self ) -> Self {
679
+ self . vec_round_impl :: < 0 , 0 > ( )
680
+ }
681
+ }
682
+
683
+ // FIXME(vector-enhancements-1) apply the right target feature to all methods
684
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
685
+ impl VectorRound for vector_float {
686
+ #[ inline]
687
+ #[ target_feature( enable = "vector" ) ]
688
+ unsafe fn vec_round_impl < const N : i32 , const MODE : i32 > ( self ) -> Self {
689
+ vfisb ( self , N , MODE )
690
+ }
691
+ }
692
+
693
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
694
+ impl VectorRound for vector_double {
695
+ #[ inline]
696
+ #[ target_feature( enable = "vector" ) ]
697
+ unsafe fn vec_round_impl < const N : i32 , const MODE : i32 > ( self ) -> Self {
698
+ vfidb ( self , N , MODE )
699
+ }
700
+ }
632
701
}
633
702
634
703
/// Vector element-wise addition.
@@ -843,6 +912,125 @@ where
843
912
a. vec_orc ( b)
844
913
}
845
914
915
+ /// Vector floor.
916
+ #[ inline]
917
+ #[ target_feature( enable = "vector" ) ]
918
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
919
+ pub unsafe fn vec_floor < T > ( a : T ) -> T
920
+ where
921
+ T : sealed:: VectorRound ,
922
+ {
923
+ a. vec_floor ( )
924
+ }
925
+
926
+ /// Vector ceil.
927
+ #[ inline]
928
+ #[ target_feature( enable = "vector" ) ]
929
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
930
+ pub unsafe fn vec_ceil < T > ( a : T ) -> T
931
+ where
932
+ T : sealed:: VectorRound ,
933
+ {
934
+ a. vec_ceil ( )
935
+ }
936
+
937
+ /// Returns a vector containing the truncated values of the corresponding elements of the given vector.
938
+ /// Each element of the result contains the value of the corresponding element of a, truncated to an integral value.
939
+ #[ inline]
940
+ #[ target_feature( enable = "vector" ) ]
941
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
942
+ pub unsafe fn vec_trunc < T > ( a : T ) -> T
943
+ where
944
+ T : sealed:: VectorRound ,
945
+ {
946
+ a. vec_trunc ( )
947
+ }
948
+
949
+ /// Vector round, resolves ties by rounding towards zero.
950
+ #[ inline]
951
+ #[ target_feature( enable = "vector" ) ]
952
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
953
+ pub unsafe fn vec_round < T > ( a : T ) -> T
954
+ where
955
+ T : sealed:: VectorRound ,
956
+ {
957
+ a. vec_round ( )
958
+ }
959
+
960
+ /// Returns a vector by using the current rounding mode to round every
961
+ /// floating-point element in the given vector to integer.
962
+ #[ inline]
963
+ #[ target_feature( enable = "vector" ) ]
964
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
965
+ pub unsafe fn vec_roundc < T > ( a : T ) -> T
966
+ where
967
+ T : sealed:: VectorRound ,
968
+ {
969
+ a. vec_roundc ( )
970
+ }
971
+
972
+ /// Returns a vector containing the largest representable floating-point integral values less
973
+ /// than or equal to the values of the corresponding elements of the given vector.
974
+ #[ inline]
975
+ #[ target_feature( enable = "vector" ) ]
976
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
977
+ pub unsafe fn vec_roundm < T > ( a : T ) -> T
978
+ where
979
+ T : sealed:: VectorRound ,
980
+ {
981
+ // the IBM docs note
982
+ //
983
+ // > vec_roundm provides the same functionality as vec_floor, except that vec_roundz would not trigger the IEEE-inexact exception.
984
+ //
985
+ // but in practice `vec_floor` also does not trigger that exception, so both are equivalent
986
+ a. vec_floor ( )
987
+ }
988
+
989
+ /// Returns a vector containing the smallest representable floating-point integral values greater
990
+ /// than or equal to the values of the corresponding elements of the given vector.
991
+ #[ inline]
992
+ #[ target_feature( enable = "vector" ) ]
993
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
994
+ pub unsafe fn vec_roundp < T > ( a : T ) -> T
995
+ where
996
+ T : sealed:: VectorRound ,
997
+ {
998
+ // the IBM docs note
999
+ //
1000
+ // > vec_roundp provides the same functionality as vec_ceil, except that vec_roundz would not trigger the IEEE-inexact exception.
1001
+ //
1002
+ // but in practice `vec_ceil` also does not trigger that exception, so both are equivalent
1003
+ a. vec_ceil ( )
1004
+ }
1005
+
1006
+ /// Returns a vector containing the truncated values of the corresponding elements of the given vector.
1007
+ /// Each element of the result contains the value of the corresponding element of a, truncated to an integral value.
1008
+ #[ inline]
1009
+ #[ target_feature( enable = "vector" ) ]
1010
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
1011
+ pub unsafe fn vec_roundz < T > ( a : T ) -> T
1012
+ where
1013
+ T : sealed:: VectorRound ,
1014
+ {
1015
+ // the IBM docs note
1016
+ //
1017
+ // > vec_roundz provides the same functionality as vec_trunc, except that vec_roundz would not trigger the IEEE-inexact exception.
1018
+ //
1019
+ // but in practice `vec_trunc` also does not trigger that exception, so both are equivalent
1020
+ a. vec_trunc ( )
1021
+ }
1022
+
1023
+ /// Returns a vector by using the current rounding mode to round every floating-point element in the given vector to integer.
1024
+ #[ inline]
1025
+ #[ target_feature( enable = "vector" ) ]
1026
+ #[ unstable( feature = "stdarch_s390x" , issue = "135681" ) ]
1027
+ pub unsafe fn vec_rint < T > ( a : T ) -> T
1028
+ where
1029
+ T : sealed:: VectorRound ,
1030
+ {
1031
+ a. vec_rint ( )
1032
+ }
1033
+
846
1034
#[ cfg( test) ]
847
1035
mod tests {
848
1036
use super :: * ;
@@ -852,6 +1040,33 @@ mod tests {
852
1040
use crate :: core_arch:: simd:: * ;
853
1041
use stdarch_test:: simd_test;
854
1042
1043
// Generates one `#[simd_test]` function that applies a unary vector function `$fn`
// to a literal input vector and checks the result.
//
// Arms (tried in this order):
// 1. `f32x4, [..], ~[..]`     — approximate check: every lane of `|$fn(a) - d|` must be
//                               `<= f32::EPSILON`.
// 2. `$ty, [..], [..]`        — exact check with identical input and output lane types;
//                               forwards to arm 3 as `$ty -> $ty`.
// 3. `$ty -> $ty_out, [..], [..]` — exact check with an explicit output lane type.
macro_rules! test_vec_1 {
    { $name:ident, $fn:ident, f32x4, [$($a:expr),+], ~[$($d:expr),+] } => {
        #[simd_test(enable = "vector")]
        unsafe fn $name() {
            let input: vector_float = transmute(f32x4::new($($a),+));
            let expected: vector_float = transmute(f32x4::new($($d),+));

            // Lane-wise mask of "absolute error is within EPSILON".
            let within_tolerance = transmute(vec_cmple(vec_abs(vec_sub($fn(input), expected)), vec_splats(f32::EPSILON)));
            let all_lanes_true = m32x4::new(true, true, true, true);
            assert_eq!(all_lanes_true, within_tolerance);
        }
    };
    { $name:ident, $fn:ident, $ty:ident, [$($a:expr),+], [$($d:expr),+] } => {
        test_vec_1! { $name, $fn, $ty -> $ty, [$($a),+], [$($d),+] }
    };
    { $name:ident, $fn:ident, $ty:ident -> $ty_out:ident, [$($a:expr),+], [$($d:expr),+] } => {
        #[simd_test(enable = "vector")]
        unsafe fn $name() {
            let input: s_t_l!($ty) = transmute($ty::new($($a),+));

            let expected = $ty_out::new($($d),+);
            let actual: $ty_out = transmute($fn(input));
            assert_eq!(expected, actual);
        }
    }
}
1069
+
855
1070
macro_rules! test_vec_2 {
856
1071
{ $name: ident, $fn: ident, $ty: ident, [ $( $a: expr) ,+] , [ $( $b: expr) ,+] , [ $( $d: expr) ,+] } => {
857
1072
test_vec_2! { $name, $fn, $ty -> $ty, [ $( $a) ,+] , [ $( $b) ,+] , [ $( $d) ,+] }
@@ -1059,4 +1274,73 @@ mod tests {
1059
1274
[ 0b11001100 , 0b11001100 , 0b11001100 , 0b11001100 ] ,
1060
1275
[ 0b00110011 , 0b11110011 , 0b00001100 , 0b00000000 ] ,
1061
1276
[ !0b11111111 , !0b00111111 , !0b11000000 , !0b11001100 ] }
1277
+
1278
+ test_vec_1 ! { test_vec_floor_f32, vec_floor, f32x4,
1279
+ [ 1.1 , 1.9 , -0.5 , -0.9 ] ,
1280
+ [ 1.0 , 1.0 , -1.0 , -1.0 ]
1281
+ }
1282
+
1283
+ test_vec_1 ! { test_vec_floor_f64_1, vec_floor, f64x2,
1284
+ [ 1.1 , 1.9 ] ,
1285
+ [ 1.0 , 1.0 ]
1286
+ }
1287
+ test_vec_1 ! { test_vec_floor_f64_2, vec_floor, f64x2,
1288
+ [ -0.5 , -0.9 ] ,
1289
+ [ -1.0 , -1.0 ]
1290
+ }
1291
+
1292
+ test_vec_1 ! { test_vec_ceil_f32, vec_ceil, f32x4,
1293
+ [ 0.1 , 0.5 , 0.6 , 0.9 ] ,
1294
+ [ 1.0 , 1.0 , 1.0 , 1.0 ]
1295
+ }
1296
+ test_vec_1 ! { test_vec_ceil_f64_1, vec_ceil, f64x2,
1297
+ [ 0.1 , 0.5 ] ,
1298
+ [ 1.0 , 1.0 ]
1299
+ }
1300
+ test_vec_1 ! { test_vec_ceil_f64_2, vec_ceil, f64x2,
1301
+ [ 0.6 , 0.9 ] ,
1302
+ [ 1.0 , 1.0 ]
1303
+ }
1304
+
1305
+ // FIXME(vector-enhancements-1)
1306
+ // test_vec_1! { test_vec_round_f32, vec_round, f32x4,
1307
+ // [],
1308
+ // []
1309
+ // }
1310
+ test_vec_1 ! { test_vec_round_f64_1, vec_round, f64x2,
1311
+ [ 0.1 , 0.5 ] ,
1312
+ [ 0.0 , 0.0 ]
1313
+ }
1314
+ test_vec_1 ! { test_vec_round_f64_2, vec_round, f64x2,
1315
+ [ 0.6 , 0.9 ] ,
1316
+ [ 1.0 , 1.0 ]
1317
+ }
1318
+
1319
+ // FIXME(vector-enhancements-1)
1320
+ // test_vec_1! { test_vec_roundc_f32, vec_roundc, f32x4,
1321
+ // [],
1322
+ // []
1323
+ // }
1324
+ test_vec_1 ! { test_vec_roundc_f64_1, vec_roundc, f64x2,
1325
+ [ 0.1 , 0.5 ] ,
1326
+ [ 0.0 , 0.0 ]
1327
+ }
1328
+ test_vec_1 ! { test_vec_roundc_f64_2, vec_roundc, f64x2,
1329
+ [ 0.6 , 0.9 ] ,
1330
+ [ 1.0 , 1.0 ]
1331
+ }
1332
+
1333
+ // FIXME(vector-enhancements-1)
1334
+ // test_vec_1! { test_vec_rint_f32, vec_rint, f32x4,
1335
+ // [],
1336
+ // []
1337
+ // }
1338
+ test_vec_1 ! { test_vec_rint_f64_1, vec_rint, f64x2,
1339
+ [ 0.1 , 0.5 ] ,
1340
+ [ 0.0 , 0.0 ]
1341
+ }
1342
+ test_vec_1 ! { test_vec_rint_f64_2, vec_rint, f64x2,
1343
+ [ 0.6 , 0.9 ] ,
1344
+ [ 1.0 , 1.0 ]
1345
+ }
1062
1346
}
0 commit comments