Skip to content

Commit 7a4aaf4

Browse files
folkertdev authored and Amanieu committed
implement vec_round and friends
1 parent bfa1b0d commit 7a4aaf4

File tree

1 file changed

+284
-0
lines changed

1 file changed

+284
-0
lines changed

Diff for: crates/core_arch/src/s390x/vector.rs

+284
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,10 @@ unsafe extern "unadjusted" {
7373
#[link_name = "llvm.umin.v8i16"] fn vmnlh(a: vector_unsigned_short, b: vector_unsigned_short) -> vector_unsigned_short;
7474
#[link_name = "llvm.umin.v4i32"] fn vmnlf(a: vector_unsigned_int, b: vector_unsigned_int) -> vector_unsigned_int;
7575
#[link_name = "llvm.umin.v2i64"] fn vmnlg(a: vector_unsigned_long_long, b: vector_unsigned_long_long) -> vector_unsigned_long_long;
76+
77+
#[link_name = "llvm.s390.vfisb"] fn vfisb(a: vector_float, b: i32, c: i32) -> vector_float;
78+
#[link_name = "llvm.s390.vfidb"] fn vfidb(a: vector_double, b: i32, c: i32) -> vector_double;
79+
7680
}
7781

7882
impl_from! { i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, i64x2, u64x2, f32x4, f64x2 }
@@ -629,6 +633,71 @@ mod sealed {
629633
}
630634

631635
impl_vec_trait! { [VectorOrc vec_orc]+ 2c (orc) }
636+
637+
/// Rounding operations for the floating-point vector types.
///
/// Implementors supply only `vec_round_impl`; every other method has a default
/// body that either forwards to it with fixed constants or calls the matching
/// generic `simd_*` intrinsic.
#[unstable(feature = "stdarch_s390x", issue = "135681")]
638+
pub trait VectorRound: Sized {
639+
/// Type-specific rounding primitive. The implementations in this file pass
/// `N` and `MODE` straight through as the two integer operands of the
/// `vfisb` / `vfidb` intrinsics.
///
/// Going by the notes on the methods below, `MODE` 5, 6 and 7 correspond to
/// truncation, ceiling and floor, and the docs on the public `vec_roundc` /
/// `vec_rint` wrappers describe `MODE` 0 as "the current rounding mode".
// NOTE(review): `N` presumably is the instruction's inexact-suppression
// control (4 = suppress, 0 = may signal) — confirm against the z/Architecture
// Principles of Operation.
unsafe fn vec_round_impl<const N: i32, const MODE: i32>(self) -> Self;
640+
641+
/// Rounds via `vec_round_impl::<4, 0>` (`MODE` 0: current rounding mode).
#[inline]
642+
#[target_feature(enable = "vector")]
643+
unsafe fn vec_roundc(self) -> Self {
644+
self.vec_round_impl::<4, 0>()
645+
}
646+
647+
/// Rounds via `vec_round_impl::<4, 4>` instead of the generic `simd_round`.
#[inline]
648+
#[target_feature(enable = "vector")]
649+
unsafe fn vec_round(self) -> Self {
650+
// NOTE: simd_round resolves ties by rounding away from zero,
651+
// while the vec_round function rounds towards zero
// NOTE(review): `MODE` 4 is presumably the "round to nearest, ties to even"
// selector of the underlying instruction rather than "ties towards zero";
// the tests in this file (0.5 -> 0.0) cannot tell the two apart — confirm
// against the z/Architecture Principles of Operation.
652+
self.vec_round_impl::<4, 4>()
653+
}
654+
655+
// NOTE: vec_roundz (vec_round_impl::<4, 5>) is the same as vec_trunc
656+
/// Truncates each element by calling the generic `simd_trunc` intrinsic.
#[inline]
657+
#[target_feature(enable = "vector")]
658+
unsafe fn vec_trunc(self) -> Self {
659+
simd_trunc(self)
660+
}
661+
662+
// NOTE: vec_roundp (vec_round_impl::<4, 6>) is the same as vec_ceil
663+
/// Rounds each element up by calling the generic `simd_ceil` intrinsic.
#[inline]
664+
#[target_feature(enable = "vector")]
665+
unsafe fn vec_ceil(self) -> Self {
666+
simd_ceil(self)
667+
}
668+
669+
// NOTE: vec_roundm (vec_round_impl::<4, 7>) is the same as vec_floor
670+
/// Rounds each element down by calling the generic `simd_floor` intrinsic.
#[inline]
671+
#[target_feature(enable = "vector")]
672+
unsafe fn vec_floor(self) -> Self {
673+
simd_floor(self)
674+
}
675+
676+
/// Rounds via `vec_round_impl::<0, 0>`; differs from `vec_roundc` only in
/// passing `N = 0` instead of `N = 4`.
#[inline]
677+
#[target_feature(enable = "vector")]
678+
unsafe fn vec_rint(self) -> Self {
679+
self.vec_round_impl::<0, 0>()
680+
}
681+
}
682+
683+
// FIXME(vector-enhancements-1) apply the right target feature to all methods
684+
/// `VectorRound` for `vector_float`, backed by the `vfisb` intrinsic
/// (declared in this file with link name `llvm.s390.vfisb`).
#[unstable(feature = "stdarch_s390x", issue = "135681")]
685+
impl VectorRound for vector_float {
686+
#[inline]
687+
#[target_feature(enable = "vector")]
688+
unsafe fn vec_round_impl<const N: i32, const MODE: i32>(self) -> Self {
689+
// `N` and `MODE` are forwarded unchanged as the second and third operands.
vfisb(self, N, MODE)
690+
}
691+
}
692+
693+
/// `VectorRound` for `vector_double`, backed by the `vfidb` intrinsic
/// (declared in this file with link name `llvm.s390.vfidb`).
#[unstable(feature = "stdarch_s390x", issue = "135681")]
694+
impl VectorRound for vector_double {
695+
#[inline]
696+
#[target_feature(enable = "vector")]
697+
unsafe fn vec_round_impl<const N: i32, const MODE: i32>(self) -> Self {
698+
// `N` and `MODE` are forwarded unchanged as the second and third operands.
vfidb(self, N, MODE)
699+
}
700+
}
632701
}
633702

634703
/// Vector element-wise addition.
@@ -843,6 +912,125 @@ where
843912
a.vec_orc(b)
844913
}
845914

915+
/// Vector floor.
///
/// Each element of the result is the largest integral value less than or
/// equal to the corresponding element of `a`. Delegates to the
/// `VectorRound::vec_floor` method of `T`.
///
/// # Safety
///
/// The caller must ensure the `vector` target feature is available on the
/// executing CPU.
916+
#[inline]
917+
#[target_feature(enable = "vector")]
918+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
919+
pub unsafe fn vec_floor<T>(a: T) -> T
920+
where
921+
T: sealed::VectorRound,
922+
{
923+
a.vec_floor()
924+
}
925+
926+
/// Vector ceil.
///
/// Each element of the result is the smallest integral value greater than or
/// equal to the corresponding element of `a`. Delegates to the
/// `VectorRound::vec_ceil` method of `T`.
///
/// # Safety
///
/// The caller must ensure the `vector` target feature is available on the
/// executing CPU.
927+
#[inline]
928+
#[target_feature(enable = "vector")]
929+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
930+
pub unsafe fn vec_ceil<T>(a: T) -> T
931+
where
932+
T: sealed::VectorRound,
933+
{
934+
a.vec_ceil()
935+
}
936+
937+
/// Returns a vector containing the truncated values of the corresponding elements of the given vector.
938+
/// Each element of the result contains the value of the corresponding element of `a`, truncated to an integral value.
///
/// Delegates to the `VectorRound::vec_trunc` method of `T`.
///
/// # Safety
///
/// The caller must ensure the `vector` target feature is available on the
/// executing CPU.
939+
#[inline]
940+
#[target_feature(enable = "vector")]
941+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
942+
pub unsafe fn vec_trunc<T>(a: T) -> T
943+
where
944+
T: sealed::VectorRound,
945+
{
946+
a.vec_trunc()
947+
}
948+
949+
/// Vector round, resolves ties by rounding towards zero.
///
/// Delegates to the `VectorRound::vec_round` method of `T`, which calls
/// `vec_round_impl::<4, 4>`.
///
/// # Safety
///
/// The caller must ensure the `vector` target feature is available on the
/// executing CPU.
// NOTE(review): rounding mode 4 is presumably "round to nearest, ties to
// even", which would make the "towards zero" wording above imprecise —
// confirm against the z/Architecture Principles of Operation.
950+
#[inline]
951+
#[target_feature(enable = "vector")]
952+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
953+
pub unsafe fn vec_round<T>(a: T) -> T
954+
where
955+
T: sealed::VectorRound,
956+
{
957+
a.vec_round()
958+
}
959+
960+
/// Returns a vector by using the current rounding mode to round every
961+
/// floating-point element in the given vector to integer.
///
/// Delegates to the `VectorRound::vec_roundc` method of `T`, which calls
/// `vec_round_impl::<4, 0>`.
///
/// # Safety
///
/// The caller must ensure the `vector` target feature is available on the
/// executing CPU.
962+
#[inline]
963+
#[target_feature(enable = "vector")]
964+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
965+
pub unsafe fn vec_roundc<T>(a: T) -> T
966+
where
967+
T: sealed::VectorRound,
968+
{
969+
a.vec_roundc()
970+
}
971+
972+
/// Returns a vector containing the largest representable floating-point integral values less
973+
/// than or equal to the values of the corresponding elements of the given vector.
///
/// Implemented as a call to the `VectorRound::vec_floor` method of `T`.
///
/// # Safety
///
/// The caller must ensure the `vector` target feature is available on the
/// executing CPU.
974+
#[inline]
975+
#[target_feature(enable = "vector")]
976+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
977+
pub unsafe fn vec_roundm<T>(a: T) -> T
978+
where
979+
T: sealed::VectorRound,
980+
{
981+
// the IBM docs note
982+
//
983+
// > vec_roundm provides the same functionality as vec_floor, except that vec_roundz would not trigger the IEEE-inexact exception.
984+
//
985+
// but in practice `vec_floor` also does not trigger that exception, so both are equivalent
// NOTE(review): the quoted sentence names `vec_roundz`; presumably `vec_roundm`
// is meant — confirm against the IBM documentation.
986+
a.vec_floor()
987+
}
988+
989+
/// Returns a vector containing the smallest representable floating-point integral values greater
990+
/// than or equal to the values of the corresponding elements of the given vector.
///
/// Implemented as a call to the `VectorRound::vec_ceil` method of `T`.
///
/// # Safety
///
/// The caller must ensure the `vector` target feature is available on the
/// executing CPU.
991+
#[inline]
992+
#[target_feature(enable = "vector")]
993+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
994+
pub unsafe fn vec_roundp<T>(a: T) -> T
995+
where
996+
T: sealed::VectorRound,
997+
{
998+
// the IBM docs note
999+
//
1000+
// > vec_roundp provides the same functionality as vec_ceil, except that vec_roundz would not trigger the IEEE-inexact exception.
1001+
//
1002+
// but in practice `vec_ceil` also does not trigger that exception, so both are equivalent
// NOTE(review): the quoted sentence names `vec_roundz`; presumably `vec_roundp`
// is meant — confirm against the IBM documentation.
1003+
a.vec_ceil()
1004+
}
1005+
1006+
/// Returns a vector containing the truncated values of the corresponding elements of the given vector.
1007+
/// Each element of the result contains the value of the corresponding element of `a`, truncated to an integral value.
///
/// Implemented as a call to the `VectorRound::vec_trunc` method of `T`.
///
/// # Safety
///
/// The caller must ensure the `vector` target feature is available on the
/// executing CPU.
1008+
#[inline]
1009+
#[target_feature(enable = "vector")]
1010+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
1011+
pub unsafe fn vec_roundz<T>(a: T) -> T
1012+
where
1013+
T: sealed::VectorRound,
1014+
{
1015+
// the IBM docs note
1016+
//
1017+
// > vec_roundz provides the same functionality as vec_trunc, except that vec_roundz would not trigger the IEEE-inexact exception.
1018+
//
1019+
// but in practice `vec_trunc` also does not trigger that exception, so both are equivalent
1020+
a.vec_trunc()
1021+
}
1022+
1023+
/// Returns a vector by using the current rounding mode to round every floating-point element in the given vector to integer.
///
/// Delegates to the `VectorRound::vec_rint` method of `T`, which calls
/// `vec_round_impl::<0, 0>`; `vec_roundc` uses `<4, 0>` instead.
///
/// # Safety
///
/// The caller must ensure the `vector` target feature is available on the
/// executing CPU.
1024+
#[inline]
1025+
#[target_feature(enable = "vector")]
1026+
#[unstable(feature = "stdarch_s390x", issue = "135681")]
1027+
pub unsafe fn vec_rint<T>(a: T) -> T
1028+
where
1029+
T: sealed::VectorRound,
1030+
{
1031+
a.vec_rint()
1032+
}
1033+
8461034
#[cfg(test)]
8471035
mod tests {
8481036
use super::*;
@@ -852,6 +1040,33 @@ mod tests {
8521040
use crate::core_arch::simd::*;
8531041
use stdarch_test::simd_test;
8541042

1043+
// Generates one `#[simd_test(enable = "vector")]` test for a one-argument
// vector function `$fn`.
//
// Arms, tried in order:
//   1. `f32x4` with the expected list written `~[...]`: approximate, lane-wise
//      comparison against the expected values.
//   2. `$ty` only: shorthand that re-invokes the macro as `$ty -> $ty`.
//   3. `$ty -> $ty_out`: exact equality on the transmuted result.
macro_rules! test_vec_1 {
1044+
{ $name: ident, $fn:ident, f32x4, [$($a:expr),+], ~[$($d:expr),+] } => {
1045+
#[simd_test(enable = "vector")]
1046+
unsafe fn $name() {
1047+
let a: vector_float = transmute(f32x4::new($($a),+));
1048+
1049+
let d: vector_float = transmute(f32x4::new($($d),+));
1050+
// Lane-wise |fn(a) - d| <= f32::EPSILON; the transmute target type is
// inferred from the comparison with `e` below.
let r = transmute(vec_cmple(vec_abs(vec_sub($fn(a), d)), vec_splats(f32::EPSILON)));
1051+
// Every lane must be within tolerance.
let e = m32x4::new(true, true, true, true);
1052+
assert_eq!(e, r);
1053+
}
1054+
};
1055+
{ $name: ident, $fn:ident, $ty: ident, [$($a:expr),+], [$($d:expr),+] } => {
1056+
test_vec_1! { $name, $fn, $ty -> $ty, [$($a),+], [$($d),+] }
1057+
};
1058+
{ $name: ident, $fn:ident, $ty: ident -> $ty_out: ident, [$($a:expr),+], [$($d:expr),+] } => {
1059+
#[simd_test(enable = "vector")]
1060+
unsafe fn $name() {
1061+
// `s_t_l!` is defined elsewhere; presumably it maps the lane-array type
// name to the matching vector type — TODO confirm.
let a: s_t_l!($ty) = transmute($ty::new($($a),+));
1062+
1063+
let d = $ty_out::new($($d),+);
1064+
let r : $ty_out = transmute($fn(a));
1065+
// Exact comparison: the expected lanes must match bit-for-bit as values.
assert_eq!(d, r);
1066+
}
1067+
}
1068+
}
1069+
8551070
macro_rules! test_vec_2 {
8561071
{ $name: ident, $fn:ident, $ty: ident, [$($a:expr),+], [$($b:expr),+], [$($d:expr),+] } => {
8571072
test_vec_2! { $name, $fn, $ty -> $ty, [$($a),+], [$($b),+], [$($d),+] }
@@ -1059,4 +1274,73 @@ mod tests {
10591274
[0b11001100, 0b11001100, 0b11001100, 0b11001100],
10601275
[0b00110011, 0b11110011, 0b00001100, 0b00000000],
10611276
[!0b11111111, !0b00111111, !0b11000000, !0b11001100] }
1277+
1278+
// vec_floor: positive inputs are expected to round down to 1.0, negative
// fractional inputs down to -1.0 (exact comparison).
test_vec_1! { test_vec_floor_f32, vec_floor, f32x4,
1279+
[1.1, 1.9, -0.5, -0.9],
1280+
[1.0, 1.0, -1.0, -1.0]
1281+
}
1282+
1283+
// Same values as the f32 test, split across two f64x2 tests (two lanes each).
test_vec_1! { test_vec_floor_f64_1, vec_floor, f64x2,
1284+
[1.1, 1.9],
1285+
[1.0, 1.0]
1286+
}
1287+
test_vec_1! { test_vec_floor_f64_2, vec_floor, f64x2,
1288+
[-0.5, -0.9],
1289+
[-1.0, -1.0]
1290+
}
1291+
1292+
// vec_ceil: every input lies strictly between 0 and 1, so each lane is
// expected to round up to 1.0 (exact comparison).
test_vec_1! { test_vec_ceil_f32, vec_ceil, f32x4,
1293+
[0.1, 0.5, 0.6, 0.9],
1294+
[1.0, 1.0, 1.0, 1.0]
1295+
}
1296+
// Same values as the f32 test, split across two f64x2 tests (two lanes each).
test_vec_1! { test_vec_ceil_f64_1, vec_ceil, f64x2,
1297+
[0.1, 0.5],
1298+
[1.0, 1.0]
1299+
}
1300+
test_vec_1! { test_vec_ceil_f64_2, vec_ceil, f64x2,
1301+
[0.6, 0.9],
1302+
[1.0, 1.0]
1303+
}
1304+
1305+
// The f32 variant below is a disabled placeholder with empty input/expected
// lists; presumably it waits on vector-enhancements-1 support — TODO confirm.
// FIXME(vector-enhancements-1)
1306+
// test_vec_1! { test_vec_round_f32, vec_round, f32x4,
1307+
// [],
1308+
// []
1309+
// }
1310+
// vec_round on f64x2: the tie 0.5 is expected to produce 0.0, while 0.6 and
// 0.9 are expected to produce 1.0.
test_vec_1! { test_vec_round_f64_1, vec_round, f64x2,
1311+
[0.1, 0.5],
1312+
[0.0, 0.0]
1313+
}
1314+
test_vec_1! { test_vec_round_f64_2, vec_round, f64x2,
1315+
[0.6, 0.9],
1316+
[1.0, 1.0]
1317+
}
1318+
1319+
// The f32 variant below is a disabled placeholder with empty input/expected
// lists; presumably it waits on vector-enhancements-1 support — TODO confirm.
// FIXME(vector-enhancements-1)
1320+
// test_vec_1! { test_vec_roundc_f32, vec_roundc, f32x4,
1321+
// [],
1322+
// []
1323+
// }
1324+
// vec_roundc rounds with the current rounding mode, so these expected values
// (0.5 -> 0.0) presumably rely on the default mode being active when the
// tests run — TODO confirm.
test_vec_1! { test_vec_roundc_f64_1, vec_roundc, f64x2,
1325+
[0.1, 0.5],
1326+
[0.0, 0.0]
1327+
}
1328+
test_vec_1! { test_vec_roundc_f64_2, vec_roundc, f64x2,
1329+
[0.6, 0.9],
1330+
[1.0, 1.0]
1331+
}
1332+
1333+
// The f32 variant below is a disabled placeholder with empty input/expected
// lists; presumably it waits on vector-enhancements-1 support — TODO confirm.
// FIXME(vector-enhancements-1)
1334+
// test_vec_1! { test_vec_rint_f32, vec_rint, f32x4,
1335+
// [],
1336+
// []
1337+
// }
1338+
// vec_rint rounds with the current rounding mode, so these expected values
// (0.5 -> 0.0) presumably rely on the default mode being active when the
// tests run — TODO confirm.
test_vec_1! { test_vec_rint_f64_1, vec_rint, f64x2,
1339+
[0.1, 0.5],
1340+
[0.0, 0.0]
1341+
}
1342+
test_vec_1! { test_vec_rint_f64_2, vec_rint, f64x2,
1343+
[0.6, 0.9],
1344+
[1.0, 1.0]
1345+
}
10621346
}

0 commit comments

Comments
 (0)