Skip to content

Commit 0024ba9

Browse files
krampenschiesser authored and alexcrichton committed
added support for _mm_cvtpd_ps / cvtpd2ps
1 parent 0970902 commit 0024ba9

File tree

1 file changed

+27
-0
lines changed

1 file changed

+27
-0
lines changed

src/x86/sse2.rs

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1718,6 +1718,14 @@ pub unsafe fn _mm_ucomineq_sd(a: f64x2, b: f64x2) -> bool {
17181718
mem::transmute(ucomineqsd(a, b) as u8)
17191719
}
17201720

1721+
/// Convert packed double-precision (64-bit) floating-point elements in `a`
/// to packed single-precision (32-bit) floating-point elements.
///
/// The two converted values are the first two elements of the result. The
/// tests in this file expect the remaining two elements to be `0.0`, and
/// expect inputs beyond the `f32` range (`f64::MAX`, `f64::MIN`) to become
/// positive and negative infinity respectively.
1722+
#[inline(always)]
1723+
#[target_feature = "+sse2"]
1724+
#[cfg_attr(test, assert_instr(cvtpd2ps))]
1725+
pub unsafe fn _mm_cvtpd_ps(a: f64x2) -> f32x4 {
1726+
cvtpd2ps(a)
1727+
}
1728+
17211729
/// Return a mask of the most significant bit of each element in `a`.
17221730
///
17231731
/// The mask is stored in the 2 least significant bits of the return value.
@@ -1882,6 +1890,8 @@ extern {
18821890
fn ucomineqsd(a: f64x2, b: f64x2) -> i32;
18831891
#[link_name = "llvm.x86.sse2.movmsk.pd"]
18841892
fn movmskpd(a: f64x2) -> i32;
1893+
// Foreign declaration bound to the link name `llvm.x86.sse2.cvtpd2ps`;
// `_mm_cvtpd_ps` forwards its argument to this function.
#[link_name = "llvm.x86.sse2.cvtpd2ps"]
1894+
fn cvtpd2ps(a: f64x2) -> f32x4;
18851895
}
18861896

18871897
#[cfg(test)]
@@ -3406,4 +3416,21 @@ mod tests {
34063416
let r = sse2::_mm_movemask_pd(f64x2::new(-1.0, -5.0));
34073417
assert_eq!(r, 0b11);
34083418
}
3419+
3420+
// Exercises `sse2::_mm_cvtpd_ps` with ordinary values, values outside the
// `f32` range, and the `f32` extremes widened to `f64`.
#[simd_test = "sse2"]
3421+
unsafe fn _mm_cvtpd_ps() {
3422+
use std::{f64,f32};
3423+
3424+
// Small finite inputs: the first two result elements equal the inputs and
// the last two are expected to be 0.0.
let r = sse2::_mm_cvtpd_ps(f64x2::new(-1.0, 5.0));
3425+
assert_eq!(r, f32x4::new(-1.0, 5.0, 0.0, 0.0));
3426+
3427+
// Same check with both inputs negative.
let r = sse2::_mm_cvtpd_ps(f64x2::new(-1.0, -5.0));
3428+
assert_eq!(r, f32x4::new(-1.0, -5.0, 0.0, 0.0));
3429+
3430+
// `f64::MAX` / `f64::MIN` are expected to convert to +/- infinity.
let r = sse2::_mm_cvtpd_ps(f64x2::new(f64::MAX, f64::MIN));
3431+
assert_eq!(r, f32x4::new(f32::INFINITY, f32::NEG_INFINITY, 0.0,0.0));
3432+
3433+
// `f32::MAX` / `f32::MIN` widened to `f64` are expected to convert back
// to the same `f32` values.
let r = sse2::_mm_cvtpd_ps(f64x2::new(f32::MAX as f64, f32::MIN as f64));
3434+
assert_eq!(r, f32x4::new(f32::MAX, f32::MIN, 0.0,0.0));
3435+
}
34093436
}

0 commit comments

Comments
 (0)