Skip to content

Commit 9679514

Browse files
pythoneer authored and alexcrichton committed
* added _mm_cvtps_pd * added _mm_set_sd * added _mm_set1_pd * added _mm_set_pd1 * added _mm_set_pd * added _mm_setr_pd * added _mm_setzero_pd
1 parent d9d0ae4 commit 9679514

File tree

1 file changed

+100
-0
lines changed

1 file changed

+100
-0
lines changed

src/x86/sse2.rs

+100
Original file line numberDiff line numberDiff line change
@@ -1726,6 +1726,13 @@ pub unsafe fn _mm_cvtpd_ps(a: f64x2) -> f32x4 {
17261726
cvtpd2ps(a)
17271727
}
17281728

1729+
/// Convert the lower two packed single-precision (32-bit) floating-point elements in `a` to
/// packed double-precision (64-bit) floating-point elements.
#[inline(always)]
1730+
#[target_feature = "+sse2"]
1731+
#[cfg_attr(test, assert_instr(cvtps2pd))]
1732+
pub unsafe fn _mm_cvtps_pd(a: f32x4) -> f64x2 {
1733+
cvtps2pd(a)
1734+
}
1735+
17291736
/// Convert packed double-precision (64-bit) floating-point elements in `a` to packed 32-bit integers.
17301737
#[inline(always)]
17311738
#[target_feature = "+sse2"]
@@ -1808,6 +1815,50 @@ pub unsafe fn _mm_cvttps_epi32(a: f32x4) -> i32x4 {
18081815
cvttps2dq(a)
18091816
}
18101817

1818+
/// Copy double-precision (64-bit) floating-point element `a` to the lower element of the
1819+
/// packed 64-bit return value
1820+
#[inline(always)]
1821+
#[target_feature = "+sse2"]
1822+
pub unsafe fn _mm_set_sd(a: f64) -> f64x2 {
1823+
f64x2::new(a, 0_f64)
1824+
}
1825+
1826+
/// Broadcast double-precision (64-bit) floating-point value a to all elements of the return value
1827+
#[inline(always)]
1828+
#[target_feature = "+sse2"]
1829+
pub unsafe fn _mm_set1_pd(a: f64) -> f64x2 {
1830+
f64x2::new(a, a)
1831+
}
1832+
1833+
/// Broadcast double-precision (64-bit) floating-point value a to all elements of the return value
1834+
#[inline(always)]
1835+
#[target_feature = "+sse2"]
1836+
pub unsafe fn _mm_set_pd1(a: f64) -> f64x2 {
1837+
f64x2::new(a, a)
1838+
}
1839+
1840+
/// Set packed double-precision (64-bit) floating-point elements in the return value with the
1841+
/// supplied values.
1842+
#[inline(always)]
1843+
#[target_feature = "+sse2"]
1844+
pub unsafe fn _mm_set_pd(a: f64, b: f64) -> f64x2 {
1845+
f64x2::new(a, b)
1846+
}
1847+
1848+
/// Set packed double-precision (64-bit) floating-point elements in the return value with the
1849+
/// supplied values in reverse order.
1850+
#[inline(always)]
1851+
#[target_feature = "+sse2"]
1852+
pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> f64x2 {
1853+
f64x2::new(b, a)
1854+
}
1855+
1856+
/// returns packed double-precision (64-bit) floating-point elements with all zeros.
1857+
#[inline(always)]
1858+
#[target_feature = "+sse2"]
1859+
pub unsafe fn _mm_setzero_pd() -> f64x2 {
1860+
f64x2::splat(0_f64)
1861+
}
18111862

18121863
/// Return a mask of the most significant bit of each element in `a`.
18131864
///
@@ -1991,6 +2042,8 @@ extern {
19912042
fn movmskpd(a: f64x2) -> i32;
19922043
#[link_name = "llvm.x86.sse2.cvtpd2ps"]
19932044
fn cvtpd2ps(a: f64x2) -> f32x4;
2045+
#[link_name = "llvm.x86.sse2.cvtps2pd"]
2046+
fn cvtps2pd(a: f32x4) -> f64x2;
19942047
#[link_name = "llvm.x86.sse2.cvtpd2dq"]
19952048
fn cvtpd2dq(a: f64x2) -> i32x4;
19962049
#[link_name = "llvm.x86.sse2.cvtsd2si"]
@@ -3551,6 +3604,17 @@ mod tests {
35513604
assert_eq!(r, f32x4::new(f32::MAX, f32::MIN, 0.0,0.0));
35523605
}
35533606

3607+
#[simd_test = "sse2"]
3608+
unsafe fn _mm_cvtps_pd() {
3609+
use std::{f64, f32};
3610+
3611+
let r = sse2::_mm_cvtps_pd(f32x4::new(-1.0, 2.0, -3.0, 5.0));
3612+
assert_eq!(r, f64x2::new(-1.0, 2.0));
3613+
3614+
let r = sse2::_mm_cvtps_pd(f32x4::new(f32::MAX, f32::INFINITY, f32::NEG_INFINITY, f32::MIN));
3615+
assert_eq!(r, f64x2::new(f32::MAX as f64, f64::INFINITY));
3616+
}
3617+
35543618
#[simd_test = "sse2"]
35553619
unsafe fn _mm_cvtpd_epi32() {
35563620
use std::{f64, i32};
@@ -3689,6 +3753,42 @@ mod tests {
36893753
assert_eq!(r, i32x4::new(i32::MIN, i32::MIN, i32::MIN, i32::MIN));
36903754
}
36913755

3756+
#[simd_test = "sse2"]
3757+
unsafe fn _mm_set_sd() {
3758+
let r = sse2::_mm_set_sd(-1.0_f64);
3759+
assert_eq!(r, f64x2::new(-1.0_f64, 0_f64));
3760+
}
3761+
3762+
#[simd_test = "sse2"]
3763+
unsafe fn _mm_set1_pd() {
3764+
let r = sse2::_mm_set1_pd(-1.0_f64);
3765+
assert_eq!(r, f64x2::new(-1.0_f64, -1.0_f64));
3766+
}
3767+
3768+
#[simd_test = "sse2"]
3769+
unsafe fn _mm_set_pd1() {
3770+
let r = sse2::_mm_set_pd1(-2.0_f64);
3771+
assert_eq!(r, f64x2::new(-2.0_f64, -2.0_f64));
3772+
}
3773+
3774+
#[simd_test = "sse2"]
3775+
unsafe fn _mm_set_pd() {
3776+
let r = sse2::_mm_set_pd(1.0_f64, 5.0_f64);
3777+
assert_eq!(r, f64x2::new(1.0_f64, 5.0_f64));
3778+
}
3779+
3780+
#[simd_test = "sse2"]
3781+
unsafe fn _mm_setr_pd() {
3782+
let r = sse2::_mm_setr_pd(1.0_f64, -5.0_f64);
3783+
assert_eq!(r, f64x2::new(-5.0_f64, 1.0_f64));
3784+
}
3785+
3786+
#[simd_test = "sse2"]
3787+
unsafe fn _mm_setzero_pd() {
3788+
let r = sse2::_mm_setzero_pd();
3789+
assert_eq!(r, f64x2::new(0_f64, 0_f64));
3790+
}
3791+
36923792
#[simd_test = "sse2"]
36933793
unsafe fn _mm_load1_pd() {
36943794
let d = -5.0;

0 commit comments

Comments
 (0)