Skip to content

Commit cee4f5f

Browse files
gwenn authored and alexcrichton committed
sse2: _mm_stream_* (rust-lang#228)
* sse2: _mm_stream_si128,si32,pd,si64
* sse2: _mm_stream_* tests
* Disable assert_instr for _mm_stream_si64
1 parent 61a9083 commit cee4f5f

File tree

2 files changed

+81
-0
lines changed

2 files changed

+81
-0
lines changed

coresimd/src/x86/i586/sse2.rs

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -911,6 +911,26 @@ pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
911911
);
912912
}
913913

914+
/// Stores a 128-bit integer vector to a 128-bit aligned memory location.
915+
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
916+
/// used again soon).
917+
#[inline(always)]
918+
#[target_feature = "+sse2"]
919+
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq
920+
pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) {
921+
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
922+
}
923+
924+
/// Stores a 32-bit integer value in the specified memory location.
925+
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
926+
/// used again soon).
927+
#[inline(always)]
928+
#[target_feature = "+sse2"]
929+
#[cfg_attr(test, assert_instr(movnti))]
930+
pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) {
931+
::core::intrinsics::nontemporal_store(mem_addr, a);
932+
}
933+
914934
/// Return a vector where the low element is extracted from `a` and its upper
915935
/// element is zero.
916936
#[inline(always)]
@@ -1845,6 +1865,17 @@ pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> f64x2 {
18451865
*(mem_addr as *const f64x2)
18461866
}
18471867

1868+
/// Stores a 128-bit floating point vector of [2 x double] to a 128-bit
1869+
/// aligned memory location.
1870+
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
1871+
/// used again soon).
1872+
#[inline(always)]
1873+
#[target_feature = "+sse2"]
1874+
#[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd
1875+
pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: f64x2) {
1876+
::core::intrinsics::nontemporal_store(mem::transmute(mem_addr), a);
1877+
}
1878+
18481879
/// Store 128-bits (composed of 2 packed double-precision (64-bit)
18491880
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
18501881
/// on a 16-byte boundary or a general-protection exception may be generated.
@@ -3023,6 +3054,22 @@ mod tests {
30233054
assert_eq!(r, __m128i::from(i64x2::new(2, 0)));
30243055
}
30253056

3057+
#[simd_test = "sse2"]
3058+
unsafe fn _mm_stream_si128() {
3059+
let a = __m128i::from(sse2::_mm_setr_epi32(1, 2, 3, 4));
3060+
let mut r = sse2::_mm_undefined_si128();
3061+
sse2::_mm_stream_si128(&mut r as *mut _, a);
3062+
assert_eq!(r, a);
3063+
}
3064+
3065+
#[simd_test = "sse2"]
3066+
unsafe fn _mm_stream_si32() {
3067+
let a: i32 = 7;
3068+
let mut mem = ::std::boxed::Box::<i32>::new(-1);
3069+
sse2::_mm_stream_si32(&mut *mem as *mut i32, a);
3070+
assert_eq!(a, *mem);
3071+
}
3072+
30263073
#[simd_test = "sse2"]
30273074
unsafe fn _mm_move_epi64() {
30283075
let a = i64x2::new(5, 6);
@@ -3703,6 +3750,21 @@ mod tests {
37033750
assert_eq!(r, f64x2::new(1.0, 2.0));
37043751
}
37053752

3753+
#[simd_test = "sse2"]
3754+
unsafe fn _mm_stream_pd() {
3755+
#[repr(align(128))]
3756+
struct Memory {
3757+
pub data: [f64; 2],
3758+
}
3759+
let a = f64x2::splat(7.0);
3760+
let mut mem = Memory { data: [-1.0; 2] };
3761+
3762+
sse2::_mm_stream_pd(&mut mem.data[0] as *mut f64, a);
3763+
for i in 0..2 {
3764+
assert_eq!(mem.data[i], a.extract(i as u32));
3765+
}
3766+
}
3767+
37063768
#[simd_test = "sse2"]
37073769
unsafe fn _mm_store_pd() {
37083770
let mut mem = Memory { data: [0.0f64; 4] };

coresimd/src/x86/x86_64/sse2.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,17 @@ pub unsafe fn _mm_cvttsd_si64x(a: f64x2) -> i64 {
4747
_mm_cvttsd_si64(a)
4848
}
4949

50+
/// Stores a 64-bit integer value in the specified memory location.
51+
/// To minimize caching, the data is flagged as non-temporal (unlikely to be
52+
/// used again soon).
53+
#[inline(always)]
54+
#[target_feature = "+sse2"]
55+
// FIXME movnti on windows and linux x86_64
56+
//#[cfg_attr(test, assert_instr(movntiq))]
57+
pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
58+
::core::intrinsics::nontemporal_store(mem_addr, a);
59+
}
60+
5061
#[cfg(test)]
5162
mod tests {
5263
use stdsimd_test::simd_test;
@@ -88,4 +99,12 @@ mod tests {
8899
let r = sse2::_mm_cvttsd_si64x(a);
89100
assert_eq!(r, i64::MIN);
90101
}
102+
103+
#[simd_test = "sse2"]
104+
unsafe fn _mm_stream_si64() {
105+
let a: i64 = 7;
106+
let mut mem = ::std::boxed::Box::<i64>::new(-1);
107+
sse2::_mm_stream_si64(&mut *mem as *mut i64, a);
108+
assert_eq!(a, *mem);
109+
}
91110
}

0 commit comments

Comments
 (0)