@@ -911,6 +911,26 @@ pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) {
911
911
) ;
912
912
}
913
913
914
+ /// Stores a 128-bit integer vector to a 128-bit aligned memory location.
915
+ /// To minimize caching, the data is flagged as non-temporal (unlikely to be
916
+ /// used again soon).
917
+ #[ inline( always) ]
918
+ #[ target_feature = "+sse2" ]
919
+ #[ cfg_attr( test, assert_instr( movntps) ) ] // FIXME movntdq
920
+ pub unsafe fn _mm_stream_si128 ( mem_addr : * mut __m128i , a : __m128i ) {
921
+ :: core:: intrinsics:: nontemporal_store ( mem:: transmute ( mem_addr) , a) ;
922
+ }
923
+
924
+ /// Stores a 32-bit integer value in the specified memory location.
925
+ /// To minimize caching, the data is flagged as non-temporal (unlikely to be
926
+ /// used again soon).
927
+ #[ inline( always) ]
928
+ #[ target_feature = "+sse2" ]
929
+ #[ cfg_attr( test, assert_instr( movnti) ) ]
930
+ pub unsafe fn _mm_stream_si32 ( mem_addr : * mut i32 , a : i32 ) {
931
+ :: core:: intrinsics:: nontemporal_store ( mem_addr, a) ;
932
+ }
933
+
914
934
/// Return a vector where the low element is extracted from `a` and its upper
915
935
/// element is zero.
916
936
#[ inline( always) ]
@@ -1845,6 +1865,17 @@ pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> f64x2 {
1845
1865
* ( mem_addr as * const f64x2 )
1846
1866
}
1847
1867
1868
+ /// Stores a 128-bit floating point vector of [2 x double] to a 128-bit
1869
+ /// aligned memory location.
1870
+ /// To minimize caching, the data is flagged as non-temporal (unlikely to be
1871
+ /// used again soon).
1872
+ #[ inline( always) ]
1873
+ #[ target_feature = "+sse2" ]
1874
+ #[ cfg_attr( test, assert_instr( movntps) ) ] // FIXME movntpd
1875
+ pub unsafe fn _mm_stream_pd ( mem_addr : * mut f64 , a : f64x2 ) {
1876
+ :: core:: intrinsics:: nontemporal_store ( mem:: transmute ( mem_addr) , a) ;
1877
+ }
1878
+
1848
1879
/// Store 128-bits (composed of 2 packed double-precision (64-bit)
1849
1880
/// floating-point elements) from `a` into memory. `mem_addr` must be aligned
1850
1881
/// on a 16-byte boundary or a general-protection exception may be generated.
@@ -3023,6 +3054,22 @@ mod tests {
3023
3054
assert_eq ! ( r, __m128i:: from( i64x2:: new( 2 , 0 ) ) ) ;
3024
3055
}
3025
3056
3057
+ #[ simd_test = "sse2" ]
3058
+ unsafe fn _mm_stream_si128 ( ) {
3059
+ let a = __m128i:: from ( sse2:: _mm_setr_epi32 ( 1 , 2 , 3 , 4 ) ) ;
3060
+ let mut r = sse2:: _mm_undefined_si128 ( ) ;
3061
+ sse2:: _mm_stream_si128 ( & mut r as * mut _ , a) ;
3062
+ assert_eq ! ( r, a) ;
3063
+ }
3064
+
3065
+ #[ simd_test = "sse2" ]
3066
+ unsafe fn _mm_stream_si32 ( ) {
3067
+ let a: i32 = 7 ;
3068
+ let mut mem = :: std:: boxed:: Box :: < i32 > :: new ( -1 ) ;
3069
+ sse2:: _mm_stream_si32 ( & mut * mem as * mut i32 , a) ;
3070
+ assert_eq ! ( a, * mem) ;
3071
+ }
3072
+
3026
3073
#[ simd_test = "sse2" ]
3027
3074
unsafe fn _mm_move_epi64 ( ) {
3028
3075
let a = i64x2:: new ( 5 , 6 ) ;
@@ -3703,6 +3750,21 @@ mod tests {
3703
3750
assert_eq ! ( r, f64x2:: new( 1.0 , 2.0 ) ) ;
3704
3751
}
3705
3752
3753
+ #[ simd_test = "sse2" ]
3754
+ unsafe fn _mm_stream_pd ( ) {
3755
+ #[ repr( align( 128 ) ) ]
3756
+ struct Memory {
3757
+ pub data : [ f64 ; 2 ] ,
3758
+ }
3759
+ let a = f64x2:: splat ( 7.0 ) ;
3760
+ let mut mem = Memory { data : [ -1.0 ; 2 ] } ;
3761
+
3762
+ sse2:: _mm_stream_pd ( & mut mem. data [ 0 ] as * mut f64 , a) ;
3763
+ for i in 0 ..2 {
3764
+ assert_eq ! ( mem. data[ i] , a. extract( i as u32 ) ) ;
3765
+ }
3766
+ }
3767
+
3706
3768
#[ simd_test = "sse2" ]
3707
3769
unsafe fn _mm_store_pd ( ) {
3708
3770
let mut mem = Memory { data : [ 0.0f64 ; 4 ] } ;
0 commit comments