@@ -1537,6 +1537,37 @@ pub unsafe fn _mm256_lddqu_si256(mem_addr: *const i8x32) -> i8x32 {
1537
1537
vlddqu ( mem_addr as * const i8 )
1538
1538
}
1539
1539
1540
+ /// Moves integer data from a 256-bit integer vector to a 32-byte
1541
+ /// aligned memory location. To minimize caching, the data is flagged as
1542
+ /// non-temporal (unlikely to be used again soon)
1543
+ #[ inline( always) ]
1544
+ #[ target_feature = "+avx" ]
1545
+ #[ cfg_attr( test, assert_instr( vmovntps) ) ] // FIXME vmovntdq
1546
+ pub unsafe fn _mm256_stream_si256 ( mem_addr : * const __m256i , a : __m256i ) {
1547
+ :: core:: intrinsics:: nontemporal_store ( mem:: transmute ( mem_addr) , a) ;
1548
+ }
1549
+
1550
+ /// Moves double-precision values from a 256-bit vector of [4 x double]
1551
+ /// to a 32-byte aligned memory location. To minimize caching, the data is
1552
+ /// flagged as non-temporal (unlikely to be used again soon).
1553
+ #[ inline( always) ]
1554
+ #[ target_feature = "+avx" ]
1555
+ #[ cfg_attr( test, assert_instr( vmovntps) ) ] // FIXME vmovntpd
1556
+ pub unsafe fn _mm256_stream_pd ( mem_addr : * const f64 , a : f64x4 ) {
1557
+ :: core:: intrinsics:: nontemporal_store ( mem:: transmute ( mem_addr) , a) ;
1558
+ }
1559
+
1560
+ /// Moves single-precision floating point values from a 256-bit vector
1561
+ /// of [8 x float] to a 32-byte aligned memory location. To minimize
1562
+ /// caching, the data is flagged as non-temporal (unlikely to be used again
1563
+ /// soon).
1564
+ #[ inline( always) ]
1565
+ #[ target_feature = "+avx" ]
1566
+ #[ cfg_attr( test, assert_instr( vmovntps) ) ]
1567
+ pub unsafe fn _mm256_stream_ps ( mem_addr : * const f32 , a : f32x8 ) {
1568
+ :: core:: intrinsics:: nontemporal_store ( mem:: transmute ( mem_addr) , a) ;
1569
+ }
1570
+
1540
1571
/// Compute the approximate reciprocal of packed single-precision (32-bit)
1541
1572
/// floating-point elements in `a`, and return the results. The maximum
1542
1573
/// relative error for this approximation is less than 1.5*2^-12.
@@ -3532,6 +3563,44 @@ mod tests {
3532
3563
assert_eq ! ( r, e) ;
3533
3564
}
3534
3565
3566
+ #[ simd_test = "avx" ]
3567
+ unsafe fn _mm256_stream_si256 ( ) {
3568
+ let a = __m256i:: from ( avx:: _mm256_setr_epi64x ( 1 , 2 , 3 , 4 ) ) ;
3569
+ let mut r = avx:: _mm256_undefined_si256 ( ) ;
3570
+ avx:: _mm256_stream_si256 ( & mut r as * mut _ , a) ;
3571
+ assert_eq ! ( r, a) ;
3572
+ }
3573
+
3574
+ #[ simd_test = "avx" ]
3575
+ unsafe fn _mm256_stream_pd ( ) {
3576
+ #[ repr( align( 32 ) ) ]
3577
+ struct Memory {
3578
+ pub data : [ f64 ; 4 ] ,
3579
+ }
3580
+ let a = f64x4:: splat ( 7.0 ) ;
3581
+ let mut mem = Memory { data : [ -1.0 ; 4 ] } ;
3582
+
3583
+ avx:: _mm256_stream_pd ( & mut mem. data [ 0 ] as * mut f64 , a) ;
3584
+ for i in 0 ..4 {
3585
+ assert_eq ! ( mem. data[ i] , a. extract( i as u32 ) ) ;
3586
+ }
3587
+ }
3588
+
3589
+ #[ simd_test = "avx" ]
3590
+ unsafe fn _mm256_stream_ps ( ) {
3591
+ #[ repr( align( 32 ) ) ]
3592
+ struct Memory {
3593
+ pub data : [ f32 ; 8 ] ,
3594
+ }
3595
+ let a = f32x8:: splat ( 7.0 ) ;
3596
+ let mut mem = Memory { data : [ -1.0 ; 8 ] } ;
3597
+
3598
+ avx:: _mm256_stream_ps ( & mut mem. data [ 0 ] as * mut f32 , a) ;
3599
+ for i in 0 ..8 {
3600
+ assert_eq ! ( mem. data[ i] , a. extract( i as u32 ) ) ;
3601
+ }
3602
+ }
3603
+
3535
3604
#[ simd_test = "avx" ]
3536
3605
unsafe fn _mm256_rcp_ps ( ) {
3537
3606
let a = f32x8:: new ( 1. , 2. , 3. , 4. , 5. , 6. , 7. , 8. ) ;
0 commit comments