@@ -7,6 +7,7 @@ use stdsimd_test::assert_instr;
7
7
use simd_llvm:: { simd_cast, simd_shuffle2, simd_shuffle4, simd_shuffle8} ;
8
8
use v128:: { f32x4, f64x2, i32x4, i64x2} ;
9
9
use v256:: * ;
10
+ use x86:: { __m128i, __m256i} ;
10
11
11
12
/// Add packed double-precision (64-bit) floating-point elements
12
13
/// in `a` and `b`.
@@ -1827,6 +1828,34 @@ pub unsafe fn _mm256_set1_epi64x(a: i64) -> i64x4 {
1827
1828
i64x4:: new ( a, a, a, a)
1828
1829
}
1829
1830
1831
+ /// Cast vector of type __m256d to type __m256.
1832
+ #[ inline( always) ]
1833
+ #[ target_feature = "+avx" ]
1834
+ pub unsafe fn _mm256_castpd_ps ( a : f64x4 ) -> f32x8 {
1835
+ mem:: transmute ( a)
1836
+ }
1837
+
1838
+ /// Cast vector of type __m256 to type __m256d.
1839
+ #[ inline( always) ]
1840
+ #[ target_feature = "+avx" ]
1841
+ pub unsafe fn _mm256_castps_pd ( a : f32x8 ) -> f64x4 {
1842
+ mem:: transmute ( a)
1843
+ }
1844
+
1845
+ /// Casts vector of type __m256 to type __m256i.
1846
+ #[ inline( always) ]
1847
+ #[ target_feature = "+avx" ]
1848
+ pub unsafe fn _mm256_castps_si256 ( a : f32x8 ) -> i64x4 {
1849
+ mem:: transmute ( a)
1850
+ }
1851
+
1852
+ /// Casts vector of type __m256i to type __m256.
1853
+ #[ inline( always) ]
1854
+ #[ target_feature = "+avx" ]
1855
+ pub unsafe fn _mm256_castsi256_ps ( a : i64x4 ) -> f32x8 {
1856
+ mem:: transmute ( a)
1857
+ }
1858
+
1830
1859
/// Casts vector of type __m256d to type __m256i.
1831
1860
/// This intrinsic is only used for compilation and does not generate any
1832
1861
/// instructions, thus it has zero latency.
@@ -1899,6 +1928,37 @@ pub unsafe fn _mm256_castsi128_si256(a: i64x2) -> i64x4 {
1899
1928
simd_shuffle4 ( a, a, [ 0 , 1 , 0 , 0 ] )
1900
1929
}
1901
1930
1931
+ /// Constructs a 256-bit floating-point vector of [8 x float] from a
1932
+ /// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
1933
+ /// the value of the source vector. The upper 128 bits are set to zero.
1934
+ #[ inline( always) ]
1935
+ #[ target_feature = "+avx,+sse" ]
1936
+ pub unsafe fn _mm256_zextps128_ps256 ( a : f32x4 ) -> f32x8 {
1937
+ use x86:: sse:: _mm_setzero_ps;
1938
+ simd_shuffle8 ( a, _mm_setzero_ps ( ) , [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] )
1939
+ }
1940
+
1941
+ /// Constructs a 256-bit integer vector from a 128-bit integer vector.
1942
+ /// The lower 128 bits contain the value of the source vector. The upper
1943
+ /// 128 bits are set to zero.
1944
+ #[ inline( always) ]
1945
+ #[ target_feature = "+avx,+sse2" ]
1946
+ pub unsafe fn _mm256_zextsi128_si256 ( a : i64x2 ) -> i64x4 {
1947
+ use x86:: sse2:: _mm_setzero_si128;
1948
+ simd_shuffle4 ( a, mem:: transmute ( _mm_setzero_si128 ( ) ) , [ 0 , 1 , 2 , 3 ] )
1949
+ }
1950
+
1951
+ /// Constructs a 256-bit floating-point vector of [4 x double] from a
1952
+ /// 128-bit floating-point vector of [2 x double]. The lower 128 bits
1953
+ /// contain the value of the source vector. The upper 128 bits are set
1954
+ /// to zero.
1955
+ #[ inline( always) ]
1956
+ #[ target_feature = "+avx,+sse2" ]
1957
+ pub unsafe fn _mm256_zextpd128_pd256 ( a : f64x2 ) -> f64x4 {
1958
+ use x86:: sse2:: _mm_setzero_pd;
1959
+ simd_shuffle4 ( a, _mm_setzero_pd ( ) , [ 0 , 1 , 2 , 3 ] )
1960
+ }
1961
+
1902
1962
/// Return vector of type `f32x8` with undefined elements.
1903
1963
#[ inline( always) ]
1904
1964
#[ target_feature = "+avx" ]
@@ -1920,6 +1980,34 @@ pub unsafe fn _mm256_undefined_si256() -> i64x4 {
1920
1980
i64x4:: splat ( mem:: uninitialized ( ) )
1921
1981
}
1922
1982
1983
+ /// Set packed __m256 returned vector with the supplied values.
1984
+ #[ inline( always) ]
1985
+ #[ target_feature = "+avx" ]
1986
+ #[ cfg_attr( test, assert_instr( vinsertf128) ) ]
1987
+ pub unsafe fn _mm256_set_m128 ( hi : f32x4 , lo : f32x4 ) -> f32x8 {
1988
+ simd_shuffle8 ( lo, hi, [ 0 , 1 , 2 , 3 , 4 , 5 , 6 , 7 ] )
1989
+ }
1990
+
1991
+ /// Set packed __m256d returned vector with the supplied values.
1992
+ #[ inline( always) ]
1993
+ #[ target_feature = "+avx" ]
1994
+ #[ cfg_attr( test, assert_instr( vinsertf128) ) ]
1995
+ pub unsafe fn _mm256_set_m128d ( hi : f64x2 , lo : f64x2 ) -> f64x4 {
1996
+ let hi: f32x4 = mem:: transmute ( hi) ;
1997
+ let lo: f32x4 = mem:: transmute ( lo) ;
1998
+ mem:: transmute ( _mm256_set_m128 ( hi, lo) )
1999
+ }
2000
+
2001
+ /// Set packed __m256i returned vector with the supplied values.
2002
+ #[ inline( always) ]
2003
+ #[ target_feature = "+avx" ]
2004
+ #[ cfg_attr( test, assert_instr( vinsertf128) ) ]
2005
+ pub unsafe fn _mm256_set_m128i ( hi : __m128i , lo : __m128i ) -> __m256i {
2006
+ let hi: f32x4 = mem:: transmute ( hi) ;
2007
+ let lo: f32x4 = mem:: transmute ( lo) ;
2008
+ mem:: transmute ( _mm256_set_m128 ( hi, lo) )
2009
+ }
2010
+
1923
2011
/// LLVM intrinsics used in the above functions
1924
2012
#[ allow( improper_ctypes) ]
1925
2013
extern "C" {
@@ -2070,7 +2158,7 @@ mod tests {
2070
2158
use stdsimd_test:: simd_test;
2071
2159
use test:: black_box; // Used to inhibit constant-folding.
2072
2160
2073
- use v128:: { f32x4, f64x2, i32x4, i64x2} ;
2161
+ use v128:: { f32x4, f64x2, i8x16 , i32x4, i64x2} ;
2074
2162
use v256:: * ;
2075
2163
use x86:: avx;
2076
2164
@@ -3390,6 +3478,38 @@ mod tests {
3390
3478
assert_eq ! ( r, i64x4:: splat( 1 ) ) ;
3391
3479
}
3392
3480
3481
+ #[ simd_test = "avx" ]
3482
+ unsafe fn _mm256_castpd_ps ( ) {
3483
+ let a = f64x4:: new ( 1. , 2. , 3. , 4. ) ;
3484
+ let r = avx:: _mm256_castpd_ps ( a) ;
3485
+ let e = f32x8:: new ( 0. , 1.875 , 0. , 2. , 0. , 2.125 , 0. , 2.25 ) ;
3486
+ assert_eq ! ( r, e) ;
3487
+ }
3488
+
3489
+ #[ simd_test = "avx" ]
3490
+ unsafe fn _mm256_castps_pd ( ) {
3491
+ let a = f32x8:: new ( 0. , 1.875 , 0. , 2. , 0. , 2.125 , 0. , 2.25 ) ;
3492
+ let r = avx:: _mm256_castps_pd ( a) ;
3493
+ let e = f64x4:: new ( 1. , 2. , 3. , 4. ) ;
3494
+ assert_eq ! ( r, e) ;
3495
+ }
3496
+
3497
+ #[ simd_test = "avx" ]
3498
+ unsafe fn _mm256_castps_si256 ( ) {
3499
+ let a = f32x8:: new ( 1. , 2. , 3. , 4. , 5. , 6. , 7. , 8. ) ;
3500
+ let r = avx:: _mm256_castps_si256 ( a) ;
3501
+ let e = i64x4:: new ( 4611686019492741120 , 4647714816524288000 , 4665729215040061440 , 4683743613553737728 ) ;
3502
+ assert_eq ! ( r, e) ;
3503
+ }
3504
+
3505
+ #[ simd_test = "avx" ]
3506
+ unsafe fn _mm256_castsi256_ps ( ) {
3507
+ let a = i64x4:: new ( 4611686019492741120 , 4647714816524288000 , 4665729215040061440 , 4683743613553737728 ) ;
3508
+ let r = avx:: _mm256_castsi256_ps ( a) ;
3509
+ let e = f32x8:: new ( 1. , 2. , 3. , 4. , 5. , 6. , 7. , 8. ) ;
3510
+ assert_eq ! ( r, e) ;
3511
+ }
3512
+
3393
3513
#[ simd_test = "avx" ]
3394
3514
unsafe fn _mm256_castpd_si256 ( ) {
3395
3515
let a = f64x4:: new ( 1. , 2. , 3. , 4. ) ;
@@ -3424,4 +3544,61 @@ mod tests {
3424
3544
let r = avx:: _mm256_castsi256_si128 ( a) ;
3425
3545
assert_eq ! ( r, i64x2:: new( 1 , 2 ) ) ;
3426
3546
}
3547
+
3548
+ #[ simd_test = "avx" ]
3549
+ unsafe fn _mm256_zextps128_ps256 ( ) {
3550
+ let a = f32x4:: new ( 1. , 2. , 3. , 4. ) ;
3551
+ let r = avx:: _mm256_zextps128_ps256 ( a) ;
3552
+ let e = f32x8:: new ( 1. , 2. , 3. , 4. , 0. , 0. , 0. , 0. ) ;
3553
+ assert_eq ! ( r, e) ;
3554
+ }
3555
+
3556
+ #[ simd_test = "avx" ]
3557
+ unsafe fn _mm256_zextsi128_si256 ( ) {
3558
+ let a = i64x2:: new ( 1 , 2 ) ;
3559
+ let r = avx:: _mm256_zextsi128_si256 ( a) ;
3560
+ let e = i64x4:: new ( 1 , 2 , 0 , 0 ) ;
3561
+ assert_eq ! ( r, e) ;
3562
+ }
3563
+
3564
+ #[ simd_test = "avx" ]
3565
+ unsafe fn _mm256_zextpd128_pd256 ( ) {
3566
+ let a = f64x2:: new ( 1. , 2. ) ;
3567
+ let r = avx:: _mm256_zextpd128_pd256 ( a) ;
3568
+ let e = f64x4:: new ( 1. , 2. , 0. , 0. ) ;
3569
+ assert_eq ! ( r, e) ;
3570
+ }
3571
+
3572
+ #[ simd_test = "avx" ]
3573
+ unsafe fn _mm256_set_m128 ( ) {
3574
+ let hi = f32x4:: new ( 5. , 6. , 7. , 8. ) ;
3575
+ let lo = f32x4:: new ( 1. , 2. , 3. , 4. ) ;
3576
+ let r = avx:: _mm256_set_m128 ( hi, lo) ;
3577
+ let e = f32x8:: new ( 1. , 2. , 3. , 4. , 5. , 6. , 7. , 8. ) ;
3578
+ assert_eq ! ( r, e) ;
3579
+ }
3580
+
3581
+ #[ simd_test = "avx" ]
3582
+ unsafe fn _mm256_set_m128d ( ) {
3583
+ let hi = f64x2:: new ( 3. , 4. ) ;
3584
+ let lo = f64x2:: new ( 1. , 2. ) ;
3585
+ let r = avx:: _mm256_set_m128d ( hi, lo) ;
3586
+ let e = f64x4:: new ( 1. , 2. , 3. , 4. ) ;
3587
+ assert_eq ! ( r, e) ;
3588
+ }
3589
+
3590
+ #[ simd_test = "avx" ]
3591
+ unsafe fn _mm256_set_m128i ( ) {
3592
+ let hi = i8x16:: new ( 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 ,
3593
+ 25 , 26 , 27 , 28 , 29 , 30 , 31 , 32 ) ;
3594
+ let lo = i8x16:: new ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ,
3595
+ 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 ) ;
3596
+ let r = avx:: _mm256_set_m128i ( hi, lo) ;
3597
+ let e = i8x32:: new (
3598
+ 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ,
3599
+ 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 ,
3600
+ 17 , 18 , 19 , 20 , 21 , 22 , 23 , 24 ,
3601
+ 25 , 26 , 27 , 28 , 29 , 30 , 31 , 32 ) ;
3602
+ assert_eq ! ( r, e) ;
3603
+ }
3427
3604
}
0 commit comments