
Commit 2dbe8d0

gwenn authored and alexcrichton committed
* avx: _mm256_zextps128_ps256
* avx: _mm256_zextpd128_pd256
* avx: _mm256_set_m128
* avx: _mm256_set_m128d
* avx: _mm256_castpd_ps
* avx: _mm256_castps_pd
* avx: _mm256_castps_si256
* avx: _mm256_castsi256_ps
* avx: _mm256_zextsi128_si256
* avx: _mm256_set_m128i
1 parent 13d2384 · commit 2dbe8d0

File tree: 1 file changed (+178 −1 lines)


src/x86/avx.rs

+178 −1
@@ -7,6 +7,7 @@ use stdsimd_test::assert_instr;
 use simd_llvm::{simd_cast, simd_shuffle2, simd_shuffle4, simd_shuffle8};
 use v128::{f32x4, f64x2, i32x4, i64x2};
 use v256::*;
+use x86::{__m128i, __m256i};
 
 /// Add packed double-precision (64-bit) floating-point elements
 /// in `a` and `b`.
@@ -1827,6 +1828,34 @@ pub unsafe fn _mm256_set1_epi64x(a: i64) -> i64x4 {
     i64x4::new(a, a, a, a)
 }
 
+/// Cast vector of type __m256d to type __m256.
+#[inline(always)]
+#[target_feature = "+avx"]
+pub unsafe fn _mm256_castpd_ps(a: f64x4) -> f32x8 {
+    mem::transmute(a)
+}
+
+/// Cast vector of type __m256 to type __m256d.
+#[inline(always)]
+#[target_feature = "+avx"]
+pub unsafe fn _mm256_castps_pd(a: f32x8) -> f64x4 {
+    mem::transmute(a)
+}
+
+/// Casts vector of type __m256 to type __m256i.
+#[inline(always)]
+#[target_feature = "+avx"]
+pub unsafe fn _mm256_castps_si256(a: f32x8) -> i64x4 {
+    mem::transmute(a)
+}
+
+/// Casts vector of type __m256i to type __m256.
+#[inline(always)]
+#[target_feature = "+avx"]
+pub unsafe fn _mm256_castsi256_ps(a: i64x4) -> f32x8 {
+    mem::transmute(a)
+}
+
 /// Casts vector of type __m256d to type __m256i.
 /// This intrinsic is only used for compilation and does not generate any
 /// instructions, thus it has zero latency.
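
The four cast intrinsics added above generate no instructions: they reinterpret the 256 bits in place via `mem::transmute` rather than converting lane values. A minimal scalar sketch (plain std Rust, not part of the patch) shows the difference between such a bit-level cast and a numeric conversion:

    // Sketch: a cast like _mm256_castps_si256 reinterprets bits; it does not
    // convert values the way `as` does.
    fn main() {
        let x: f32 = 1.0;
        let reinterpreted: u32 = x.to_bits(); // same 32 bits, viewed as an integer
        let converted: u32 = x as u32;        // numeric conversion
        assert_eq!(reinterpreted, 0x3F80_0000);
        assert_eq!(converted, 1);
        println!("bits = {:#010X}, value = {}", reinterpreted, converted);
    }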
@@ -1899,6 +1928,37 @@ pub unsafe fn _mm256_castsi128_si256(a: i64x2) -> i64x4 {
     simd_shuffle4(a, a, [0, 1, 0, 0])
 }
 
+/// Constructs a 256-bit floating-point vector of [8 x float] from a
+/// 128-bit floating-point vector of [4 x float]. The lower 128 bits contain
+/// the value of the source vector. The upper 128 bits are set to zero.
+#[inline(always)]
+#[target_feature = "+avx,+sse"]
+pub unsafe fn _mm256_zextps128_ps256(a: f32x4) -> f32x8 {
+    use x86::sse::_mm_setzero_ps;
+    simd_shuffle8(a, _mm_setzero_ps(), [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Constructs a 256-bit integer vector from a 128-bit integer vector.
+/// The lower 128 bits contain the value of the source vector. The upper
+/// 128 bits are set to zero.
+#[inline(always)]
+#[target_feature = "+avx,+sse2"]
+pub unsafe fn _mm256_zextsi128_si256(a: i64x2) -> i64x4 {
+    use x86::sse2::_mm_setzero_si128;
+    simd_shuffle4(a, mem::transmute(_mm_setzero_si128()), [0, 1, 2, 3])
+}
+
+/// Constructs a 256-bit floating-point vector of [4 x double] from a
+/// 128-bit floating-point vector of [2 x double]. The lower 128 bits
+/// contain the value of the source vector. The upper 128 bits are set
+/// to zero.
+#[inline(always)]
+#[target_feature = "+avx,+sse2"]
+pub unsafe fn _mm256_zextpd128_pd256(a: f64x2) -> f64x4 {
+    use x86::sse2::_mm_setzero_pd;
+    simd_shuffle4(a, _mm_setzero_pd(), [0, 1, 2, 3])
+}
+
 /// Return vector of type `f32x8` with undefined elements.
 #[inline(always)]
 #[target_feature = "+avx"]
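
The zero-extension helpers above build the wide vector by shuffling the 128-bit input against a zero vector: in `simd_shuffle4(a, b, idx)` the indices 0..2 select lanes of the first operand and 2..4 select lanes of the second. A plain-Rust model of that convention (the `shuffle2x2` helper below is hypothetical, for illustration only):

    // Hypothetical scalar model of simd_shuffle4 over two 2-lane vectors:
    // indices 0..2 pick lanes of `a`, indices 2..4 pick lanes of `b`.
    fn shuffle2x2(a: [i64; 2], b: [i64; 2], idx: [usize; 4]) -> [i64; 4] {
        let concat = [a[0], a[1], b[0], b[1]];
        [concat[idx[0]], concat[idx[1]], concat[idx[2]], concat[idx[3]]]
    }

    fn main() {
        let a = [1, 2];    // the 128-bit input, as two i64 lanes
        let zero = [0, 0]; // analogue of _mm_setzero_si128
        // [0, 1, 2, 3] keeps both lanes of `a` and zero-fills the upper half,
        // which is the effect _mm256_zextsi128_si256 wants.
        assert_eq!(shuffle2x2(a, zero, [0, 1, 2, 3]), [1, 2, 0, 0]);
    }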
@@ -1920,6 +1980,34 @@ pub unsafe fn _mm256_undefined_si256() -> i64x4 {
     i64x4::splat(mem::uninitialized())
 }
 
+/// Set packed __m256 returned vector with the supplied values.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vinsertf128))]
+pub unsafe fn _mm256_set_m128(hi: f32x4, lo: f32x4) -> f32x8 {
+    simd_shuffle8(lo, hi, [0, 1, 2, 3, 4, 5, 6, 7])
+}
+
+/// Set packed __m256d returned vector with the supplied values.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vinsertf128))]
+pub unsafe fn _mm256_set_m128d(hi: f64x2, lo: f64x2) -> f64x4 {
+    let hi: f32x4 = mem::transmute(hi);
+    let lo: f32x4 = mem::transmute(lo);
+    mem::transmute(_mm256_set_m128(hi, lo))
+}
+
+/// Set packed __m256i returned vector with the supplied values.
+#[inline(always)]
+#[target_feature = "+avx"]
+#[cfg_attr(test, assert_instr(vinsertf128))]
+pub unsafe fn _mm256_set_m128i(hi: __m128i, lo: __m128i) -> __m256i {
+    let hi: f32x4 = mem::transmute(hi);
+    let lo: f32x4 = mem::transmute(lo);
+    mem::transmute(_mm256_set_m128(hi, lo))
+}
+
 /// LLVM intrinsics used in the above functions
 #[allow(improper_ctypes)]
 extern "C" {
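
Note the argument order of the new set intrinsics: `hi` is passed first but lands in the upper 128 bits, which is why `_mm256_set_m128` shuffles `(lo, hi)` rather than `(hi, lo)`. A scalar model of the resulting lane layout (hypothetical, not part of the patch):

    // Hypothetical scalar model of _mm256_set_m128(hi, lo): `lo` occupies
    // lanes 0..4 of the result and `hi` occupies lanes 4..8.
    fn set_m128_model(hi: [f32; 4], lo: [f32; 4]) -> [f32; 8] {
        [lo[0], lo[1], lo[2], lo[3], hi[0], hi[1], hi[2], hi[3]]
    }

    fn main() {
        let hi = [5., 6., 7., 8.];
        let lo = [1., 2., 3., 4.];
        assert_eq!(set_m128_model(hi, lo), [1., 2., 3., 4., 5., 6., 7., 8.]);
    }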
@@ -2070,7 +2158,7 @@ mod tests {
     use stdsimd_test::simd_test;
     use test::black_box; // Used to inhibit constant-folding.
 
-    use v128::{f32x4, f64x2, i32x4, i64x2};
+    use v128::{f32x4, f64x2, i8x16, i32x4, i64x2};
     use v256::*;
     use x86::avx;
 
@@ -3390,6 +3478,38 @@ mod tests {
         assert_eq!(r, i64x4::splat(1));
     }
 
+    #[simd_test = "avx"]
+    unsafe fn _mm256_castpd_ps() {
+        let a = f64x4::new(1., 2., 3., 4.);
+        let r = avx::_mm256_castpd_ps(a);
+        let e = f32x8::new(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_castps_pd() {
+        let a = f32x8::new(0., 1.875, 0., 2., 0., 2.125, 0., 2.25);
+        let r = avx::_mm256_castps_pd(a);
+        let e = f64x4::new(1., 2., 3., 4.);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_castps_si256() {
+        let a = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
+        let r = avx::_mm256_castps_si256(a);
+        let e = i64x4::new(4611686019492741120, 4647714816524288000, 4665729215040061440, 4683743613553737728);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_castsi256_ps() {
+        let a = i64x4::new(4611686019492741120, 4647714816524288000, 4665729215040061440, 4683743613553737728);
+        let r = avx::_mm256_castsi256_ps(a);
+        let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
+        assert_eq!(r, e);
+    }
+
     #[simd_test = "avx"]
     unsafe fn _mm256_castpd_si256() {
         let a = f64x4::new(1., 2., 3., 4.);
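
The large i64 constants in the two cast tests above are not magic: since the cast is a pure bit reinterpretation, each i64 lane packs the IEEE-754 bit patterns of two adjacent f32 lanes, with the second float in the high 32 bits on little-endian x86. A standalone sketch (plain std Rust) that reproduces the expected values:

    // Derive the expected i64 lanes of _mm256_castps_si256 from f32 bit patterns.
    fn main() {
        let lane = |lo: f32, hi: f32| -> i64 {
            // little-endian: the second float occupies the high 32 bits
            (((hi.to_bits() as u64) << 32) | lo.to_bits() as u64) as i64
        };
        assert_eq!(lane(1., 2.), 4611686019492741120);
        assert_eq!(lane(3., 4.), 4647714816524288000);
        assert_eq!(lane(5., 6.), 4665729215040061440);
        assert_eq!(lane(7., 8.), 4683743613553737728);
    }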
@@ -3424,4 +3544,61 @@ mod tests {
         let r = avx::_mm256_castsi256_si128(a);
         assert_eq!(r, i64x2::new(1, 2));
     }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_zextps128_ps256() {
+        let a = f32x4::new(1., 2., 3., 4.);
+        let r = avx::_mm256_zextps128_ps256(a);
+        let e = f32x8::new(1., 2., 3., 4., 0., 0., 0., 0.);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_zextsi128_si256() {
+        let a = i64x2::new(1, 2);
+        let r = avx::_mm256_zextsi128_si256(a);
+        let e = i64x4::new(1, 2, 0, 0);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_zextpd128_pd256() {
+        let a = f64x2::new(1., 2.);
+        let r = avx::_mm256_zextpd128_pd256(a);
+        let e = f64x4::new(1., 2., 0., 0.);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_set_m128() {
+        let hi = f32x4::new(5., 6., 7., 8.);
+        let lo = f32x4::new(1., 2., 3., 4.);
+        let r = avx::_mm256_set_m128(hi, lo);
+        let e = f32x8::new(1., 2., 3., 4., 5., 6., 7., 8.);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_set_m128d() {
+        let hi = f64x2::new(3., 4.);
+        let lo = f64x2::new(1., 2.);
+        let r = avx::_mm256_set_m128d(hi, lo);
+        let e = f64x4::new(1., 2., 3., 4.);
+        assert_eq!(r, e);
+    }
+
+    #[simd_test = "avx"]
+    unsafe fn _mm256_set_m128i() {
+        let hi = i8x16::new(17, 18, 19, 20, 21, 22, 23, 24,
+                            25, 26, 27, 28, 29, 30, 31, 32);
+        let lo = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8,
+                            9, 10, 11, 12, 13, 14, 15, 16);
+        let r = avx::_mm256_set_m128i(hi, lo);
+        let e = i8x32::new(
+            1, 2, 3, 4, 5, 6, 7, 8,
+            9, 10, 11, 12, 13, 14, 15, 16,
+            17, 18, 19, 20, 21, 22, 23, 24,
+            25, 26, 27, 28, 29, 30, 31, 32);
+        assert_eq!(r, e);
+    }
 }
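
For reference, a hedged usage sketch of the same combine operation with today's std::arch::x86_64 names, where these intrinsics eventually landed with __m128/__m256 signatures instead of the f32x4/f32x8 types used in this commit:

    // Usage sketch, assuming the current std::arch::x86_64 API rather than
    // the typed signatures in this patch.
    #[cfg(target_arch = "x86_64")]
    fn main() {
        use std::arch::x86_64::*;
        if !is_x86_feature_detected!("avx") {
            return; // skip on machines without AVX
        }
        unsafe {
            let lo = _mm_set_ps(4., 3., 2., 1.); // _mm_set_ps takes lanes high-to-low
            let hi = _mm_set_ps(8., 7., 6., 5.);
            let v = _mm256_set_m128(hi, lo);     // lower half = lo, upper half = hi
            let mut out = [0.0f32; 8];
            _mm256_storeu_ps(out.as_mut_ptr(), v);
            assert_eq!(out, [1., 2., 3., 4., 5., 6., 7., 8.]);
        }
    }

    #[cfg(not(target_arch = "x86_64"))]
    fn main() {}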
