forked from rust-lang/stdarch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathf16c.rs
145 lines (132 loc) · 5.77 KB
/
f16c.rs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
//! [F16C intrinsics].
//!
//! [F16C intrinsics]: https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=fp16&expand=1769
use crate::core_arch::{simd::*, x86::*};
#[cfg(test)]
use stdarch_test::assert_instr;
#[allow(improper_ctypes)]
extern "unadjusted" {
#[link_name = "llvm.x86.vcvtph2ps.128"]
fn llvm_vcvtph2ps_128(a: i16x8) -> f32x4;
#[link_name = "llvm.x86.vcvtph2ps.256"]
fn llvm_vcvtph2ps_256(a: i16x8) -> f32x8;
#[link_name = "llvm.x86.vcvtps2ph.128"]
fn llvm_vcvtps2ph_128(a: f32x4, rounding: i32) -> i16x8;
#[link_name = "llvm.x86.vcvtps2ph.256"]
fn llvm_vcvtps2ph_256(a: f32x8, rounding: i32) -> i16x8;
}
/// Converts the 4 x 16-bit half-precision float values in the lowest 64-bit of
/// the 128-bit vector `a` into 4 x 32-bit float values stored in a 128-bit wide
/// vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtph_ps)
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
pub unsafe fn _mm_cvtph_ps(a: __m128i) -> __m128 {
transmute(llvm_vcvtph2ps_128(transmute(a)))
}
/// Converts the 8 x 16-bit half-precision float values in the 128-bit vector
/// `a` into 8 x 32-bit float values stored in a 256-bit wide vector.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtph_ps)
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtph2ps"))]
#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
pub unsafe fn _mm256_cvtph_ps(a: __m128i) -> __m256 {
transmute(llvm_vcvtph2ps_256(transmute(a)))
}
/// Converts the 4 x 32-bit float values in the 128-bit vector `a` into 4 x
/// 16-bit half-precision float values stored in the lowest 64-bit of a 128-bit
/// vector.
///
/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_ph)
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
pub unsafe fn _mm_cvtps_ph<const IMM_ROUNDING: i32>(a: __m128) -> __m128i {
static_assert_uimm_bits!(IMM_ROUNDING, 3);
let a = a.as_f32x4();
let r = llvm_vcvtps2ph_128(a, IMM_ROUNDING);
transmute(r)
}
/// Converts the 8 x 32-bit float values in the 256-bit vector `a` into 8 x
/// 16-bit half-precision float values stored in a 128-bit wide vector.
///
/// Rounding is done according to the `imm_rounding` parameter, which can be one of:
///
/// * [`_MM_FROUND_TO_NEAREST_INT`] | [`_MM_FROUND_NO_EXC`] : round to nearest and suppress exceptions
/// * [`_MM_FROUND_TO_NEG_INF`] | [`_MM_FROUND_NO_EXC`] : round down and suppress exceptions
/// * [`_MM_FROUND_TO_POS_INF`] | [`_MM_FROUND_NO_EXC`] : round up and suppress exceptions
/// * [`_MM_FROUND_TO_ZERO`] | [`_MM_FROUND_NO_EXC`] : truncate and suppress exceptions
/// * [`_MM_FROUND_CUR_DIRECTION`] : use `MXCSR.RC` - see [`_MM_SET_ROUNDING_MODE`]
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_cvtps_ph)
#[inline]
#[target_feature(enable = "f16c")]
#[cfg_attr(test, assert_instr("vcvtps2ph", IMM_ROUNDING = 0))]
#[rustc_legacy_const_generics(1)]
#[stable(feature = "x86_f16c_intrinsics", since = "1.68.0")]
pub unsafe fn _mm256_cvtps_ph<const IMM_ROUNDING: i32>(a: __m256) -> __m128i {
static_assert_uimm_bits!(IMM_ROUNDING, 3);
let a = a.as_f32x8();
let r = llvm_vcvtps2ph_256(a, IMM_ROUNDING);
transmute(r)
}
#[cfg(test)]
mod tests {
use crate::{core_arch::x86::*, mem::transmute};
use stdarch_test::simd_test;
const F16_ONE: i16 = 0x3c00;
const F16_TWO: i16 = 0x4000;
const F16_THREE: i16 = 0x4200;
const F16_FOUR: i16 = 0x4400;
const F16_FIVE: i16 = 0x4500;
const F16_SIX: i16 = 0x4600;
const F16_SEVEN: i16 = 0x4700;
const F16_EIGHT: i16 = 0x4800;
#[simd_test(enable = "f16c")]
unsafe fn test_mm_cvtph_ps() {
let a = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
let r = _mm_cvtph_ps(a);
let e = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "f16c")]
unsafe fn test_mm256_cvtph_ps() {
let a = _mm_set_epi16(
F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
);
let r = _mm256_cvtph_ps(a);
let e = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
assert_eq_m256(r, e);
}
#[simd_test(enable = "f16c")]
unsafe fn test_mm_cvtps_ph() {
let a = _mm_set_ps(1.0, 2.0, 3.0, 4.0);
let r = _mm_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(a);
let e = _mm_set_epi16(0, 0, 0, 0, F16_ONE, F16_TWO, F16_THREE, F16_FOUR);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "f16c")]
unsafe fn test_mm256_cvtps_ph() {
let a = _mm256_set_ps(1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0);
let r = _mm256_cvtps_ph::<_MM_FROUND_CUR_DIRECTION>(a);
let e = _mm_set_epi16(
F16_ONE, F16_TWO, F16_THREE, F16_FOUR, F16_FIVE, F16_SIX, F16_SEVEN, F16_EIGHT,
);
assert_eq_m128i(r, e);
}
}