Skip to content

Commit 69526d4

Browse files
committed
Implement some float simd intrinsics
1 parent 8691b8b commit 69526d4

File tree

4 files changed

+187
-64
lines changed

4 files changed

+187
-64
lines changed

example/std_example.rs

+33-8
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#![feature(core_intrinsics)]
22

3+
use std::arch::x86_64::*;
34
use std::io::Write;
45
use std::intrinsics;
56

@@ -52,8 +53,6 @@ fn main() {
5253

5354
#[target_feature(enable = "sse2")]
5455
unsafe fn test_simd() {
55-
use std::arch::x86_64::*;
56-
5756
let x = _mm_setzero_si128();
5857
let y = _mm_set1_epi16(7);
5958
let or = _mm_or_si128(x, y);
@@ -67,15 +66,15 @@ unsafe fn test_simd() {
6766
test_mm_slli_si128();
6867
test_mm_movemask_epi8();
6968
test_mm256_movemask_epi8();
69+
test_mm_add_epi8();
70+
test_mm_add_pd();
7071

7172
let mask1 = _mm_movemask_epi8(dbg!(_mm_setr_epi8(255u8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0)));
7273
assert_eq!(mask1, 1);
7374
}
7475

7576
#[target_feature(enable = "sse2")]
7677
unsafe fn test_mm_slli_si128() {
77-
use std::arch::x86_64::*;
78-
7978
#[rustfmt::skip]
8079
let a = _mm_setr_epi8(
8180
1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
@@ -116,8 +115,6 @@ unsafe fn test_mm_slli_si128() {
116115

117116
#[target_feature(enable = "sse2")]
118117
unsafe fn test_mm_movemask_epi8() {
119-
use std::arch::x86_64::*;
120-
121118
#[rustfmt::skip]
122119
let a = _mm_setr_epi8(
123120
0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01,
@@ -131,20 +128,48 @@ unsafe fn test_mm_movemask_epi8() {
131128

132129
#[target_feature(enable = "avx2")]
133130
unsafe fn test_mm256_movemask_epi8() {
134-
use std::arch::x86_64::*;
135-
136131
let a = _mm256_set1_epi8(-1);
137132
let r = _mm256_movemask_epi8(a);
138133
let e = -1;
139134
assert_eq!(r, e);
140135
}
141136

137+
#[target_feature(enable = "sse2")]
138+
unsafe fn test_mm_add_epi8() {
139+
let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
140+
#[rustfmt::skip]
141+
let b = _mm_setr_epi8(
142+
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
143+
);
144+
let r = _mm_add_epi8(a, b);
145+
#[rustfmt::skip]
146+
let e = _mm_setr_epi8(
147+
16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46,
148+
);
149+
assert_eq_m128i(r, e);
150+
}
151+
152+
#[target_feature(enable = "sse2")]
153+
unsafe fn test_mm_add_pd() {
154+
let a = _mm_setr_pd(1.0, 2.0);
155+
let b = _mm_setr_pd(5.0, 10.0);
156+
let r = _mm_add_pd(a, b);
157+
assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0));
158+
}
159+
142160
fn assert_eq_m128i(x: std::arch::x86_64::__m128i, y: std::arch::x86_64::__m128i) {
143161
unsafe {
144162
assert_eq!(std::mem::transmute::<_, [u8; 16]>(x), std::mem::transmute::<_, [u8; 16]>(x));
145163
}
146164
}
147165

166+
#[target_feature(enable = "sse2")]
167+
pub unsafe fn assert_eq_m128d(a: __m128d, b: __m128d) {
168+
if _mm_movemask_pd(_mm_cmpeq_pd(a, b)) != 0b11 {
169+
panic!("{:?} != {:?}", a, b);
170+
}
171+
}
172+
148173
#[derive(PartialEq)]
149174
enum LoopState {
150175
Continue(()),

src/constant.rs

+30-1
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ pub fn trans_constant<'a, 'tcx: 'a>(
8888
}
8989

9090
pub fn force_eval_const<'a, 'tcx: 'a>(
91-
fx: &mut FunctionCx<'a, 'tcx, impl Backend>,
91+
fx: &FunctionCx<'a, 'tcx, impl Backend>,
9292
const_: &'tcx Const,
9393
) -> &'tcx Const<'tcx> {
9494
match const_.val {
@@ -422,3 +422,32 @@ impl<'mir, 'tcx> Machine<'mir, 'tcx> for TransPlaceInterpreter {
422422
Ok(())
423423
}
424424
}
425+
426+
pub fn mir_operand_get_const_val<'tcx>(
427+
fx: &FunctionCx<'_, 'tcx, impl Backend>,
428+
operand: &Operand<'tcx>,
429+
) -> Result<&'tcx Const<'tcx>, String> {
430+
let place = match operand {
431+
Operand::Copy(place) => place,
432+
Operand::Constant(const_) => return Ok(force_eval_const(fx, const_.literal)),
433+
_ => return Err(format!("{:?}", operand)),
434+
};
435+
436+
assert!(place.projection.is_none());
437+
let static_ = match &place.base {
438+
PlaceBase::Static(static_) => {
439+
static_
440+
}
441+
PlaceBase::Local(_) => return Err("local".to_string()),
442+
};
443+
444+
Ok(match &static_.kind {
445+
StaticKind::Static(_) => unimplemented!(),
446+
StaticKind::Promoted(promoted) => {
447+
fx.tcx.const_eval(ParamEnv::reveal_all().and(GlobalId {
448+
instance: fx.instance,
449+
promoted: Some(*promoted),
450+
})).unwrap()
451+
}
452+
})
453+
}

src/intrinsics.rs

+75-48
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,7 @@ pub fn lane_type_and_count<'tcx>(
144144
(lane_layout, lane_count)
145145
}
146146

147-
fn simd_for_each_lane<'tcx, B: Backend>(
147+
pub fn simd_for_each_lane<'tcx, B: Backend>(
148148
fx: &mut FunctionCx<'_, 'tcx, B>,
149149
intrinsic: &str,
150150
x: CValue<'tcx>,
@@ -170,23 +170,37 @@ fn simd_for_each_lane<'tcx, B: Backend>(
170170
}
171171
}
172172

173-
fn bool_to_zero_or_max_uint<'tcx>(
173+
pub fn bool_to_zero_or_max_uint<'tcx>(
174174
fx: &mut FunctionCx<'_, 'tcx, impl Backend>,
175175
layout: TyLayout<'tcx>,
176176
val: Value,
177177
) -> CValue<'tcx> {
178178
let ty = fx.clif_type(layout.ty).unwrap();
179179

180-
let zero = fx.bcx.ins().iconst(ty, 0);
181-
let max = fx.bcx.ins().iconst(ty, (u64::max_value() >> (64 - ty.bits())) as i64);
182-
let res = crate::common::codegen_select(&mut fx.bcx, val, max, zero);
180+
let int_ty = match ty {
181+
types::F32 => types::I32,
182+
types::F64 => types::I64,
183+
ty => ty,
184+
};
185+
186+
let zero = fx.bcx.ins().iconst(int_ty, 0);
187+
let max = fx.bcx.ins().iconst(int_ty, (u64::max_value() >> (64 - int_ty.bits())) as i64);
188+
let mut res = crate::common::codegen_select(&mut fx.bcx, val, max, zero);
189+
190+
if ty.is_float() {
191+
res = fx.bcx.ins().bitcast(ty, res);
192+
}
193+
183194
CValue::by_val(res, layout)
184195
}
185196

186197
macro_rules! simd_cmp {
187198
($fx:expr, $intrinsic:expr, $cc:ident($x:ident, $y:ident) -> $ret:ident) => {
188-
simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, _lane_layout, res_lane_layout, x_lane, y_lane| {
189-
let res_lane = fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane);
199+
simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
200+
let res_lane = match lane_layout.ty.sty {
201+
ty::Uint(_) | ty::Int(_) => fx.bcx.ins().icmp(IntCC::$cc, x_lane, y_lane),
202+
_ => unreachable!("{:?}", lane_layout.ty),
203+
};
190204
bool_to_zero_or_max_uint(fx, res_lane_layout, res_lane)
191205
});
192206
};
@@ -203,10 +217,13 @@ macro_rules! simd_cmp {
203217

204218
}
205219

206-
macro_rules! simd_binop {
220+
macro_rules! simd_int_binop {
207221
($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => {
208-
simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, _lane_layout, ret_lane_layout, x_lane, y_lane| {
209-
let res_lane = fx.bcx.ins().$op(x_lane, y_lane);
222+
simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
223+
let res_lane = match lane_layout.ty.sty {
224+
ty::Uint(_) | ty::Int(_) => fx.bcx.ins().$op(x_lane, y_lane),
225+
_ => unreachable!("{:?}", lane_layout.ty),
226+
};
210227
CValue::by_val(res_lane, ret_lane_layout)
211228
});
212229
};
@@ -222,6 +239,42 @@ macro_rules! simd_binop {
222239
};
223240
}
224241

242+
macro_rules! simd_int_flt_binop {
243+
($fx:expr, $intrinsic:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
244+
simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
245+
let res_lane = match lane_layout.ty.sty {
246+
ty::Uint(_) | ty::Int(_) => fx.bcx.ins().$op(x_lane, y_lane),
247+
ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
248+
_ => unreachable!("{:?}", lane_layout.ty),
249+
};
250+
CValue::by_val(res_lane, ret_lane_layout)
251+
});
252+
};
253+
($fx:expr, $intrinsic:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
254+
simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
255+
let res_lane = match lane_layout.ty.sty {
256+
ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
257+
ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
258+
ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
259+
_ => unreachable!("{:?}", lane_layout.ty),
260+
};
261+
CValue::by_val(res_lane, ret_lane_layout)
262+
});
263+
};
264+
}
265+
266+
macro_rules! simd_flt_binop {
267+
($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => {
268+
simd_for_each_lane($fx, $intrinsic, $x, $y, $ret, |fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
269+
let res_lane = match lane_layout.ty.sty {
270+
ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
271+
_ => unreachable!("{:?}", lane_layout.ty),
272+
};
273+
CValue::by_val(res_lane, ret_lane_layout)
274+
});
275+
}
276+
}
277+
225278
pub fn codegen_intrinsic_call<'a, 'tcx: 'a>(
226279
fx: &mut FunctionCx<'a, 'tcx, impl Backend>,
227280
def_id: DefId,
@@ -840,30 +893,7 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>(
840893

841894
let indexes = {
842895
use rustc::mir::interpret::*;
843-
let idx_place = match idx {
844-
Operand::Copy(idx_place) => {
845-
idx_place
846-
}
847-
_ => panic!("simd_shuffle* idx is not Operand::Copy, but {:?}", idx),
848-
};
849-
850-
assert!(idx_place.projection.is_none());
851-
let static_ = match &idx_place.base {
852-
PlaceBase::Static(static_) => {
853-
static_
854-
}
855-
PlaceBase::Local(_) => panic!("simd_shuffle* idx is not constant, but a local"),
856-
};
857-
858-
let idx_const = match &static_.kind {
859-
StaticKind::Static(_) => unimplemented!(),
860-
StaticKind::Promoted(promoted) => {
861-
fx.tcx.const_eval(ParamEnv::reveal_all().and(GlobalId {
862-
instance: fx.instance,
863-
promoted: Some(*promoted),
864-
})).unwrap()
865-
}
866-
};
896+
let idx_const = crate::constant::mir_operand_get_const_val(fx, idx).expect("simd_shuffle* idx not const");
867897

868898
let idx_bytes = match idx_const.val {
869899
ConstValue::ByRef { align: _, offset, alloc } => {
@@ -900,41 +930,38 @@ pub fn codegen_intrinsic_call<'a, 'tcx: 'a>(
900930
};
901931

902932
simd_add, (c x, c y) {
903-
simd_binop!(fx, intrinsic, iadd(x, y) -> ret);
933+
simd_int_flt_binop!(fx, intrinsic, iadd|fadd(x, y) -> ret);
904934
};
905935
simd_sub, (c x, c y) {
906-
simd_binop!(fx, intrinsic, isub(x, y) -> ret);
936+
simd_int_flt_binop!(fx, intrinsic, isub|fsub(x, y) -> ret);
907937
};
908938
simd_mul, (c x, c y) {
909-
simd_binop!(fx, intrinsic, imul(x, y) -> ret);
939+
simd_int_flt_binop!(fx, intrinsic, imul|fmul(x, y) -> ret);
910940
};
911941
simd_div, (c x, c y) {
912-
simd_binop!(fx, intrinsic, udiv|sdiv(x, y) -> ret);
913-
};
914-
simd_rem, (c x, c y) {
915-
simd_binop!(fx, intrinsic, urem|srem(x, y) -> ret);
942+
simd_int_flt_binop!(fx, intrinsic, udiv|sdiv|fdiv(x, y) -> ret);
916943
};
917944
simd_shl, (c x, c y) {
918-
simd_binop!(fx, intrinsic, ishl(x, y) -> ret);
945+
simd_int_binop!(fx, intrinsic, ishl(x, y) -> ret);
919946
};
920947
simd_shr, (c x, c y) {
921-
simd_binop!(fx, intrinsic, ushr|sshr(x, y) -> ret);
948+
simd_int_binop!(fx, intrinsic, ushr|sshr(x, y) -> ret);
922949
};
923950
simd_and, (c x, c y) {
924-
simd_binop!(fx, intrinsic, band(x, y) -> ret);
951+
simd_int_binop!(fx, intrinsic, band(x, y) -> ret);
925952
};
926953
simd_or, (c x, c y) {
927-
simd_binop!(fx, intrinsic, bor(x, y) -> ret);
954+
simd_int_binop!(fx, intrinsic, bor(x, y) -> ret);
928955
};
929956
simd_xor, (c x, c y) {
930-
simd_binop!(fx, intrinsic, bxor(x, y) -> ret);
957+
simd_int_binop!(fx, intrinsic, bxor(x, y) -> ret);
931958
};
932959

933960
simd_fmin, (c x, c y) {
934-
simd_binop!(fx, intrinsic, fmin(x, y) -> ret);
961+
simd_flt_binop!(fx, intrinsic, fmin(x, y) -> ret);
935962
};
936963
simd_fmax, (c x, c y) {
937-
simd_binop!(fx, intrinsic, fmax(x, y) -> ret);
964+
simd_flt_binop!(fx, intrinsic, fmax(x, y) -> ret);
938965
};
939966
}
940967

0 commit comments

Comments
 (0)