Skip to content

Commit 90e4493

Browse files
committed
Implement more simd intrinsics
1 parent 344cbac commit 90e4493

File tree

1 file changed

+140
-7
lines changed

1 file changed

+140
-7
lines changed

src/intrinsics/simd.rs

Lines changed: 140 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,34 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
167167
ret.write_cvalue(fx, ret_lane);
168168
};
169169

170+
simd_neg, (c a) {
    // Lane-wise negation of `a`, written into `ret`.
    validate_simd_type!(fx, intrinsic, span, a.layout().ty);
    simd_for_each_lane(fx, a, ret, |fx, in_layout, out_layout, val| {
        // Integer and float lanes need different Cranelift negate instructions;
        // any other lane type is treated as impossible here.
        let negated = match in_layout.ty.kind() {
            ty::Float(_) => fx.bcx.ins().fneg(val),
            ty::Int(_) => fx.bcx.ins().ineg(val),
            _ => unreachable!(),
        };
        CValue::by_val(negated, out_layout)
    });
};
181+
182+
simd_fabs, (c a) {
    // Applies the Cranelift `fabs` instruction to every lane of `a`.
    validate_simd_type!(fx, intrinsic, span, a.layout().ty);
    simd_for_each_lane(fx, a, ret, |fx, _in_layout, out_layout, val| {
        CValue::by_val(fx.bcx.ins().fabs(val), out_layout)
    });
};
189+
190+
simd_fsqrt, (c a) {
    // Applies the Cranelift `sqrt` instruction to every lane of `a`.
    validate_simd_type!(fx, intrinsic, span, a.layout().ty);
    simd_for_each_lane(fx, a, ret, |fx, _in_layout, out_layout, val| {
        CValue::by_val(fx.bcx.ins().sqrt(val), out_layout)
    });
};
197+
170198
simd_add, (c x, c y) {
171199
validate_simd_type!(fx, intrinsic, span, x.layout().ty);
172200
simd_int_flt_binop!(fx, iadd|fadd(x, y) -> ret);
@@ -183,6 +211,29 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
183211
validate_simd_type!(fx, intrinsic, span, x.layout().ty);
184212
simd_int_flt_binop!(fx, udiv|sdiv|fdiv(x, y) -> ret);
185213
};
214+
simd_rem, (c x, c y) {
    // Lane-wise remainder of `x` by `y`, written into `ret`.
    validate_simd_type!(fx, intrinsic, span, x.layout().ty);
    simd_pair_for_each_lane(fx, x, y, ret, |fx, in_layout, out_layout, lhs, rhs| {
        let remainder = match in_layout.ty.kind() {
            // Integer lanes map directly onto Cranelift remainder instructions.
            ty::Uint(_) => fx.bcx.ins().urem(lhs, rhs),
            ty::Int(_) => fx.bcx.ins().srem(lhs, rhs),
            // Float lanes are lowered to calls of the C `fmodf` / `fmod` routines.
            ty::Float(FloatTy::F32) => {
                let params = vec![AbiParam::new(types::F32); 2];
                let returns = vec![AbiParam::new(types::F32)];
                fx.lib_call("fmodf", params, returns, &[lhs, rhs])[0]
            }
            ty::Float(FloatTy::F64) => {
                let params = vec![AbiParam::new(types::F64); 2];
                let returns = vec![AbiParam::new(types::F64)];
                fx.lib_call("fmod", params, returns, &[lhs, rhs])[0]
            }
            _ => unreachable!("{:?}", in_layout.ty),
        };
        CValue::by_val(remainder, out_layout)
    });
};
186237
simd_shl, (c x, c y) {
187238
validate_simd_type!(fx, intrinsic, span, x.layout().ty);
188239
simd_int_binop!(fx, ishl(x, y) -> ret);
@@ -236,6 +287,35 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
236287
simd_flt_binop!(fx, fmax(x, y) -> ret);
237288
};
238289

290+
simd_round, (c a) {
    // Lane-wise rounding of a float vector to the nearest integral value.
    validate_simd_type!(fx, intrinsic, span, a.layout().ty);
    simd_for_each_lane(fx, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
        // Cranelift's `nearest` instruction resolves ties to even, whereas Rust's
        // rounding resolves ties away from zero (like C `round`). Call the libm
        // routines instead so that e.g. 2.5 rounds to 3.0 rather than 2.0.
        let res_lane = match lane_layout.ty.kind() {
            ty::Float(FloatTy::F32) => fx.lib_call(
                "roundf",
                vec![AbiParam::new(types::F32)],
                vec![AbiParam::new(types::F32)],
                &[lane],
            )[0],
            ty::Float(FloatTy::F64) => fx.lib_call(
                "round",
                vec![AbiParam::new(types::F64)],
                vec![AbiParam::new(types::F64)],
                &[lane],
            )[0],
            _ => unreachable!("{:?}", lane_layout.ty),
        };
        CValue::by_val(res_lane, ret_lane_layout)
    });
};
297+
simd_ceil, (c a) {
    // Applies the Cranelift `ceil` instruction to every lane of `a`.
    validate_simd_type!(fx, intrinsic, span, a.layout().ty);
    simd_for_each_lane(fx, a, ret, |fx, _in_layout, out_layout, val| {
        CValue::by_val(fx.bcx.ins().ceil(val), out_layout)
    });
};
304+
simd_floor, (c a) {
    // Applies the Cranelift `floor` instruction to every lane of `a`.
    validate_simd_type!(fx, intrinsic, span, a.layout().ty);
    simd_for_each_lane(fx, a, ret, |fx, _in_layout, out_layout, val| {
        CValue::by_val(fx.bcx.ins().floor(val), out_layout)
    });
};
311+
simd_trunc, (c a) {
    // Applies the Cranelift `trunc` instruction to every lane of `a`.
    validate_simd_type!(fx, intrinsic, span, a.layout().ty);
    simd_for_each_lane(fx, a, ret, |fx, _in_layout, out_layout, val| {
        CValue::by_val(fx.bcx.ins().trunc(val), out_layout)
    });
};
318+
239319
simd_reduce_add_ordered | simd_reduce_add_unordered, (c v, v acc) {
240320
validate_simd_type!(fx, intrinsic, span, v.layout().ty);
241321
simd_reduce(fx, v, Some(acc), ret, |fx, lane_layout, a, b| {
@@ -268,13 +348,66 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
268348
simd_reduce_bool(fx, v, ret, |fx, a, b| fx.bcx.ins().bor(a, b));
269349
};
270350

271-
// simd_fabs
272-
// simd_saturating_add
351+
simd_reduce_and, (c v) {
    // Combines the lanes of `v` with bitwise AND; `None` means no seed accumulator is supplied.
    validate_simd_type!(fx, intrinsic, span, v.layout().ty);
    simd_reduce(fx, v, None, ret, |fx, _lane_layout, lhs, rhs| {
        fx.bcx.ins().band(lhs, rhs)
    });
};
355+
356+
simd_reduce_or, (c v) {
    // Combines the lanes of `v` with bitwise OR; `None` means no seed accumulator is supplied.
    validate_simd_type!(fx, intrinsic, span, v.layout().ty);
    simd_reduce(fx, v, None, ret, |fx, _lane_layout, lhs, rhs| {
        fx.bcx.ins().bor(lhs, rhs)
    });
};
360+
361+
simd_reduce_xor, (c v) {
    // Combines the lanes of `v` with bitwise XOR; `None` means no seed accumulator is supplied.
    validate_simd_type!(fx, intrinsic, span, v.layout().ty);
    simd_reduce(fx, v, None, ret, |fx, _lane_layout, lhs, rhs| {
        fx.bcx.ins().bxor(lhs, rhs)
    });
};
365+
366+
simd_reduce_min, (c v) {
    // Reduces the lanes of `v` to their minimum; `None` means no seed accumulator is supplied.
    validate_simd_type!(fx, intrinsic, span, v.layout().ty);
    simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
        // Float lanes must not go through the integer compare below: `icmp` is
        // only defined for integer values. Use the float minimum instruction,
        // matching how `simd_fmax` uses `fmax` elsewhere in this file.
        // NOTE(review): Cranelift's `fmin` propagates NaN - confirm this is the
        // NaN behaviour wanted for the reduction.
        if let ty::Float(_) = layout.ty.kind() {
            return fx.bcx.ins().fmin(a, b);
        }

        // Integer lanes: pick the comparison matching the lane's signedness.
        let cc = if layout.ty.is_signed() {
            IntCC::SignedLessThan
        } else {
            IntCC::UnsignedLessThan
        };
        let lt = fx.bcx.ins().icmp(cc, a, b);
        fx.bcx.ins().select(lt, a, b)
    });
};
377+
378+
simd_reduce_max, (c v) {
    // Reduces the lanes of `v` to their maximum; `None` means no seed accumulator is supplied.
    validate_simd_type!(fx, intrinsic, span, v.layout().ty);
    simd_reduce(fx, v, None, ret, |fx, layout, a, b| {
        // Float lanes must not go through the integer compare below: `icmp` is
        // only defined for integer values. Use the float maximum instruction,
        // matching how `simd_fmax` uses `fmax` elsewhere in this file.
        // NOTE(review): Cranelift's `fmax` propagates NaN - confirm this is the
        // NaN behaviour wanted for the reduction.
        if let ty::Float(_) = layout.ty.kind() {
            return fx.bcx.ins().fmax(a, b);
        }

        // Integer lanes: pick the comparison matching the lane's signedness.
        let cc = if layout.ty.is_signed() {
            IntCC::SignedGreaterThan
        } else {
            IntCC::UnsignedGreaterThan
        };
        let gt = fx.bcx.ins().icmp(cc, a, b);
        fx.bcx.ins().select(gt, a, b)
    });
};
389+
390+
simd_select, (c m, c a, c b) {
    // Per-lane select: where the mask lane of `m` is non-zero take the lane
    // from `a`, otherwise take the lane from `b`.
    validate_simd_type!(fx, intrinsic, span, m.layout().ty);
    validate_simd_type!(fx, intrinsic, span, a.layout().ty);
    assert_eq!(a.layout(), b.layout());

    let (lane_count, lane_ty) = a.layout().ty.simd_size_and_type(fx.tcx);
    let lane_layout = fx.layout_of(lane_ty);

    for lane_idx in 0..lane_count {
        let mask = m.value_lane(fx, lane_idx).load_scalar(fx);
        let if_set = a.value_lane(fx, lane_idx).load_scalar(fx);
        let if_clear = b.value_lane(fx, lane_idx).load_scalar(fx);

        // The test is "mask == 0", so the select operands are swapped:
        // a zero mask yields `b`'s lane, anything else yields `a`'s lane.
        let mask_is_zero = fx.bcx.ins().icmp_imm(IntCC::Equal, mask, 0);
        let picked = fx.bcx.ins().select(mask_is_zero, if_clear, if_set);

        ret.place_lane(fx, lane_idx).write_cvalue(fx, CValue::by_val(picked, lane_layout));
    }
};
409+
410+
// simd_saturating_*
273411
// simd_bitmask
274-
// simd_select
275-
// simd_rem
276-
// simd_neg
277-
// simd_trunc
278-
// simd_floor
279412
}
280413
}

0 commit comments

Comments
 (0)