Skip to content

Codegen simd intrinsics as simd clif instructions #842

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
Dec 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/intrinsics/llvm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ pub fn codegen_llvm_intrinsic_call<'tcx>(
kind => unreachable!("kind {:?}", kind),
};

simd_for_each_lane(fx, intrinsic, x, y, ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
simd_pair_for_each_lane(fx, intrinsic, x, y, ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
let res_lane = match lane_layout.ty.kind {
ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane),
_ => unreachable!("{:?}", lane_layout.ty),
Expand Down
148 changes: 67 additions & 81 deletions src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,34 @@ pub fn lane_type_and_count<'tcx>(
}

/// Applies `f` to every lane of the SIMD value `val` and stores each result in the
/// corresponding lane of `ret`.
///
/// For each lane, `f` is called with the function context, the layout of an input lane, the
/// layout of a result lane and the scalar loaded from the current input lane. The `CValue` it
/// returns is written to the same lane index of `ret`.
///
/// `intrinsic` is only used to name the offending intrinsic when the lane count check fails.
///
/// # Panics
///
/// Panics if `val` and `ret` have a different number of lanes, or if a lane index cannot be
/// converted to the index type expected by `mir::Field::new`.
fn simd_for_each_lane<'tcx, B: Backend>(
    fx: &mut FunctionCx<'_, 'tcx, B>,
    intrinsic: &str,
    val: CValue<'tcx>,
    ret: CPlace<'tcx>,
    f: impl Fn(
        &mut FunctionCx<'_, 'tcx, B>,
        TyLayout<'tcx>,
        TyLayout<'tcx>,
        Value,
    ) -> CValue<'tcx>,
) {
    let layout = val.layout();

    let (lane_layout, lane_count) = lane_type_and_count(fx.tcx, layout);
    let (ret_lane_layout, ret_lane_count) = lane_type_and_count(fx.tcx, ret.layout());
    // Input and output are processed lane by lane, so both must have the same lane count.
    // Include the intrinsic name so a failure points at the call that triggered it.
    assert_eq!(
        lane_count, ret_lane_count,
        "input and result lane counts differ for intrinsic `{}`",
        intrinsic
    );

    for lane_idx in 0..lane_count {
        let lane_idx = mir::Field::new(lane_idx.try_into().unwrap());
        // Read the current input lane as a scalar value.
        let lane = val.value_field(fx, lane_idx).load_scalar(fx);

        let res_lane = f(fx, lane_layout, ret_lane_layout, lane);

        // Store the computed lane at the same index of the destination.
        ret.place_field(fx, lane_idx).write_cvalue(fx, res_lane);
    }
}

fn simd_pair_for_each_lane<'tcx, B: Backend>(
fx: &mut FunctionCx<'_, 'tcx, B>,
intrinsic: &str,
x: CValue<'tcx>,
Expand Down Expand Up @@ -204,7 +232,7 @@ fn bool_to_zero_or_max_uint<'tcx>(

macro simd_cmp {
($fx:expr, $intrinsic:expr, $cc:ident($x:ident, $y:ident) -> $ret:ident) => {
simd_for_each_lane(
simd_pair_for_each_lane(
$fx,
$intrinsic,
$x,
Expand All @@ -220,7 +248,7 @@ macro simd_cmp {
);
},
($fx:expr, $intrinsic:expr, $cc_u:ident|$cc_s:ident($x:ident, $y:ident) -> $ret:ident) => {
simd_for_each_lane(
simd_pair_for_each_lane(
$fx,
$intrinsic,
$x,
Expand All @@ -239,94 +267,52 @@ macro simd_cmp {
}

macro simd_int_binop {
($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => {
simd_for_each_lane(
$fx,
$intrinsic,
$x,
$y,
$ret,
|fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
let res_lane = match lane_layout.ty.kind {
ty::Uint(_) | ty::Int(_) => fx.bcx.ins().$op(x_lane, y_lane),
_ => unreachable!("{:?}", lane_layout.ty),
};
CValue::by_val(res_lane, ret_lane_layout)
},
);
($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => {
simd_int_binop!($fx, $op|$op($x, $y) -> $ret);
},
($fx:expr, $intrinsic:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) => {
simd_for_each_lane(
$fx,
$intrinsic,
$x,
$y,
$ret,
|fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
let res_lane = match lane_layout.ty.kind {
ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
_ => unreachable!("{:?}", lane_layout.ty),
};
CValue::by_val(res_lane, ret_lane_layout)
},
);
($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) => {
let (lane_layout, lane_count) = lane_type_and_count($fx.tcx, $x.layout());
let x_val = $x.load_scalar($fx);
let y_val = $y.load_scalar($fx);

let res = match lane_layout.ty.kind {
ty::Uint(_) => $fx.bcx.ins().$op_u(x_val, y_val),
ty::Int(_) => $fx.bcx.ins().$op_s(x_val, y_val),
_ => unreachable!("{:?}", lane_layout.ty),
};
$ret.write_cvalue($fx, CValue::by_val(res, $ret.layout()));
},
}

macro simd_int_flt_binop {
($fx:expr, $intrinsic:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
simd_for_each_lane(
$fx,
$intrinsic,
$x,
$y,
$ret,
|fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
let res_lane = match lane_layout.ty.kind {
ty::Uint(_) | ty::Int(_) => fx.bcx.ins().$op(x_lane, y_lane),
ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
_ => unreachable!("{:?}", lane_layout.ty),
};
CValue::by_val(res_lane, ret_lane_layout)
},
);
($fx:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
simd_int_flt_binop!($fx, $op|$op|$op_f($x, $y) -> $ret);
},
($fx:expr, $intrinsic:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
simd_for_each_lane(
$fx,
$intrinsic,
$x,
$y,
$ret,
|fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
let res_lane = match lane_layout.ty.kind {
ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
_ => unreachable!("{:?}", lane_layout.ty),
};
CValue::by_val(res_lane, ret_lane_layout)
},
);
($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
let (lane_layout, lane_count) = lane_type_and_count($fx.tcx, $x.layout());
let x_val = $x.load_scalar($fx);
let y_val = $y.load_scalar($fx);

let res = match lane_layout.ty.kind {
ty::Uint(_) => $fx.bcx.ins().$op_u(x_val, y_val),
ty::Int(_) => $fx.bcx.ins().$op_s(x_val, y_val),
ty::Float(_) => $fx.bcx.ins().$op_f(x_val, y_val),
_ => unreachable!("{:?}", lane_layout.ty),
};
$ret.write_cvalue($fx, CValue::by_val(res, $ret.layout()));
},
}

macro simd_flt_binop($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
simd_for_each_lane(
$fx,
$intrinsic,
$x,
$y,
$ret,
|fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
let res_lane = match lane_layout.ty.kind {
ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
_ => unreachable!("{:?}", lane_layout.ty),
};
CValue::by_val(res_lane, ret_lane_layout)
},
);
macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
let (lane_layout, lane_count) = lane_type_and_count($fx.tcx, $x.layout());
let x_val = $x.load_scalar($fx);
let y_val = $y.load_scalar($fx);

let res = match lane_layout.ty.kind {
ty::Float(_) => $fx.bcx.ins().$op(x_val, y_val),
_ => unreachable!("{:?}", lane_layout.ty),
};
$ret.write_cvalue($fx, CValue::by_val(res, $ret.layout()));
}

pub fn codegen_intrinsic_call<'tcx>(
Expand Down
43 changes: 18 additions & 25 deletions src/intrinsics/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,15 @@ pub fn codegen_simd_intrinsic_call<'tcx>(
};

simd_cast, (c a) {
let (lane_layout, lane_count) = lane_type_and_count(fx.tcx, a.layout());
let (ret_lane_layout, ret_lane_count) = lane_type_and_count(fx.tcx, ret.layout());
assert_eq!(lane_count, ret_lane_count);
simd_for_each_lane(fx, intrinsic, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap();

let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap();
let from_signed = type_sign(lane_layout.ty);
let to_signed = type_sign(ret_lane_layout.ty);

let from_signed = type_sign(lane_layout.ty);
let to_signed = type_sign(ret_lane_layout.ty);

for lane in 0..lane_count {
let lane = mir::Field::new(lane.try_into().unwrap());

let a_lane = a.value_field(fx, lane).load_scalar(fx);
let res = clif_int_or_float_cast(fx, a_lane, from_signed, ret_lane_ty, to_signed);
ret.place_field(fx, lane).write_cvalue(fx, CValue::by_val(res, ret_lane_layout));
}
let ret_lane = clif_int_or_float_cast(fx, lane, from_signed, ret_lane_ty, to_signed);
CValue::by_val(ret_lane, ret_lane_layout)
});
};

simd_eq, (c x, c y) {
Expand Down Expand Up @@ -134,38 +127,38 @@ pub fn codegen_simd_intrinsic_call<'tcx>(
};

simd_add, (c x, c y) {
simd_int_flt_binop!(fx, intrinsic, iadd|fadd(x, y) -> ret);
simd_int_flt_binop!(fx, iadd|fadd(x, y) -> ret);
};
simd_sub, (c x, c y) {
simd_int_flt_binop!(fx, intrinsic, isub|fsub(x, y) -> ret);
simd_int_flt_binop!(fx, isub|fsub(x, y) -> ret);
};
simd_mul, (c x, c y) {
simd_int_flt_binop!(fx, intrinsic, imul|fmul(x, y) -> ret);
simd_int_flt_binop!(fx, imul|fmul(x, y) -> ret);
};
simd_div, (c x, c y) {
simd_int_flt_binop!(fx, intrinsic, udiv|sdiv|fdiv(x, y) -> ret);
simd_int_flt_binop!(fx, udiv|sdiv|fdiv(x, y) -> ret);
};
simd_shl, (c x, c y) {
simd_int_binop!(fx, intrinsic, ishl(x, y) -> ret);
simd_int_binop!(fx, ishl(x, y) -> ret);
};
simd_shr, (c x, c y) {
simd_int_binop!(fx, intrinsic, ushr|sshr(x, y) -> ret);
simd_int_binop!(fx, ushr|sshr(x, y) -> ret);
};
simd_and, (c x, c y) {
simd_int_binop!(fx, intrinsic, band(x, y) -> ret);
simd_int_binop!(fx, band(x, y) -> ret);
};
simd_or, (c x, c y) {
simd_int_binop!(fx, intrinsic, bor(x, y) -> ret);
simd_int_binop!(fx, bor(x, y) -> ret);
};
simd_xor, (c x, c y) {
simd_int_binop!(fx, intrinsic, bxor(x, y) -> ret);
simd_int_binop!(fx, bxor(x, y) -> ret);
};

simd_fmin, (c x, c y) {
simd_flt_binop!(fx, intrinsic, fmin(x, y) -> ret);
simd_flt_binop!(fx, fmin(x, y) -> ret);
};
simd_fmax, (c x, c y) {
simd_flt_binop!(fx, intrinsic, fmax(x, y) -> ret);
simd_flt_binop!(fx, fmax(x, y) -> ret);
};
}
}
19 changes: 11 additions & 8 deletions src/pointer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,8 @@ impl Pointer {
) -> Value {
match self.base {
PointerBase::Addr(base_addr) => fx.bcx.ins().load(ty, flags, base_addr, self.offset),
PointerBase::Stack(stack_slot) => if ty == types::I128 {
// WORKAROUND for stack_load.i128 not being implemented
PointerBase::Stack(stack_slot) => if ty == types::I128 || ty.is_vector() {
// WORKAROUND for stack_load.i128 and stack_load.iXxY not being implemented
let base_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0);
fx.bcx.ins().load(ty, flags, base_addr, self.offset)
} else {
Expand All @@ -146,12 +146,15 @@ impl Pointer {
PointerBase::Addr(base_addr) => {
fx.bcx.ins().store(flags, value, base_addr, self.offset);
}
PointerBase::Stack(stack_slot) => if fx.bcx.func.dfg.value_type(value) == types::I128 {
// WORKAROUND for stack_load.i128 not being implemented
let base_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0);
fx.bcx.ins().store(flags, value, base_addr, self.offset);
} else {
fx.bcx.ins().stack_store(value, stack_slot, self.offset);
PointerBase::Stack(stack_slot) => {
let val_ty = fx.bcx.func.dfg.value_type(value);
if val_ty == types::I128 || val_ty.is_vector() {
// WORKAROUND for stack_store.i128 and stack_store.iXxY not being implemented
let base_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0);
fx.bcx.ins().store(flags, value, base_addr, self.offset);
} else {
fx.bcx.ins().stack_store(value, stack_slot, self.offset);
}
}
}
}
Expand Down
34 changes: 25 additions & 9 deletions src/value_and_place.rs
Original file line number Diff line number Diff line change
Expand Up @@ -122,11 +122,14 @@ impl<'tcx> CValue<'tcx> {
let layout = self.1;
match self.0 {
CValueInner::ByRef(ptr) => {
let scalar = match layout.abi {
layout::Abi::Scalar(ref scalar) => scalar.clone(),
let clif_ty = match layout.abi {
layout::Abi::Scalar(ref scalar) => scalar_to_clif_type(fx.tcx, scalar.clone()),
layout::Abi::Vector { ref element, count } => {
scalar_to_clif_type(fx.tcx, element.clone())
.by(u16::try_from(count).unwrap()).unwrap()
}
_ => unreachable!(),
};
let clif_ty = scalar_to_clif_type(fx.tcx, scalar);
ptr.load(fx, clif_ty, MemFlags::new())
}
CValueInner::ByVal(value) => value,
Expand Down Expand Up @@ -164,13 +167,26 @@ impl<'tcx> CValue<'tcx> {
field: mir::Field,
) -> CValue<'tcx> {
let layout = self.1;
let ptr = match self.0 {
CValueInner::ByRef(ptr) => ptr,
match self.0 {
CValueInner::ByVal(val) => {
match layout.abi {
layout::Abi::Vector { element: _, count } => {
let count = u8::try_from(count).expect("SIMD type with more than 255 lanes???");
let field = u8::try_from(field.index()).unwrap();
assert!(field < count);
let lane = fx.bcx.ins().extractlane(val, field);
let field_layout = layout.field(&*fx, usize::from(field));
CValue::by_val(lane, field_layout)
}
_ => unreachable!("value_field for ByVal with abi {:?}", layout.abi),
}
}
CValueInner::ByRef(ptr) => {
let (field_ptr, field_layout) = codegen_field(fx, ptr, None, layout, field);
CValue::by_ref(field_ptr, field_layout)
}
_ => bug!("place_field for {:?}", self),
};

let (field_ptr, field_layout) = codegen_field(fx, ptr, None, layout, field);
CValue::by_ref(field_ptr, field_layout)
}
}

pub fn unsize_value<'a>(self, fx: &mut FunctionCx<'_, 'tcx, impl Backend>, dest: CPlace<'tcx>) {
Expand Down