Skip to content

Commit 7b4fc34

Browse files
authored
Merge pull request #842 from bjorn3/real_simd
Codegen simd intrinsics as simd clif instructions
2 parents c3daf6d + 30a760d commit 7b4fc34

File tree

5 files changed

+122
-124
lines changed

5 files changed

+122
-124
lines changed

src/intrinsics/llvm.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ pub fn codegen_llvm_intrinsic_call<'tcx>(
8686
kind => unreachable!("kind {:?}", kind),
8787
};
8888

89-
simd_for_each_lane(fx, intrinsic, x, y, ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
89+
simd_pair_for_each_lane(fx, intrinsic, x, y, ret, |fx, lane_layout, res_lane_layout, x_lane, y_lane| {
9090
let res_lane = match lane_layout.ty.kind {
9191
ty::Float(_) => fx.bcx.ins().fcmp(flt_cc, x_lane, y_lane),
9292
_ => unreachable!("{:?}", lane_layout.ty),

src/intrinsics/mod.rs

+67-81
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,34 @@ pub fn lane_type_and_count<'tcx>(
144144
}
145145

146146
fn simd_for_each_lane<'tcx, B: Backend>(
147+
fx: &mut FunctionCx<'_, 'tcx, B>,
148+
intrinsic: &str,
149+
val: CValue<'tcx>,
150+
ret: CPlace<'tcx>,
151+
f: impl Fn(
152+
&mut FunctionCx<'_, 'tcx, B>,
153+
TyLayout<'tcx>,
154+
TyLayout<'tcx>,
155+
Value,
156+
) -> CValue<'tcx>,
157+
) {
158+
let layout = val.layout();
159+
160+
let (lane_layout, lane_count) = lane_type_and_count(fx.tcx, layout);
161+
let (ret_lane_layout, ret_lane_count) = lane_type_and_count(fx.tcx, ret.layout());
162+
assert_eq!(lane_count, ret_lane_count);
163+
164+
for lane_idx in 0..lane_count {
165+
let lane_idx = mir::Field::new(lane_idx.try_into().unwrap());
166+
let lane = val.value_field(fx, lane_idx).load_scalar(fx);
167+
168+
let res_lane = f(fx, lane_layout, ret_lane_layout, lane);
169+
170+
ret.place_field(fx, lane_idx).write_cvalue(fx, res_lane);
171+
}
172+
}
173+
174+
fn simd_pair_for_each_lane<'tcx, B: Backend>(
147175
fx: &mut FunctionCx<'_, 'tcx, B>,
148176
intrinsic: &str,
149177
x: CValue<'tcx>,
@@ -204,7 +232,7 @@ fn bool_to_zero_or_max_uint<'tcx>(
204232

205233
macro simd_cmp {
206234
($fx:expr, $intrinsic:expr, $cc:ident($x:ident, $y:ident) -> $ret:ident) => {
207-
simd_for_each_lane(
235+
simd_pair_for_each_lane(
208236
$fx,
209237
$intrinsic,
210238
$x,
@@ -220,7 +248,7 @@ macro simd_cmp {
220248
);
221249
},
222250
($fx:expr, $intrinsic:expr, $cc_u:ident|$cc_s:ident($x:ident, $y:ident) -> $ret:ident) => {
223-
simd_for_each_lane(
251+
simd_pair_for_each_lane(
224252
$fx,
225253
$intrinsic,
226254
$x,
@@ -239,94 +267,52 @@ macro simd_cmp {
239267
}
240268

241269
macro simd_int_binop {
242-
($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => {
243-
simd_for_each_lane(
244-
$fx,
245-
$intrinsic,
246-
$x,
247-
$y,
248-
$ret,
249-
|fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
250-
let res_lane = match lane_layout.ty.kind {
251-
ty::Uint(_) | ty::Int(_) => fx.bcx.ins().$op(x_lane, y_lane),
252-
_ => unreachable!("{:?}", lane_layout.ty),
253-
};
254-
CValue::by_val(res_lane, ret_lane_layout)
255-
},
256-
);
270+
($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) => {
271+
simd_int_binop!($fx, $op|$op($x, $y) -> $ret);
257272
},
258-
($fx:expr, $intrinsic:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) => {
259-
simd_for_each_lane(
260-
$fx,
261-
$intrinsic,
262-
$x,
263-
$y,
264-
$ret,
265-
|fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
266-
let res_lane = match lane_layout.ty.kind {
267-
ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
268-
ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
269-
_ => unreachable!("{:?}", lane_layout.ty),
270-
};
271-
CValue::by_val(res_lane, ret_lane_layout)
272-
},
273-
);
273+
($fx:expr, $op_u:ident|$op_s:ident($x:ident, $y:ident) -> $ret:ident) => {
274+
let (lane_layout, lane_count) = lane_type_and_count($fx.tcx, $x.layout());
275+
let x_val = $x.load_scalar($fx);
276+
let y_val = $y.load_scalar($fx);
277+
278+
let res = match lane_layout.ty.kind {
279+
ty::Uint(_) => $fx.bcx.ins().$op_u(x_val, y_val),
280+
ty::Int(_) => $fx.bcx.ins().$op_s(x_val, y_val),
281+
_ => unreachable!("{:?}", lane_layout.ty),
282+
};
283+
$ret.write_cvalue($fx, CValue::by_val(res, $ret.layout()));
274284
},
275285
}
276286

277287
macro simd_int_flt_binop {
278-
($fx:expr, $intrinsic:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
279-
simd_for_each_lane(
280-
$fx,
281-
$intrinsic,
282-
$x,
283-
$y,
284-
$ret,
285-
|fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
286-
let res_lane = match lane_layout.ty.kind {
287-
ty::Uint(_) | ty::Int(_) => fx.bcx.ins().$op(x_lane, y_lane),
288-
ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
289-
_ => unreachable!("{:?}", lane_layout.ty),
290-
};
291-
CValue::by_val(res_lane, ret_lane_layout)
292-
},
293-
);
288+
($fx:expr, $op:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
289+
simd_int_flt_binop!($fx, $op|$op|$op_f($x, $y) -> $ret);
294290
},
295-
($fx:expr, $intrinsic:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
296-
simd_for_each_lane(
297-
$fx,
298-
$intrinsic,
299-
$x,
300-
$y,
301-
$ret,
302-
|fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
303-
let res_lane = match lane_layout.ty.kind {
304-
ty::Uint(_) => fx.bcx.ins().$op_u(x_lane, y_lane),
305-
ty::Int(_) => fx.bcx.ins().$op_s(x_lane, y_lane),
306-
ty::Float(_) => fx.bcx.ins().$op_f(x_lane, y_lane),
307-
_ => unreachable!("{:?}", lane_layout.ty),
308-
};
309-
CValue::by_val(res_lane, ret_lane_layout)
310-
},
311-
);
291+
($fx:expr, $op_u:ident|$op_s:ident|$op_f:ident($x:ident, $y:ident) -> $ret:ident) => {
292+
let (lane_layout, lane_count) = lane_type_and_count($fx.tcx, $x.layout());
293+
let x_val = $x.load_scalar($fx);
294+
let y_val = $y.load_scalar($fx);
295+
296+
let res = match lane_layout.ty.kind {
297+
ty::Uint(_) => $fx.bcx.ins().$op_u(x_val, y_val),
298+
ty::Int(_) => $fx.bcx.ins().$op_s(x_val, y_val),
299+
ty::Float(_) => $fx.bcx.ins().$op_f(x_val, y_val),
300+
_ => unreachable!("{:?}", lane_layout.ty),
301+
};
302+
$ret.write_cvalue($fx, CValue::by_val(res, $ret.layout()));
312303
},
313304
}
314305

315-
macro simd_flt_binop($fx:expr, $intrinsic:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
316-
simd_for_each_lane(
317-
$fx,
318-
$intrinsic,
319-
$x,
320-
$y,
321-
$ret,
322-
|fx, lane_layout, ret_lane_layout, x_lane, y_lane| {
323-
let res_lane = match lane_layout.ty.kind {
324-
ty::Float(_) => fx.bcx.ins().$op(x_lane, y_lane),
325-
_ => unreachable!("{:?}", lane_layout.ty),
326-
};
327-
CValue::by_val(res_lane, ret_lane_layout)
328-
},
329-
);
306+
macro simd_flt_binop($fx:expr, $op:ident($x:ident, $y:ident) -> $ret:ident) {
307+
let (lane_layout, lane_count) = lane_type_and_count($fx.tcx, $x.layout());
308+
let x_val = $x.load_scalar($fx);
309+
let y_val = $y.load_scalar($fx);
310+
311+
let res = match lane_layout.ty.kind {
312+
ty::Float(_) => $fx.bcx.ins().$op(x_val, y_val),
313+
_ => unreachable!("{:?}", lane_layout.ty),
314+
};
315+
$ret.write_cvalue($fx, CValue::by_val(res, $ret.layout()));
330316
}
331317

332318
pub fn codegen_intrinsic_call<'tcx>(

src/intrinsics/simd.rs

+18-25
Original file line numberDiff line numberDiff line change
@@ -21,22 +21,15 @@ pub fn codegen_simd_intrinsic_call<'tcx>(
2121
};
2222

2323
simd_cast, (c a) {
24-
let (lane_layout, lane_count) = lane_type_and_count(fx.tcx, a.layout());
25-
let (ret_lane_layout, ret_lane_count) = lane_type_and_count(fx.tcx, ret.layout());
26-
assert_eq!(lane_count, ret_lane_count);
24+
simd_for_each_lane(fx, intrinsic, a, ret, |fx, lane_layout, ret_lane_layout, lane| {
25+
let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap();
2726

28-
let ret_lane_ty = fx.clif_type(ret_lane_layout.ty).unwrap();
27+
let from_signed = type_sign(lane_layout.ty);
28+
let to_signed = type_sign(ret_lane_layout.ty);
2929

30-
let from_signed = type_sign(lane_layout.ty);
31-
let to_signed = type_sign(ret_lane_layout.ty);
32-
33-
for lane in 0..lane_count {
34-
let lane = mir::Field::new(lane.try_into().unwrap());
35-
36-
let a_lane = a.value_field(fx, lane).load_scalar(fx);
37-
let res = clif_int_or_float_cast(fx, a_lane, from_signed, ret_lane_ty, to_signed);
38-
ret.place_field(fx, lane).write_cvalue(fx, CValue::by_val(res, ret_lane_layout));
39-
}
30+
let ret_lane = clif_int_or_float_cast(fx, lane, from_signed, ret_lane_ty, to_signed);
31+
CValue::by_val(ret_lane, ret_lane_layout)
32+
});
4033
};
4134

4235
simd_eq, (c x, c y) {
@@ -134,38 +127,38 @@ pub fn codegen_simd_intrinsic_call<'tcx>(
134127
};
135128

136129
simd_add, (c x, c y) {
137-
simd_int_flt_binop!(fx, intrinsic, iadd|fadd(x, y) -> ret);
130+
simd_int_flt_binop!(fx, iadd|fadd(x, y) -> ret);
138131
};
139132
simd_sub, (c x, c y) {
140-
simd_int_flt_binop!(fx, intrinsic, isub|fsub(x, y) -> ret);
133+
simd_int_flt_binop!(fx, isub|fsub(x, y) -> ret);
141134
};
142135
simd_mul, (c x, c y) {
143-
simd_int_flt_binop!(fx, intrinsic, imul|fmul(x, y) -> ret);
136+
simd_int_flt_binop!(fx, imul|fmul(x, y) -> ret);
144137
};
145138
simd_div, (c x, c y) {
146-
simd_int_flt_binop!(fx, intrinsic, udiv|sdiv|fdiv(x, y) -> ret);
139+
simd_int_flt_binop!(fx, udiv|sdiv|fdiv(x, y) -> ret);
147140
};
148141
simd_shl, (c x, c y) {
149-
simd_int_binop!(fx, intrinsic, ishl(x, y) -> ret);
142+
simd_int_binop!(fx, ishl(x, y) -> ret);
150143
};
151144
simd_shr, (c x, c y) {
152-
simd_int_binop!(fx, intrinsic, ushr|sshr(x, y) -> ret);
145+
simd_int_binop!(fx, ushr|sshr(x, y) -> ret);
153146
};
154147
simd_and, (c x, c y) {
155-
simd_int_binop!(fx, intrinsic, band(x, y) -> ret);
148+
simd_int_binop!(fx, band(x, y) -> ret);
156149
};
157150
simd_or, (c x, c y) {
158-
simd_int_binop!(fx, intrinsic, bor(x, y) -> ret);
151+
simd_int_binop!(fx, bor(x, y) -> ret);
159152
};
160153
simd_xor, (c x, c y) {
161-
simd_int_binop!(fx, intrinsic, bxor(x, y) -> ret);
154+
simd_int_binop!(fx, bxor(x, y) -> ret);
162155
};
163156

164157
simd_fmin, (c x, c y) {
165-
simd_flt_binop!(fx, intrinsic, fmin(x, y) -> ret);
158+
simd_flt_binop!(fx, fmin(x, y) -> ret);
166159
};
167160
simd_fmax, (c x, c y) {
168-
simd_flt_binop!(fx, intrinsic, fmax(x, y) -> ret);
161+
simd_flt_binop!(fx, fmax(x, y) -> ret);
169162
};
170163
}
171164
}

src/pointer.rs

+11-8
Original file line numberDiff line numberDiff line change
@@ -126,8 +126,8 @@ impl Pointer {
126126
) -> Value {
127127
match self.base {
128128
PointerBase::Addr(base_addr) => fx.bcx.ins().load(ty, flags, base_addr, self.offset),
129-
PointerBase::Stack(stack_slot) => if ty == types::I128 {
130-
// WORKAROUND for stack_load.i128 not being implemented
129+
PointerBase::Stack(stack_slot) => if ty == types::I128 || ty.is_vector() {
130+
// WORKAROUND for stack_load.i128 and stack_load.iXxY not being implemented
131131
let base_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0);
132132
fx.bcx.ins().load(ty, flags, base_addr, self.offset)
133133
} else {
@@ -146,12 +146,15 @@ impl Pointer {
146146
PointerBase::Addr(base_addr) => {
147147
fx.bcx.ins().store(flags, value, base_addr, self.offset);
148148
}
149-
PointerBase::Stack(stack_slot) => if fx.bcx.func.dfg.value_type(value) == types::I128 {
150-
// WORKAROUND for stack_load.i128 not being implemented
151-
let base_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0);
152-
fx.bcx.ins().store(flags, value, base_addr, self.offset);
153-
} else {
154-
fx.bcx.ins().stack_store(value, stack_slot, self.offset);
149+
PointerBase::Stack(stack_slot) => {
150+
let val_ty = fx.bcx.func.dfg.value_type(value);
151+
if val_ty == types::I128 || val_ty.is_vector() {
152+
// WORKAROUND for stack_store.i128 and stack_store.iXxY not being implemented
153+
let base_addr = fx.bcx.ins().stack_addr(fx.pointer_type, stack_slot, 0);
154+
fx.bcx.ins().store(flags, value, base_addr, self.offset);
155+
} else {
156+
fx.bcx.ins().stack_store(value, stack_slot, self.offset);
157+
}
155158
}
156159
}
157160
}

src/value_and_place.rs

+25-9
Original file line numberDiff line numberDiff line change
@@ -122,11 +122,14 @@ impl<'tcx> CValue<'tcx> {
122122
let layout = self.1;
123123
match self.0 {
124124
CValueInner::ByRef(ptr) => {
125-
let scalar = match layout.abi {
126-
layout::Abi::Scalar(ref scalar) => scalar.clone(),
125+
let clif_ty = match layout.abi {
126+
layout::Abi::Scalar(ref scalar) => scalar_to_clif_type(fx.tcx, scalar.clone()),
127+
layout::Abi::Vector { ref element, count } => {
128+
scalar_to_clif_type(fx.tcx, element.clone())
129+
.by(u16::try_from(count).unwrap()).unwrap()
130+
}
127131
_ => unreachable!(),
128132
};
129-
let clif_ty = scalar_to_clif_type(fx.tcx, scalar);
130133
ptr.load(fx, clif_ty, MemFlags::new())
131134
}
132135
CValueInner::ByVal(value) => value,
@@ -164,13 +167,26 @@ impl<'tcx> CValue<'tcx> {
164167
field: mir::Field,
165168
) -> CValue<'tcx> {
166169
let layout = self.1;
167-
let ptr = match self.0 {
168-
CValueInner::ByRef(ptr) => ptr,
170+
match self.0 {
171+
CValueInner::ByVal(val) => {
172+
match layout.abi {
173+
layout::Abi::Vector { element: _, count } => {
174+
let count = u8::try_from(count).expect("SIMD type with more than 255 lanes???");
175+
let field = u8::try_from(field.index()).unwrap();
176+
assert!(field < count);
177+
let lane = fx.bcx.ins().extractlane(val, field);
178+
let field_layout = layout.field(&*fx, usize::from(field));
179+
CValue::by_val(lane, field_layout)
180+
}
181+
_ => unreachable!("value_field for ByVal with abi {:?}", layout.abi),
182+
}
183+
}
184+
CValueInner::ByRef(ptr) => {
185+
let (field_ptr, field_layout) = codegen_field(fx, ptr, None, layout, field);
186+
CValue::by_ref(field_ptr, field_layout)
187+
}
169188
_ => bug!("place_field for {:?}", self),
170-
};
171-
172-
let (field_ptr, field_layout) = codegen_field(fx, ptr, None, layout, field);
173-
CValue::by_ref(field_ptr, field_layout)
189+
}
174190
}
175191

176192
pub fn unsize_value<'a>(self, fx: &mut FunctionCx<'_, 'tcx, impl Backend>, dest: CPlace<'tcx>) {

0 commit comments

Comments
 (0)