Skip to content

Commit fa7481a

Browse files
committed
Add x86 implementation of SIMD swizzle instruction
1 parent 4a0f534 commit fa7481a

File tree

4 files changed

+85
-1
lines changed

4 files changed

+85
-1
lines changed

cranelift/codegen/meta/src/isa/x86/legalize.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -338,7 +338,9 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
338338
let splat = insts.by_name("splat");
339339
let shuffle = insts.by_name("shuffle");
340340
let sshr = insts.by_name("sshr");
341+
let swizzle = insts.by_name("swizzle");
341342
let trueif = insts.by_name("trueif");
343+
let uadd_sat = insts.by_name("uadd_sat");
342344
let umax = insts.by_name("umax");
343345
let umin = insts.by_name("umin");
344346
let ushr_imm = insts.by_name("ushr_imm");
@@ -375,6 +377,7 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
375377
let uimm8_one = Literal::constant(&imm.uimm8, 0x01);
376378
let u128_zeroes = constant(vec![0x00; 16]);
377379
let u128_ones = constant(vec![0xff; 16]);
380+
let u128_seventies = constant(vec![0x70; 16]);
378381
let a = var("a");
379382
let b = var("b");
380383
let c = var("c");
@@ -459,6 +462,21 @@ fn define_simd(shared: &mut SharedDefinitions, x86_instructions: &InstructionGro
459462
);
460463
}
461464

465+
// SIMD swizzle; the following inefficient implementation is due to the Wasm SIMD spec requiring
466+
// mask indexes greater than 15 to yield a zeroed result lane. For the spec discussion,
467+
// see https://github.com/WebAssembly/simd/issues/93.
468+
{
469+
let swizzle = swizzle.bind(vector(I8, sse_vector_size));
470+
narrow.legalize(
471+
def!(a = swizzle(x, y)),
472+
vec![
473+
def!(b = vconst(u128_seventies)),
474+
def!(c = uadd_sat(y, b)),
475+
def!(a = x86_pshufb(x, c)),
476+
],
477+
);
478+
}
479+
462480
// SIMD bnot
463481
for ty in ValueType::all_lane_types().filter(allowed_simd_type) {
464482
let bnot = bnot.bind(vector(ty, sse_vector_size));

cranelift/codegen/meta/src/shared/instructions.rs

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -517,7 +517,36 @@ fn define_simd_lane_access(
517517
.operands_out(vec![a]),
518518
);
519519

520-
let x = &Operand::new("x", TxN).with_doc("SIMD vector to modify");
520+
let I8x16 = &TypeVar::new(
521+
"I8x16",
522+
"A SIMD vector type consisting of 16 lanes of 8-bit integers",
523+
TypeSetBuilder::new()
524+
.ints(8..8)
525+
.simd_lanes(16..16)
526+
.includes_scalars(false)
527+
.build(),
528+
);
529+
let x = &Operand::new("x", I8x16).with_doc("Vector to modify by re-arranging lanes");
530+
let y = &Operand::new("y", I8x16).with_doc("Mask for re-arranging lanes");
531+
532+
ig.push(
533+
Inst::new(
534+
"swizzle",
535+
r#"
536+
Vector swizzle.
537+
538+
Returns a new vector with byte-width lanes selected from the lanes of the first input
539+
vector ``x`` specified in the second input vector ``y``. The indices ``i`` in range
540+
``[0, 15]`` select the ``i``-th element of ``x``. For indices outside of the range the
541+
resulting lane is 0. Note that this operates on byte-width lanes.
542+
"#,
543+
&formats.binary,
544+
)
545+
.operands_in(vec![x, y])
546+
.operands_out(vec![a]),
547+
);
548+
549+
let x = &Operand::new("x", TxN).with_doc("The vector to modify");
521550
let y = &Operand::new("y", &TxN.lane_of()).with_doc("New lane value");
522551
let Idx = &Operand::new("Idx", &imm.uimm8).with_doc("Lane index");
523552

cranelift/filetests/filetests/isa/x86/simd-lane-access-legalize.clif

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,3 +83,14 @@ block0:
8383
; nextln: v4 = vconst.i8x16 0x00
8484
; nextln: v1 = x86_pshufb v3, v4
8585
; nextln: return v1
86+
87+
function %swizzle() -> i8x16 {
88+
block0:
89+
v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
90+
v1 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
91+
v2 = swizzle.i8x16 v0, v1
92+
; check: v3 = vconst.i8x16 0x70707070707070707070707070707070
93+
; nextln: v4 = uadd_sat v1, v3
94+
; nextln: v2 = x86_pshufb v0, v4
95+
return v2
96+
}

cranelift/filetests/filetests/isa/x86/simd-lane-access-run.clif

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,3 +165,29 @@ block0:
165165
return v8
166166
}
167167
; run
168+
169+
function %swizzle() -> b1 {
170+
block0:
171+
v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
172+
v1 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 42]
173+
v2 = swizzle.i8x16 v0, v1 ; reverse the lanes, with over-large index 42 producing a zeroed lane
174+
175+
v3 = vconst.i8x16 [15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0]
176+
v4 = icmp eq v2, v3
177+
v5 = vall_true v4
178+
return v5
179+
}
180+
; run:
181+
182+
function %swizzle_with_overflow() -> b1 {
183+
block0:
184+
v0 = vconst.i8x16 [0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15]
185+
v1 = vconst.i8x16 [16 250 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
186+
v2 = swizzle.i8x16 v0, v1 ; 250 should overflow but saturate so that the MSB is set (PSHUFB zeroes any lane whose mask byte has the MSB set)
187+
188+
v3 = vconst.i8x16 [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
189+
v4 = icmp eq v2, v3
190+
v5 = vall_true v4
191+
return v5
192+
}
193+
; run:

0 commit comments

Comments
 (0)