Skip to content

Commit ed1b0da

Browse files
committed
[X86] combineConcatVectorOps - fold v4i64/v8x32 concat(broadcast(),broadcast()) -> permilps(concat())
Extend the existing v4f64 fold to handle v4i64/v8f32/v8i32 as well. Fixes llvm#58585.
1 parent 3125a4d commit ed1b0da

File tree

3 files changed

+29
-34
lines changed

3 files changed

+29
-34
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54460,11 +54460,20 @@ static SDValue combineConcatVectorOps(const SDLoc &DL, MVT VT,
5446054460
unsigned NumOps = Ops.size();
5446154461
switch (Op0.getOpcode()) {
5446254462
case X86ISD::VBROADCAST: {
54463-
if (!IsSplat && VT == MVT::v4f64 && llvm::all_of(Ops, [](SDValue Op) {
54463+
if (!IsSplat && llvm::all_of(Ops, [](SDValue Op) {
5446454464
return Op.getOperand(0).getValueType().is128BitVector();
54465-
}))
54466-
return DAG.getNode(X86ISD::MOVDDUP, DL, VT,
54467-
ConcatSubOperand(VT, Ops, 0));
54465+
})) {
54466+
if (VT == MVT::v4f64 || VT == MVT::v4i64)
54467+
return DAG.getNode(X86ISD::UNPCKL, DL, VT,
54468+
ConcatSubOperand(VT, Ops, 0),
54469+
ConcatSubOperand(VT, Ops, 0));
54470+
// TODO: Add pseudo v8i32 PSHUFD handling to AVX1Only targets.
54471+
if (VT == MVT::v8f32 || (VT == MVT::v8i32 && Subtarget.hasInt256()))
54472+
return DAG.getNode(VT == MVT::v8f32 ? X86ISD::VPERMILPI
54473+
: X86ISD::PSHUFD,
54474+
DL, VT, ConcatSubOperand(VT, Ops, 0),
54475+
getV4X86ShuffleImm8ForMask({0, 0, 0, 0}, DL, DAG));
54476+
}
5446854477
break;
5446954478
}
5447054479
case X86ISD::MOVDDUP:

llvm/test/CodeGen/X86/vector-shuffle-256-v4.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1550,16 +1550,16 @@ define <4 x i64> @shuffle_v4i64_0044_v2i64(<2 x i64> %a, <2 x i64> %b) {
15501550
;
15511551
; AVX2-LABEL: shuffle_v4i64_0044_v2i64:
15521552
; AVX2: # %bb.0:
1553-
; AVX2-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1554-
; AVX2-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
1553+
; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
15551554
; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1555+
; AVX2-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
15561556
; AVX2-NEXT: retq
15571557
;
15581558
; AVX512VL-LABEL: shuffle_v4i64_0044_v2i64:
15591559
; AVX512VL: # %bb.0:
1560-
; AVX512VL-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
1561-
; AVX512VL-NEXT: vmovddup {{.*#+}} xmm1 = xmm1[0,0]
1560+
; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
15621561
; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
1562+
; AVX512VL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
15631563
; AVX512VL-NEXT: retq
15641564
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
15651565
%2 = shufflevector <2 x i64> %b, <2 x i64> undef, <2 x i32> <i32 0, i32 0>

llvm/test/CodeGen/X86/vector-shuffle-256-v8.ll

Lines changed: 12 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -639,19 +639,12 @@ define <8 x float> @shuffle_v8f32_00224466_v4f32(<4 x float> %a, <4 x float> %b)
639639
}
640640

641641
define <8 x float> @shuffle_v8f32_00004444_v4f32(<4 x float> %a, <4 x float> %b) {
642-
; AVX1-LABEL: shuffle_v8f32_00004444_v4f32:
643-
; AVX1: # %bb.0:
644-
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
645-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
646-
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
647-
; AVX1-NEXT: retq
648-
;
649-
; AVX2OR512VL-LABEL: shuffle_v8f32_00004444_v4f32:
650-
; AVX2OR512VL: # %bb.0:
651-
; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %xmm0
652-
; AVX2OR512VL-NEXT: vbroadcastss %xmm1, %xmm1
653-
; AVX2OR512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
654-
; AVX2OR512VL-NEXT: retq
642+
; ALL-LABEL: shuffle_v8f32_00004444_v4f32:
643+
; ALL: # %bb.0:
644+
; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
645+
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
646+
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
647+
; ALL-NEXT: retq
655648
%1 = shufflevector <4 x float> %a, <4 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
656649
ret <8 x float> %1
657650
}
@@ -3289,19 +3282,12 @@ define <8 x i32> @shuffle_v8i32_32107654_v4i32(<4 x i32> %a, <4 x i32> %b) {
32893282
}
32903283

32913284
define <8 x i32> @shuffle_v8i32_00004444_v4f32(<4 x i32> %a, <4 x i32> %b) {
3292-
; AVX1-LABEL: shuffle_v8i32_00004444_v4f32:
3293-
; AVX1: # %bb.0:
3294-
; AVX1-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
3295-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3296-
; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
3297-
; AVX1-NEXT: retq
3298-
;
3299-
; AVX2OR512VL-LABEL: shuffle_v8i32_00004444_v4f32:
3300-
; AVX2OR512VL: # %bb.0:
3301-
; AVX2OR512VL-NEXT: vbroadcastss %xmm0, %xmm0
3302-
; AVX2OR512VL-NEXT: vbroadcastss %xmm1, %xmm1
3303-
; AVX2OR512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3304-
; AVX2OR512VL-NEXT: retq
3285+
; ALL-LABEL: shuffle_v8i32_00004444_v4f32:
3286+
; ALL: # %bb.0:
3287+
; ALL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0
3288+
; ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
3289+
; ALL-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
3290+
; ALL-NEXT: retq
33053291
%1 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
33063292
ret <8 x i32> %1
33073293
}

0 commit comments

Comments
 (0)