Skip to content

Commit 40fa52b

Browse files
committed
[X86] lowerBuildVectorToBitOp - support build_vector(shift()) -> shift(build_vector(),C)
Commonly occurs in sign-extension cases.

llvm-svn: 361706
1 parent b0fd12b commit 40fa52b

File tree

7 files changed

+71
-101
lines changed

7 files changed

+71
-101
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8746,9 +8746,15 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
87468746
return SDValue();
87478747

87488748
// TODO: We may be able to add support for other Ops (ADD/SUB + shifts).
8749+
bool IsShift = false;
87498750
switch (Opcode) {
87508751
default:
87518752
return SDValue();
8753+
case ISD::SHL:
8754+
case ISD::SRL:
8755+
case ISD::SRA:
8756+
IsShift = true;
8757+
break;
87528758
case ISD::AND:
87538759
case ISD::XOR:
87548760
case ISD::OR:
@@ -8769,10 +8775,24 @@ static SDValue lowerBuildVectorToBitOp(BuildVectorSDNode *Op,
87698775
// We expect the canonicalized RHS operand to be the constant.
87708776
if (!isa<ConstantSDNode>(RHS))
87718777
return SDValue();
8778+
8779+
// Extend shift amounts.
8780+
if (RHS.getValueSizeInBits() != VT.getScalarSizeInBits()) {
8781+
if (!IsShift)
8782+
return SDValue();
8783+
RHS = DAG.getZExtOrTrunc(RHS, DL, VT.getScalarType());
8784+
}
8785+
87728786
LHSElts.push_back(LHS);
87738787
RHSElts.push_back(RHS);
87748788
}
87758789

8790+
// Limit to shifts by uniform immediates.
8791+
// TODO: Only accept vXi8/vXi64 special cases?
8792+
// TODO: Permit non-uniform XOP/AVX2/MULLO cases?
8793+
if (IsShift && any_of(RHSElts, [&](SDValue V) { return RHSElts[0] != V; }))
8794+
return SDValue();
8795+
87768796
SDValue LHS = DAG.getBuildVector(VT, DL, LHSElts);
87778797
SDValue RHS = DAG.getBuildVector(VT, DL, RHSElts);
87788798
return DAG.getNode(Opcode, DL, VT, LHS, RHS);

llvm/test/CodeGen/X86/rotate-extract-vector.ll

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -86,13 +86,12 @@ define <2 x i64> @vrolq_extract_udiv(<2 x i64> %i) nounwind {
8686
; X64-NEXT: vpextrq $1, %xmm0, %rax
8787
; X64-NEXT: movabsq $-6148914691236517205, %rcx # imm = 0xAAAAAAAAAAAAAAAB
8888
; X64-NEXT: mulq %rcx
89-
; X64-NEXT: shrq %rdx
9089
; X64-NEXT: vmovq %rdx, %xmm1
9190
; X64-NEXT: vmovq %xmm0, %rax
9291
; X64-NEXT: mulq %rcx
93-
; X64-NEXT: shrq %rdx
9492
; X64-NEXT: vmovq %rdx, %xmm0
9593
; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
94+
; X64-NEXT: vpsrlq $1, %xmm0, %xmm0
9695
; X64-NEXT: vprolq $57, %zmm0, %zmm0
9796
; X64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
9897
; X64-NEXT: vzeroupper
@@ -256,24 +255,22 @@ define <2 x i64> @no_extract_udiv(<2 x i64> %i) nounwind {
256255
; X64-NEXT: movabsq $-6148914691236517205, %rdi # imm = 0xAAAAAAAAAAAAAAAB
257256
; X64-NEXT: movq %rcx, %rax
258257
; X64-NEXT: mulq %rdi
259-
; X64-NEXT: shrq %rdx
260258
; X64-NEXT: vmovq %rdx, %xmm1
261259
; X64-NEXT: vmovq %xmm0, %rsi
262260
; X64-NEXT: movq %rsi, %rax
263261
; X64-NEXT: mulq %rdi
264-
; X64-NEXT: shrq %rdx
265262
; X64-NEXT: vmovq %rdx, %xmm0
266263
; X64-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
264+
; X64-NEXT: vpsrlq $1, %xmm0, %xmm0
267265
; X64-NEXT: movabsq $-6180857105216966645, %rdi # imm = 0xAA392F35DC17F00B
268266
; X64-NEXT: movq %rcx, %rax
269267
; X64-NEXT: mulq %rdi
270-
; X64-NEXT: shrq $9, %rdx
271268
; X64-NEXT: vmovq %rdx, %xmm1
272269
; X64-NEXT: movq %rsi, %rax
273270
; X64-NEXT: mulq %rdi
274-
; X64-NEXT: shrq $9, %rdx
275271
; X64-NEXT: vmovq %rdx, %xmm2
276272
; X64-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
273+
; X64-NEXT: vpsrlq $9, %xmm1, %xmm1
277274
; X64-NEXT: vpsllq $56, %xmm0, %xmm0
278275
; X64-NEXT: vpor %xmm1, %xmm0, %xmm0
279276
; X64-NEXT: retq

llvm/test/CodeGen/X86/vector-idiv-udiv-128.ll

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -19,7 +19,6 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
1919
; SSE2-NEXT: subq %rdx, %rcx
2020
; SSE2-NEXT: shrq %rcx
2121
; SSE2-NEXT: addq %rdx, %rcx
22-
; SSE2-NEXT: shrq $2, %rcx
2322
; SSE2-NEXT: movq %rcx, %xmm1
2423
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
2524
; SSE2-NEXT: movq %xmm0, %rcx
@@ -28,9 +27,9 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
2827
; SSE2-NEXT: subq %rdx, %rcx
2928
; SSE2-NEXT: shrq %rcx
3029
; SSE2-NEXT: addq %rdx, %rcx
31-
; SSE2-NEXT: shrq $2, %rcx
3230
; SSE2-NEXT: movq %rcx, %xmm0
3331
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
32+
; SSE2-NEXT: psrlq $2, %xmm1
3433
; SSE2-NEXT: movdqa %xmm1, %xmm0
3534
; SSE2-NEXT: retq
3635
;
@@ -43,17 +42,16 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
4342
; SSE41-NEXT: subq %rdx, %rcx
4443
; SSE41-NEXT: shrq %rcx
4544
; SSE41-NEXT: addq %rdx, %rcx
46-
; SSE41-NEXT: shrq $2, %rcx
4745
; SSE41-NEXT: movq %rcx, %xmm1
4846
; SSE41-NEXT: movq %xmm0, %rcx
4947
; SSE41-NEXT: movq %rcx, %rax
5048
; SSE41-NEXT: mulq %rsi
5149
; SSE41-NEXT: subq %rdx, %rcx
5250
; SSE41-NEXT: shrq %rcx
5351
; SSE41-NEXT: addq %rdx, %rcx
54-
; SSE41-NEXT: shrq $2, %rcx
5552
; SSE41-NEXT: movq %rcx, %xmm0
5653
; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
54+
; SSE41-NEXT: psrlq $2, %xmm0
5755
; SSE41-NEXT: retq
5856
;
5957
; AVX-LABEL: test_div7_2i64:
@@ -65,17 +63,16 @@ define <2 x i64> @test_div7_2i64(<2 x i64> %a) nounwind {
6563
; AVX-NEXT: subq %rdx, %rcx
6664
; AVX-NEXT: shrq %rcx
6765
; AVX-NEXT: addq %rdx, %rcx
68-
; AVX-NEXT: shrq $2, %rcx
6966
; AVX-NEXT: vmovq %rcx, %xmm1
7067
; AVX-NEXT: vmovq %xmm0, %rcx
7168
; AVX-NEXT: movq %rcx, %rax
7269
; AVX-NEXT: mulq %rsi
7370
; AVX-NEXT: subq %rdx, %rcx
7471
; AVX-NEXT: shrq %rcx
7572
; AVX-NEXT: addq %rdx, %rcx
76-
; AVX-NEXT: shrq $2, %rcx
7773
; AVX-NEXT: vmovq %rcx, %xmm0
7874
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
75+
; AVX-NEXT: vpsrlq $2, %xmm0, %xmm0
7976
; AVX-NEXT: retq
8077
%res = udiv <2 x i64> %a, <i64 7, i64 7>
8178
ret <2 x i64> %res

llvm/test/CodeGen/X86/vector-idiv-udiv-256.ll

Lines changed: 10 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -10,43 +10,41 @@
1010
define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
1111
; AVX1-LABEL: test_div7_4i64:
1212
; AVX1: # %bb.0:
13-
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1
14-
; AVX1-NEXT: vpextrq $1, %xmm1, %rcx
13+
; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
1514
; AVX1-NEXT: movabsq $2635249153387078803, %rsi # imm = 0x2492492492492493
1615
; AVX1-NEXT: movq %rcx, %rax
1716
; AVX1-NEXT: mulq %rsi
1817
; AVX1-NEXT: subq %rdx, %rcx
1918
; AVX1-NEXT: shrq %rcx
2019
; AVX1-NEXT: addq %rdx, %rcx
21-
; AVX1-NEXT: shrq $2, %rcx
22-
; AVX1-NEXT: vmovq %rcx, %xmm2
23-
; AVX1-NEXT: vmovq %xmm1, %rcx
20+
; AVX1-NEXT: vmovq %rcx, %xmm1
21+
; AVX1-NEXT: vmovq %xmm0, %rcx
2422
; AVX1-NEXT: movq %rcx, %rax
2523
; AVX1-NEXT: mulq %rsi
2624
; AVX1-NEXT: subq %rdx, %rcx
2725
; AVX1-NEXT: shrq %rcx
2826
; AVX1-NEXT: addq %rdx, %rcx
29-
; AVX1-NEXT: shrq $2, %rcx
30-
; AVX1-NEXT: vmovq %rcx, %xmm1
31-
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
27+
; AVX1-NEXT: vmovq %rcx, %xmm2
28+
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0]
29+
; AVX1-NEXT: vpsrlq $2, %xmm1, %xmm1
30+
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
3231
; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
3332
; AVX1-NEXT: movq %rcx, %rax
3433
; AVX1-NEXT: mulq %rsi
3534
; AVX1-NEXT: subq %rdx, %rcx
3635
; AVX1-NEXT: shrq %rcx
3736
; AVX1-NEXT: addq %rdx, %rcx
38-
; AVX1-NEXT: shrq $2, %rcx
3937
; AVX1-NEXT: vmovq %rcx, %xmm2
4038
; AVX1-NEXT: vmovq %xmm0, %rcx
4139
; AVX1-NEXT: movq %rcx, %rax
4240
; AVX1-NEXT: mulq %rsi
4341
; AVX1-NEXT: subq %rdx, %rcx
4442
; AVX1-NEXT: shrq %rcx
4543
; AVX1-NEXT: addq %rdx, %rcx
46-
; AVX1-NEXT: shrq $2, %rcx
4744
; AVX1-NEXT: vmovq %rcx, %xmm0
4845
; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
49-
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
46+
; AVX1-NEXT: vpsrlq $2, %xmm0, %xmm0
47+
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
5048
; AVX1-NEXT: retq
5149
;
5250
; AVX2-LABEL: test_div7_4i64:
@@ -59,15 +57,13 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
5957
; AVX2-NEXT: subq %rdx, %rcx
6058
; AVX2-NEXT: shrq %rcx
6159
; AVX2-NEXT: addq %rdx, %rcx
62-
; AVX2-NEXT: shrq $2, %rcx
6360
; AVX2-NEXT: vmovq %rcx, %xmm2
6461
; AVX2-NEXT: vmovq %xmm1, %rcx
6562
; AVX2-NEXT: movq %rcx, %rax
6663
; AVX2-NEXT: mulq %rsi
6764
; AVX2-NEXT: subq %rdx, %rcx
6865
; AVX2-NEXT: shrq %rcx
6966
; AVX2-NEXT: addq %rdx, %rcx
70-
; AVX2-NEXT: shrq $2, %rcx
7167
; AVX2-NEXT: vmovq %rcx, %xmm1
7268
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
7369
; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
@@ -76,18 +72,17 @@ define <4 x i64> @test_div7_4i64(<4 x i64> %a) nounwind {
7672
; AVX2-NEXT: subq %rdx, %rcx
7773
; AVX2-NEXT: shrq %rcx
7874
; AVX2-NEXT: addq %rdx, %rcx
79-
; AVX2-NEXT: shrq $2, %rcx
8075
; AVX2-NEXT: vmovq %rcx, %xmm2
8176
; AVX2-NEXT: vmovq %xmm0, %rcx
8277
; AVX2-NEXT: movq %rcx, %rax
8378
; AVX2-NEXT: mulq %rsi
8479
; AVX2-NEXT: subq %rdx, %rcx
8580
; AVX2-NEXT: shrq %rcx
8681
; AVX2-NEXT: addq %rdx, %rcx
87-
; AVX2-NEXT: shrq $2, %rcx
8882
; AVX2-NEXT: vmovq %rcx, %xmm0
8983
; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
9084
; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
85+
; AVX2-NEXT: vpsrlq $2, %ymm0, %ymm0
9186
; AVX2-NEXT: retq
9287
%res = udiv <4 x i64> %a, <i64 7, i64 7, i64 7, i64 7>
9388
ret <4 x i64> %res

llvm/test/CodeGen/X86/vector-idiv-udiv-512.ll

Lines changed: 1 addition & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -17,15 +17,13 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
1717
; AVX-NEXT: subq %rdx, %rcx
1818
; AVX-NEXT: shrq %rcx
1919
; AVX-NEXT: addq %rdx, %rcx
20-
; AVX-NEXT: shrq $2, %rcx
2120
; AVX-NEXT: vmovq %rcx, %xmm2
2221
; AVX-NEXT: vmovq %xmm1, %rcx
2322
; AVX-NEXT: movq %rcx, %rax
2423
; AVX-NEXT: mulq %rsi
2524
; AVX-NEXT: subq %rdx, %rcx
2625
; AVX-NEXT: shrq %rcx
2726
; AVX-NEXT: addq %rdx, %rcx
28-
; AVX-NEXT: shrq $2, %rcx
2927
; AVX-NEXT: vmovq %rcx, %xmm1
3028
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0]
3129
; AVX-NEXT: vextracti32x4 $2, %zmm0, %xmm2
@@ -35,15 +33,13 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
3533
; AVX-NEXT: subq %rdx, %rcx
3634
; AVX-NEXT: shrq %rcx
3735
; AVX-NEXT: addq %rdx, %rcx
38-
; AVX-NEXT: shrq $2, %rcx
3936
; AVX-NEXT: vmovq %rcx, %xmm3
4037
; AVX-NEXT: vmovq %xmm2, %rcx
4138
; AVX-NEXT: movq %rcx, %rax
4239
; AVX-NEXT: mulq %rsi
4340
; AVX-NEXT: subq %rdx, %rcx
4441
; AVX-NEXT: shrq %rcx
4542
; AVX-NEXT: addq %rdx, %rcx
46-
; AVX-NEXT: shrq $2, %rcx
4743
; AVX-NEXT: vmovq %rcx, %xmm2
4844
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
4945
; AVX-NEXT: vinserti128 $1, %xmm1, %ymm2, %ymm1
@@ -54,15 +50,13 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
5450
; AVX-NEXT: subq %rdx, %rcx
5551
; AVX-NEXT: shrq %rcx
5652
; AVX-NEXT: addq %rdx, %rcx
57-
; AVX-NEXT: shrq $2, %rcx
5853
; AVX-NEXT: vmovq %rcx, %xmm3
5954
; AVX-NEXT: vmovq %xmm2, %rcx
6055
; AVX-NEXT: movq %rcx, %rax
6156
; AVX-NEXT: mulq %rsi
6257
; AVX-NEXT: subq %rdx, %rcx
6358
; AVX-NEXT: shrq %rcx
6459
; AVX-NEXT: addq %rdx, %rcx
65-
; AVX-NEXT: shrq $2, %rcx
6660
; AVX-NEXT: vmovq %rcx, %xmm2
6761
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0]
6862
; AVX-NEXT: vpextrq $1, %xmm0, %rcx
@@ -71,19 +65,18 @@ define <8 x i64> @test_div7_8i64(<8 x i64> %a) nounwind {
7165
; AVX-NEXT: subq %rdx, %rcx
7266
; AVX-NEXT: shrq %rcx
7367
; AVX-NEXT: addq %rdx, %rcx
74-
; AVX-NEXT: shrq $2, %rcx
7568
; AVX-NEXT: vmovq %rcx, %xmm3
7669
; AVX-NEXT: vmovq %xmm0, %rcx
7770
; AVX-NEXT: movq %rcx, %rax
7871
; AVX-NEXT: mulq %rsi
7972
; AVX-NEXT: subq %rdx, %rcx
8073
; AVX-NEXT: shrq %rcx
8174
; AVX-NEXT: addq %rdx, %rcx
82-
; AVX-NEXT: shrq $2, %rcx
8375
; AVX-NEXT: vmovq %rcx, %xmm0
8476
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm3[0]
8577
; AVX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0
8678
; AVX-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
79+
; AVX-NEXT: vpsrlq $2, %zmm0, %zmm0
8780
; AVX-NEXT: retq
8881
%res = udiv <8 x i64> %a, <i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7, i64 7>
8982
ret <8 x i64> %res

0 commit comments

Comments
 (0)