Skip to content

Commit 6ee4f25

Browse files
committed
[GlobalISel] Add G_BUILD_VECTOR[_TRUNC] to CSE
Add G_BUILD_VECTOR and G_BUILD_VECTOR_TRUNC to the list of opcodes in `shouldCSEOpc`. This simplifies the code generated for vector splats. Differential Revision: https://reviews.llvm.org/D140965
1 parent 22924bd commit 6ee4f25

File tree

8 files changed

+68
-92
lines changed

8 files changed

+68
-92
lines changed

llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -62,6 +62,8 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) {
6262
case TargetOpcode::G_PTR_ADD:
6363
case TargetOpcode::G_EXTRACT:
6464
case TargetOpcode::G_SELECT:
65+
case TargetOpcode::G_BUILD_VECTOR:
66+
case TargetOpcode::G_BUILD_VECTOR_TRUNC:
6567
return true;
6668
}
6769
return false;

llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.ll

Lines changed: 10 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -147,21 +147,21 @@ define <8 x i16> @combine_vec_udiv_nonuniform3(<8 x i16> %x) {
147147
; GISEL-LABEL: combine_vec_udiv_nonuniform3:
148148
; GISEL: // %bb.0:
149149
; GISEL-NEXT: adrp x8, .LCPI3_2
150-
; GISEL-NEXT: adrp x9, .LCPI3_3
150+
; GISEL-NEXT: adrp x9, .LCPI3_1
151151
; GISEL-NEXT: ldr q1, [x8, :lo12:.LCPI3_2]
152-
; GISEL-NEXT: adrp x8, .LCPI3_1
153-
; GISEL-NEXT: ldr q4, [x9, :lo12:.LCPI3_3]
152+
; GISEL-NEXT: adrp x8, .LCPI3_3
153+
; GISEL-NEXT: ldr q3, [x9, :lo12:.LCPI3_1]
154154
; GISEL-NEXT: umull2 v2.4s, v0.8h, v1.8h
155155
; GISEL-NEXT: umull v1.4s, v0.4h, v1.4h
156+
; GISEL-NEXT: neg v3.8h, v3.8h
156157
; GISEL-NEXT: uzp2 v1.8h, v1.8h, v2.8h
157-
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_1]
158+
; GISEL-NEXT: ldr q2, [x8, :lo12:.LCPI3_3]
158159
; GISEL-NEXT: adrp x8, .LCPI3_0
159-
; GISEL-NEXT: neg v2.8h, v2.8h
160-
; GISEL-NEXT: sub v3.8h, v0.8h, v1.8h
161-
; GISEL-NEXT: usra v1.8h, v3.8h, #1
162-
; GISEL-NEXT: ldr q3, [x8, :lo12:.LCPI3_0]
163-
; GISEL-NEXT: ushl v1.8h, v1.8h, v2.8h
164-
; GISEL-NEXT: cmeq v2.8h, v4.8h, v3.8h
160+
; GISEL-NEXT: sub v4.8h, v0.8h, v1.8h
161+
; GISEL-NEXT: ldr q5, [x8, :lo12:.LCPI3_0]
162+
; GISEL-NEXT: usra v1.8h, v4.8h, #1
163+
; GISEL-NEXT: cmeq v2.8h, v2.8h, v5.8h
164+
; GISEL-NEXT: ushl v1.8h, v1.8h, v3.8h
165165
; GISEL-NEXT: bif v0.16b, v1.16b, v2.16b
166166
; GISEL-NEXT: ret
167167
%1 = udiv <8 x i16> %x, <i16 7, i16 23, i16 25, i16 27, i16 31, i16 47, i16 63, i16 127>

llvm/test/CodeGen/AArch64/GlobalISel/combine-udiv.mir

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -228,8 +228,7 @@ body: |
228228
; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[SUB]], [[BUILD_VECTOR3]](<8 x s16>)
229229
; CHECK-NEXT: [[ADD:%[0-9]+]]:_(<8 x s16>) = G_ADD [[LSHR]], [[UMULH]]
230230
; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(<8 x s16>) = G_LSHR [[ADD]], [[BUILD_VECTOR2]](<8 x s16>)
231-
; CHECK-NEXT: [[BUILD_VECTOR4:%[0-9]+]]:_(<8 x s16>) = G_BUILD_VECTOR [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16), [[C20]](s16)
232-
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR4]]
231+
; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(<8 x s1>) = G_ICMP intpred(eq), [[BUILD_VECTOR]](<8 x s16>), [[BUILD_VECTOR3]]
233232
; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(<8 x s16>) = G_SELECT [[ICMP]](<8 x s1>), [[COPY]], [[LSHR1]]
234233
; CHECK-NEXT: $q0 = COPY [[SELECT]](<8 x s16>)
235234
; CHECK-NEXT: RET_ReallyLR implicit $q0

llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp-arith.mir

Lines changed: 2 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -98,11 +98,8 @@ body: |
9898
; CHECK-LABEL: name: test_fmul_v8s32
9999
; CHECK: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
100100
; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
101-
; CHECK: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
102-
; CHECK: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
103-
; CHECK: [[BUILD_VECTOR3:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[DEF]](s32), [[DEF]](s32), [[DEF]](s32), [[DEF]](s32)
104-
; CHECK: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR2]]
105-
; CHECK: [[FMUL1:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR1]], [[BUILD_VECTOR3]]
101+
; CHECK: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR]]
102+
; CHECK: [[FMUL1:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR]]
106103
; CHECK: $q0 = COPY [[FMUL]](<4 x s32>)
107104
; CHECK: $q1 = COPY [[FMUL1]](<4 x s32>)
108105
%0:_(<8 x s32>) = G_IMPLICIT_DEF

llvm/test/CodeGen/AArch64/GlobalISel/legalize-min-max.mir

Lines changed: 28 additions & 68 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AArch64/GlobalISel/prelegalizer-combiner-mulo-zero.mir

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,8 @@ body: |
8787
; CHECK: liveins: $q0, $x0
8888
; CHECK-NEXT: {{ $}}
8989
; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
90-
; CHECK-NEXT: %mulo:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
90+
; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
91+
; CHECK-NEXT: %mulo:_(<2 x s64>) = COPY %zero_vec(<2 x s64>)
9192
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
9293
; CHECK-NEXT: %carry:_(<2 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1)
9394
; CHECK-NEXT: %carry_elt_0:_(s1) = G_EXTRACT_VECTOR_ELT %carry(<2 x s1>), %zero(s64)
@@ -115,7 +116,8 @@ body: |
115116
; CHECK: liveins: $q0, $x0
116117
; CHECK-NEXT: {{ $}}
117118
; CHECK-NEXT: %zero:_(s64) = G_CONSTANT i64 0
118-
; CHECK-NEXT: %mulo:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
119+
; CHECK-NEXT: %zero_vec:_(<2 x s64>) = G_BUILD_VECTOR %zero(s64), %zero(s64)
120+
; CHECK-NEXT: %mulo:_(<2 x s64>) = COPY %zero_vec(<2 x s64>)
119121
; CHECK-NEXT: [[C:%[0-9]+]]:_(s1) = G_CONSTANT i1 false
120122
; CHECK-NEXT: %carry:_(<2 x s1>) = G_BUILD_VECTOR [[C]](s1), [[C]](s1)
121123
; CHECK-NEXT: %carry_elt_0:_(s1) = G_EXTRACT_VECTOR_ELT %carry(<2 x s1>), %zero(s64)

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector-splat.mir

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@
22
# RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -run-pass=legalizer %s -o - | FileCheck %s
33

44
# Make sure we CSE when building a vector splat.
5-
# See https://reviews.llvm.org/D140965
65
---
76
name: build_vector_v8s16_splat
87
body: |
@@ -13,10 +12,7 @@ body: |
1312
; CHECK-NEXT: {{ $}}
1413
; CHECK-NEXT: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 16256
1514
; CHECK-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
16-
; CHECK-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
17-
; CHECK-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
18-
; CHECK-NEXT: [[BUILD_VECTOR3:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[C]](s16), [[C]](s16)
19-
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR1]](<2 x s16>), [[BUILD_VECTOR2]](<2 x s16>), [[BUILD_VECTOR3]](<2 x s16>)
15+
; CHECK-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<8 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>), [[BUILD_VECTOR]](<2 x s16>)
2016
; CHECK-NEXT: S_NOP 0, implicit [[CONCAT_VECTORS]](<8 x s16>)
2117
%1:_(s32) = COPY $vgpr0
2218
%2:_(s32) = COPY $vgpr1

llvm/unittests/CodeGen/GlobalISel/CSETest.cpp

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -75,6 +75,26 @@ TEST_F(AArch64GISelMITest, TestCSE) {
7575
auto MIBUnmerge2 = CSEB.buildUnmerge({s32, s32}, Copies[0]);
7676
EXPECT_TRUE(&*MIBUnmerge == &*MIBUnmerge2);
7777

78+
// Check G_BUILD_VECTOR
79+
Register Reg1 = MRI->createGenericVirtualRegister(s32);
80+
Register Reg2 = MRI->createGenericVirtualRegister(s32);
81+
auto BuildVec1 =
82+
CSEB.buildBuildVector(LLT::fixed_vector(4, 32), {Reg1, Reg2, Reg1, Reg2});
83+
auto BuildVec2 =
84+
CSEB.buildBuildVector(LLT::fixed_vector(4, 32), {Reg1, Reg2, Reg1, Reg2});
85+
EXPECT_EQ(TargetOpcode::G_BUILD_VECTOR, BuildVec1->getOpcode());
86+
EXPECT_EQ(TargetOpcode::G_BUILD_VECTOR, BuildVec2->getOpcode());
87+
EXPECT_TRUE(&*BuildVec1 == &*BuildVec2);
88+
89+
// Check G_BUILD_VECTOR_TRUNC
90+
auto BuildVecTrunc1 = CSEB.buildBuildVectorTrunc(LLT::fixed_vector(4, 16),
91+
{Reg1, Reg2, Reg1, Reg2});
92+
auto BuildVecTrunc2 = CSEB.buildBuildVectorTrunc(LLT::fixed_vector(4, 16),
93+
{Reg1, Reg2, Reg1, Reg2});
94+
EXPECT_EQ(TargetOpcode::G_BUILD_VECTOR_TRUNC, BuildVecTrunc1->getOpcode());
95+
EXPECT_EQ(TargetOpcode::G_BUILD_VECTOR_TRUNC, BuildVecTrunc2->getOpcode());
96+
EXPECT_TRUE(&*BuildVecTrunc1 == &*BuildVecTrunc2);
97+
7898
// Check G_IMPLICIT_DEF
7999
auto Undef0 = CSEB.buildUndef(s32);
80100
auto Undef1 = CSEB.buildUndef(s32);

0 commit comments

Comments
 (0)