Skip to content

Commit cb2d8b3

Browse files
author
Jessica Paquette
committed
[AArch64][GlobalISel] Select trn1 and trn2
Same idea as for zip, uzp, etc. Teach the post-legalizer combiner to recognize G_SHUFFLE_VECTORs that are trn1/trn2 instructions.

- Add G_TRN1 and G_TRN2
- Port mask matching code from AArch64ISelLowering
- Produce G_TRN1 and G_TRN2 in the post-legalizer combiner
- Select via importer

Add select-trn.mir to test selection.

Add postlegalizer-combiner-trn.mir to test the combine. This is similar to the existing arm64-trn test.

Note that both of these tests contain things we currently don't legalize. I figured it would be easier to test these now rather than later, since once we legalize the G_SHUFFLE_VECTORs, it's not guaranteed that someone will update the tests.

Differential Revision: https://reviews.llvm.org/D81182
1 parent de019b8 commit cb2d8b3

File tree

5 files changed

+591
-1
lines changed

5 files changed

+591
-1
lines changed

llvm/lib/Target/AArch64/AArch64Combine.td

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -56,9 +56,16 @@ def dup: GICombineRule <
5656
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
5757
>;
5858

59+
// Combine rule rooted at a G_SHUFFLE_VECTOR: matchTRN decides whether the
// shuffle can be rewritten and fills in the match data, which
// applyShuffleVectorPseudo then consumes to perform the replacement.
def trn : GICombineRule<
60+
(defs root:$root, shuffle_matchdata:$matchinfo),
61+
(match (wip_match_opcode G_SHUFFLE_VECTOR):$root,
62+
[{ return matchTRN(*${root}, MRI, ${matchinfo}); }]),
63+
(apply [{ applyShuffleVectorPseudo(*${root}, ${matchinfo}); }])
64+
>;
65+
5966
// Combines which replace a G_SHUFFLE_VECTOR with a target-specific pseudo
6067
// instruction.
61-
def shuffle_vector_pseudos : GICombineGroup<[dup, rev, zip, uzp]>;
68+
def shuffle_vector_pseudos : GICombineGroup<[dup, rev, zip, uzp, trn]>;
6269

6370
def AArch64PostLegalizerCombinerHelper
6471
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",

llvm/lib/Target/AArch64/AArch64InstrGISel.td

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,19 @@ def G_DUP: AArch64GenericInstruction {
8080
let OutOperandList = (outs type0:$dst);
8181
let InOperandList = (ins type1:$lane);
8282
}
83+
// Represents a trn1 instruction. Produced post-legalization from
84+
// G_SHUFFLE_VECTORs with appropriate masks.
// Takes two source operands ($v1, $v2) and defines one result ($dst); all
// three are constrained to the same type (type0).
85+
def G_TRN1 : AArch64GenericInstruction {
86+
let OutOperandList = (outs type0:$dst);
87+
let InOperandList = (ins type0:$v1, type0:$v2);
88+
}
89+
90+
// Represents a trn2 instruction. Produced post-legalization from
91+
// G_SHUFFLE_VECTORs with appropriate masks.
// Takes two source operands ($v1, $v2) and defines one result ($dst); all
// three are constrained to the same type (type0).
92+
def G_TRN2 : AArch64GenericInstruction {
93+
let OutOperandList = (outs type0:$dst);
94+
let InOperandList = (ins type0:$v1, type0:$v2);
95+
}
8396

8497
def : GINodeEquiv<G_REV16, AArch64rev16>;
8598
def : GINodeEquiv<G_REV32, AArch64rev32>;
@@ -89,3 +102,5 @@ def : GINodeEquiv<G_UZP2, AArch64uzp2>;
89102
def : GINodeEquiv<G_ZIP1, AArch64zip1>;
90103
def : GINodeEquiv<G_ZIP2, AArch64zip2>;
91104
def : GINodeEquiv<G_DUP, AArch64dup>;
105+
def : GINodeEquiv<G_TRN1, AArch64trn1>;
106+
def : GINodeEquiv<G_TRN2, AArch64trn2>;

llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,22 @@ static bool isREVMask(ArrayRef<int> M, unsigned EltSize, unsigned NumElts,
9595
return true;
9696
}
9797

98+
/// Determines if \p M is a shuffle vector mask for a TRN of \p NumElts.
99+
/// Whether or not G_TRN1 or G_TRN2 should be used is stored in \p WhichResult.
100+
static bool isTRNMask(ArrayRef<int> M, unsigned NumElts,
101+
unsigned &WhichResult) {
102+
if (NumElts % 2 != 0)
103+
return false;
104+
WhichResult = (M[0] == 0 ? 0 : 1);
105+
for (unsigned i = 0; i < NumElts; i += 2) {
106+
if ((M[i] >= 0 && static_cast<unsigned>(M[i]) != i + WhichResult) ||
107+
(M[i + 1] >= 0 &&
108+
static_cast<unsigned>(M[i + 1]) != i + NumElts + WhichResult))
109+
return false;
110+
}
111+
return true;
112+
}
113+
98114
/// Determines if \p M is a shuffle vector mask for a UZP of \p NumElts.
99115
/// Whether or not G_UZP1 or G_UZP2 should be used is stored in \p WhichResult.
100116
static bool isUZPMask(ArrayRef<int> M, unsigned NumElts,
@@ -158,6 +174,24 @@ static bool matchREV(MachineInstr &MI, MachineRegisterInfo &MRI,
158174
return false;
159175
}
160176

177+
/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
178+
/// a G_TRN1 or G_TRN2 instruction.
179+
static bool matchTRN(MachineInstr &MI, MachineRegisterInfo &MRI,
180+
ShuffleVectorPseudo &MatchInfo) {
181+
assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
182+
unsigned WhichResult;
183+
ArrayRef<int> ShuffleMask = MI.getOperand(3).getShuffleMask();
184+
Register Dst = MI.getOperand(0).getReg();
185+
unsigned NumElts = MRI.getType(Dst).getNumElements();
186+
if (!isTRNMask(ShuffleMask, NumElts, WhichResult))
187+
return false;
188+
unsigned Opc = (WhichResult == 0) ? AArch64::G_TRN1 : AArch64::G_TRN2;
189+
Register V1 = MI.getOperand(1).getReg();
190+
Register V2 = MI.getOperand(2).getReg();
191+
MatchInfo = ShuffleVectorPseudo(Opc, Dst, {V1, V2});
192+
return true;
193+
}
194+
161195
/// \return true if a G_SHUFFLE_VECTOR instruction \p MI can be replaced with
162196
/// a G_UZP1 or G_UZP2 instruction.
163197
///
Lines changed: 234 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,234 @@
1+
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2+
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
3+
#
4+
# Check that we produce G_TRN1 or G_TRN2 when we have an appropriate shuffle
5+
# mask.
6+
#
7+
8+
...
9+
---
10+
name: trn1_v8s8
11+
alignment: 4
12+
legalized: true
13+
tracksRegLiveness: true
14+
body: |
15+
bb.1.entry:
16+
liveins: $d0, $d1
17+
; CHECK-LABEL: name: trn1_v8s8
18+
; CHECK: liveins: $d0, $d1
19+
; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
20+
; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
21+
; CHECK: [[TRN1_:%[0-9]+]]:_(<8 x s8>) = G_TRN1 [[COPY]], [[COPY1]]
22+
; CHECK: $d0 = COPY [[TRN1_]](<8 x s8>)
23+
; CHECK: RET_ReallyLR implicit $q0
24+
%0:_(<8 x s8>) = COPY $d0
25+
%1:_(<8 x s8>) = COPY $d1
26+
%2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(0, 8, 2, 10, 4, 12, 6, 14)
27+
$d0 = COPY %2(<8 x s8>)
28+
RET_ReallyLR implicit $q0
29+
30+
...
31+
---
32+
name: trn2_v8s8
33+
alignment: 4
34+
legalized: true
35+
tracksRegLiveness: true
36+
body: |
37+
bb.1.entry:
38+
liveins: $d0, $d1
39+
; CHECK-LABEL: name: trn2_v8s8
40+
; CHECK: liveins: $d0, $d1
41+
; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
42+
; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
43+
; CHECK: [[TRN2_:%[0-9]+]]:_(<8 x s8>) = G_TRN2 [[COPY]], [[COPY1]]
44+
; CHECK: $d0 = COPY [[TRN2_]](<8 x s8>)
45+
; CHECK: RET_ReallyLR implicit $q0
46+
%0:_(<8 x s8>) = COPY $d0
47+
%1:_(<8 x s8>) = COPY $d1
48+
%2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(1, 9, 3, 11, 5, 13, 7, 15)
49+
$d0 = COPY %2(<8 x s8>)
50+
RET_ReallyLR implicit $q0
51+
52+
...
53+
---
54+
name: trn1_v16s8
55+
alignment: 4
56+
legalized: true
57+
tracksRegLiveness: true
58+
body: |
59+
bb.1.entry:
60+
liveins: $q0, $q1
61+
; CHECK-LABEL: name: trn1_v16s8
62+
; CHECK: liveins: $q0, $q1
63+
; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
64+
; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1
65+
; CHECK: [[TRN1_:%[0-9]+]]:_(<16 x s8>) = G_TRN1 [[COPY]], [[COPY1]]
66+
; CHECK: $q0 = COPY [[TRN1_]](<16 x s8>)
67+
; CHECK: RET_ReallyLR implicit $q0
68+
%0:_(<16 x s8>) = COPY $q0
69+
%1:_(<16 x s8>) = COPY $q1
70+
%2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30)
71+
$q0 = COPY %2(<16 x s8>)
72+
RET_ReallyLR implicit $q0
73+
74+
...
75+
---
76+
name: trn2_v16s8
77+
alignment: 4
78+
legalized: true
79+
tracksRegLiveness: true
80+
body: |
81+
bb.1.entry:
82+
liveins: $q0, $q1
83+
; CHECK-LABEL: name: trn2_v16s8
84+
; CHECK: liveins: $q0, $q1
85+
; CHECK: [[COPY:%[0-9]+]]:_(<16 x s8>) = COPY $q0
86+
; CHECK: [[COPY1:%[0-9]+]]:_(<16 x s8>) = COPY $q1
87+
; CHECK: [[TRN2_:%[0-9]+]]:_(<16 x s8>) = G_TRN2 [[COPY]], [[COPY1]]
88+
; CHECK: $q0 = COPY [[TRN2_]](<16 x s8>)
89+
; CHECK: RET_ReallyLR implicit $q0
90+
%0:_(<16 x s8>) = COPY $q0
91+
%1:_(<16 x s8>) = COPY $q1
92+
%2:_(<16 x s8>) = G_SHUFFLE_VECTOR %0(<16 x s8>), %1, shufflemask(1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31)
93+
$q0 = COPY %2(<16 x s8>)
94+
RET_ReallyLR implicit $q0
95+
96+
...
97+
---
98+
name: trn1_v4s32
99+
alignment: 4
100+
legalized: true
101+
tracksRegLiveness: true
102+
body: |
103+
bb.1.entry:
104+
liveins: $q0, $q1
105+
; CHECK-LABEL: name: trn1_v4s32
106+
; CHECK: liveins: $q0, $q1
107+
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
108+
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
109+
; CHECK: [[TRN1_:%[0-9]+]]:_(<4 x s32>) = G_TRN1 [[COPY]], [[COPY1]]
110+
; CHECK: $q0 = COPY [[TRN1_]](<4 x s32>)
111+
; CHECK: RET_ReallyLR implicit $q0
112+
%0:_(<4 x s32>) = COPY $q0
113+
%1:_(<4 x s32>) = COPY $q1
114+
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(0, 4, 2, 6)
115+
$q0 = COPY %2(<4 x s32>)
116+
RET_ReallyLR implicit $q0
117+
118+
...
119+
---
120+
name: trn2_v4s32
121+
alignment: 4
122+
legalized: true
123+
tracksRegLiveness: true
124+
body: |
125+
bb.1.entry:
126+
liveins: $q0, $q1
127+
; CHECK-LABEL: name: trn2_v4s32
128+
; CHECK: liveins: $q0, $q1
129+
; CHECK: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
130+
; CHECK: [[COPY1:%[0-9]+]]:_(<4 x s32>) = COPY $q1
131+
; CHECK: [[TRN2_:%[0-9]+]]:_(<4 x s32>) = G_TRN2 [[COPY]], [[COPY1]]
132+
; CHECK: $q0 = COPY [[TRN2_]](<4 x s32>)
133+
; CHECK: RET_ReallyLR implicit $q0
134+
%0:_(<4 x s32>) = COPY $q0
135+
%1:_(<4 x s32>) = COPY $q1
136+
%2:_(<4 x s32>) = G_SHUFFLE_VECTOR %0(<4 x s32>), %1, shufflemask(1, 5, 3, 7)
137+
$q0 = COPY %2(<4 x s32>)
138+
RET_ReallyLR implicit $q0
139+
140+
...
141+
---
142+
name: redundant_with_zip1
143+
alignment: 4
144+
legalized: true
145+
tracksRegLiveness: true
146+
body: |
147+
bb.1.entry:
148+
liveins: $d0, $d1
149+
; 2 x s32 TRN is redundant with ZIP. Make sure we prioritize ZIP.
150+
;
151+
; CHECK-LABEL: name: redundant_with_zip1
152+
; CHECK: liveins: $d0, $d1
153+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
154+
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
155+
; CHECK: [[ZIP1_:%[0-9]+]]:_(<2 x s32>) = G_ZIP1 [[COPY]], [[COPY1]]
156+
; CHECK: $d0 = COPY [[ZIP1_]](<2 x s32>)
157+
; CHECK: RET_ReallyLR implicit $d0
158+
%0:_(<2 x s32>) = COPY $d0
159+
%1:_(<2 x s32>) = COPY $d1
160+
%2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(0, 2)
161+
$d0 = COPY %2(<2 x s32>)
162+
RET_ReallyLR implicit $d0
163+
164+
...
165+
---
166+
name: redundant_with_zip2
167+
alignment: 4
168+
legalized: true
169+
tracksRegLiveness: true
170+
body: |
171+
bb.1.entry:
172+
liveins: $d0, $d1
173+
; 2 x s32 TRN is redundant with ZIP. Make sure we prioritize ZIP.
174+
;
175+
; CHECK-LABEL: name: redundant_with_zip2
176+
; CHECK: liveins: $d0, $d1
177+
; CHECK: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $d0
178+
; CHECK: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $d1
179+
; CHECK: [[ZIP2_:%[0-9]+]]:_(<2 x s32>) = G_ZIP2 [[COPY]], [[COPY1]]
180+
; CHECK: $d0 = COPY [[ZIP2_]](<2 x s32>)
181+
; CHECK: RET_ReallyLR implicit $d0
182+
%0:_(<2 x s32>) = COPY $d0
183+
%1:_(<2 x s32>) = COPY $d1
184+
%2:_(<2 x s32>) = G_SHUFFLE_VECTOR %0(<2 x s32>), %1, shufflemask(1, 3)
185+
$d0 = COPY %2(<2 x s32>)
186+
RET_ReallyLR implicit $d0
187+
188+
...
189+
---
190+
name: trn1_undef
191+
alignment: 4
192+
legalized: true
193+
tracksRegLiveness: true
194+
body: |
195+
bb.1.entry:
196+
liveins: $d0, $d1
197+
; Undef shuffle indices should not prevent matching to G_TRN1.
198+
;
199+
; CHECK-LABEL: name: trn1_undef
200+
; CHECK: liveins: $d0, $d1
201+
; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
202+
; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
203+
; CHECK: [[TRN1_:%[0-9]+]]:_(<8 x s8>) = G_TRN1 [[COPY]], [[COPY1]]
204+
; CHECK: $d0 = COPY [[TRN1_]](<8 x s8>)
205+
; CHECK: RET_ReallyLR implicit $d0
206+
%0:_(<8 x s8>) = COPY $d0
207+
%1:_(<8 x s8>) = COPY $d1
208+
%2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(0, 8, -1, -1, 4, 12, 6, 14)
209+
$d0 = COPY %2(<8 x s8>)
210+
RET_ReallyLR implicit $d0
211+
212+
...
213+
---
214+
name: trn2_undef
215+
alignment: 4
216+
legalized: true
217+
tracksRegLiveness: true
218+
body: |
219+
bb.1.entry:
220+
liveins: $d0, $d1
221+
; Undef shuffle indices should not prevent matching to G_TRN2.
222+
;
223+
; CHECK-LABEL: name: trn2_undef
224+
; CHECK: liveins: $d0, $d1
225+
; CHECK: [[COPY:%[0-9]+]]:_(<8 x s8>) = COPY $d0
226+
; CHECK: [[COPY1:%[0-9]+]]:_(<8 x s8>) = COPY $d1
227+
; CHECK: [[TRN2_:%[0-9]+]]:_(<8 x s8>) = G_TRN2 [[COPY]], [[COPY1]]
228+
; CHECK: $d0 = COPY [[TRN2_]](<8 x s8>)
229+
; CHECK: RET_ReallyLR implicit $d0
230+
%0:_(<8 x s8>) = COPY $d0
231+
%1:_(<8 x s8>) = COPY $d1
232+
%2:_(<8 x s8>) = G_SHUFFLE_VECTOR %0(<8 x s8>), %1, shufflemask(1, -1, 3, 11, 5, 13, -1, -1)
233+
$d0 = COPY %2(<8 x s8>)
234+
RET_ReallyLR implicit $d0

0 commit comments

Comments
 (0)