16
16
// ===----------------------------------------------------------------------===//
17
17
18
18
#include " AMDGPU.h"
19
- #include " llvm/CodeGen/MachineFunctionPass.h"
19
+ #include " AMDGPUGlobalISelUtils.h"
20
+ #include " GCNSubtarget.h"
21
+ #include " llvm/CodeGen/GlobalISel/CSEInfo.h"
22
+ #include " llvm/CodeGen/GlobalISel/CSEMIRBuilder.h"
23
+ #include " llvm/CodeGen/MachineUniformityAnalysis.h"
24
+ #include " llvm/CodeGen/TargetPassConfig.h"
20
25
#include " llvm/InitializePasses.h"
21
26
22
27
#define DEBUG_TYPE " amdgpu-regbankselect"
23
28
24
29
using namespace llvm ;
30
+ using namespace AMDGPU ;
25
31
26
32
namespace {
27
33
@@ -40,6 +46,9 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
40
46
}
41
47
42
48
/// Declare the analyses this pass pulls in via getAnalysis<>():
/// the target pass configuration, the GlobalISel CSE wrapper and machine
/// uniformity information. Everything else is delegated to the base class.
void getAnalysisUsage(AnalysisUsage &AU) const override {
  AU.addRequired<TargetPassConfig>();
  AU.addRequired<GISelCSEAnalysisWrapperPass>();
  AU.addRequired<MachineUniformityAnalysisPass>();

  // Let MachineFunctionPass append its own requirements last.
  MachineFunctionPass::getAnalysisUsage(AU);
}
45
54
@@ -55,6 +64,9 @@ class AMDGPURegBankSelect : public MachineFunctionPass {
55
64
56
65
// Pass registration for AMDGPURegBankSelect under DEBUG_TYPE. The
// INITIALIZE_PASS_DEPENDENCY entries name the passes that getAnalysisUsage()
// marks as required: TargetPassConfig, GISelCSEAnalysisWrapperPass and
// MachineUniformityAnalysisPass.
INITIALIZE_PASS_BEGIN (AMDGPURegBankSelect, DEBUG_TYPE,
57
66
" AMDGPU Register Bank Select" , false , false )
67
+ INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
68
+ INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass)
69
+ INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
58
70
INITIALIZE_PASS_END(AMDGPURegBankSelect, DEBUG_TYPE,
59
71
" AMDGPU Register Bank Select" , false , false )
60
72
@@ -66,9 +78,201 @@ FunctionPass *llvm::createAMDGPURegBankSelectPass() {
66
78
return new AMDGPURegBankSelect ();
67
79
}
68
80
81
+ class RegBankSelectHelper {
82
+ MachineIRBuilder &B;
83
+ MachineRegisterInfo &MRI;
84
+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA;
85
+ const MachineUniformityInfo &MUI;
86
+ const RegisterBank *SgprRB;
87
+ const RegisterBank *VgprRB;
88
+ const RegisterBank *VccRB;
89
+
90
+ public:
91
+ RegBankSelectHelper (MachineIRBuilder &B,
92
+ AMDGPU::IntrinsicLaneMaskAnalyzer &ILMA,
93
+ const MachineUniformityInfo &MUI,
94
+ const RegisterBankInfo &RBI)
95
+ : B(B), MRI(*B.getMRI()), ILMA(ILMA), MUI(MUI),
96
+ SgprRB (&RBI.getRegBank(AMDGPU::SGPRRegBankID)),
97
+ VgprRB(&RBI.getRegBank(AMDGPU::VGPRRegBankID)),
98
+ VccRB(&RBI.getRegBank(AMDGPU::VCCRegBankID)) {}
99
+
100
+ const RegisterBank *getRegBankToAssign (Register Reg) {
101
+ if (MUI.isUniform (Reg) || ILMA.isS32S64LaneMask (Reg))
102
+ return SgprRB;
103
+ if (MRI.getType (Reg) == LLT::scalar (1 ))
104
+ return VccRB;
105
+ return VgprRB;
106
+ }
107
+
108
+ // %rc:RegClass(s32) = G_ ...
109
+ // ...
110
+ // %a = G_ ..., %rc
111
+ // ->
112
+ // %rb:RegBank(s32) = G_ ...
113
+ // %rc:RegClass(s32) = COPY %rb
114
+ // ...
115
+ // %a = G_ ..., %rb
116
+ void reAssignRegBankOnDef (MachineInstr &MI, MachineOperand &DefOP,
117
+ const RegisterBank *RB) {
118
+ // Register that already has Register class got it during pre-inst selection
119
+ // of another instruction. Maybe cross bank copy was required so we insert a
120
+ // copy that can be removed later. This simplifies post regbanklegalize
121
+ // combiner and avoids need to special case some patterns.
122
+ Register Reg = DefOP.getReg ();
123
+ LLT Ty = MRI.getType (Reg);
124
+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
125
+ DefOP.setReg (NewReg);
126
+
127
+ auto &MBB = *MI.getParent ();
128
+ B.setInsertPt (MBB, MBB.SkipPHIsAndLabels (std::next (MI.getIterator ())));
129
+ B.buildCopy (Reg, NewReg);
130
+
131
+ // The problem was discovered for uniform S1 that was used as both
132
+ // lane mask(vcc) and regular sgpr S1.
133
+ // - lane-mask(vcc) use was by si_if, this use is divergent and requires
134
+ // non-trivial sgpr-S1-to-vcc copy. But pre-inst-selection of si_if sets
135
+ // sreg_64_xexec(S1) on def of uniform S1 making it lane-mask.
136
+ // - the regular sgpr S1(uniform) instruction is now broken since
137
+ // it uses sreg_64_xexec(S1) which is divergent.
138
+
139
+ // Replace virtual registers with register class on generic instructions
140
+ // uses with virtual registers with register bank.
141
+ for (auto &UseMI : make_early_inc_range (MRI.use_instructions (Reg))) {
142
+ if (UseMI.isPreISelOpcode ()) {
143
+ for (MachineOperand &Op : UseMI.operands ()) {
144
+ if (Op.isReg () && Op.getReg () == Reg)
145
+ Op.setReg (NewReg);
146
+ }
147
+ }
148
+ }
149
+ }
150
+
151
+ // %a = G_ ..., %rc
152
+ // ->
153
+ // %rb:RegBank(s32) = COPY %rc
154
+ // %a = G_ ..., %rb
155
+ void constrainRegBankUse (MachineInstr &MI, MachineOperand &UseOP,
156
+ const RegisterBank *RB) {
157
+ Register Reg = UseOP.getReg ();
158
+
159
+ LLT Ty = MRI.getType (Reg);
160
+ Register NewReg = MRI.createVirtualRegister ({RB, Ty});
161
+ UseOP.setReg (NewReg);
162
+
163
+ if (MI.isPHI ()) {
164
+ auto DefMI = MRI.getVRegDef (Reg)->getIterator ();
165
+ MachineBasicBlock *DefMBB = DefMI->getParent ();
166
+ B.setInsertPt (*DefMBB, DefMBB->SkipPHIsAndLabels (std::next (DefMI)));
167
+ } else {
168
+ B.setInstr (MI);
169
+ }
170
+
171
+ B.buildCopy (NewReg, Reg);
172
+ }
173
+ };
174
+
175
+ static Register getVReg (MachineOperand &Op) {
176
+ if (!Op.isReg ())
177
+ return {};
178
+
179
+ // Operands of COPY and G_SI_CALL can be physical registers.
180
+ Register Reg = Op.getReg ();
181
+ if (!Reg.isVirtual ())
182
+ return {};
183
+
184
+ return Reg;
185
+ }
186
+
69
187
/// Walk every instruction of \p MF and make sure the virtual registers on
/// generic (pre-isel) instructions, and class-less COPY destinations, end up
/// with a register bank. Returns false without touching anything when the
/// function is marked FailedISel, true otherwise.
bool AMDGPURegBankSelect::runOnMachineFunction(MachineFunction &MF) {
  const bool ISelAlreadyFailed = MF.getProperties().hasProperty(
      MachineFunctionProperties::Property::FailedISel);
  if (ISelAlreadyFailed)
    return false;

  // Build a CSE-aware instruction builder and hook the CSE info up as a
  // change observer.
  const TargetPassConfig &PassConfig = getAnalysis<TargetPassConfig>();
  GISelCSEAnalysisWrapper &CSEWrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  GISelCSEInfo &CSE = CSEWrapper.get(PassConfig.getCSEConfig());
  GISelObserverWrapper Observer;
  Observer.addObserver(&CSE);

  CSEMIRBuilder B(MF);
  B.setCSEInfo(&CSE);
  B.setChangeObserver(Observer);

  RAIIDelegateInstaller DelegateInstaller(MF, &Observer);
  RAIIMFObserverInstaller MFObserverInstaller(MF, Observer);

  IntrinsicLaneMaskAnalyzer ILMA(MF);
  MachineUniformityInfo &MUI =
      getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
  MachineRegisterInfo &MRI = *B.getMRI();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  RegBankSelectHelper RBSHelper(B, ILMA, MUI, *ST.getRegBankInfo());

  // On entry no virtual register has a register bank yet. Virtual registers
  // that appear as defs or uses of already inst-selected instructions carry a
  // register class.

  for (MachineBasicBlock &Block : MF) {
    for (MachineInstr &Inst : Block) {
      // COPY operands may have either a register class or a bank. Only a
      // virtual destination that has neither gets a bank assigned here.
      if (Inst.isCopy()) {
        Register CopyDst = getVReg(Inst.getOperand(0));
        if (CopyDst.isValid() && !MRI.getRegClassOrNull(CopyDst)) {
          assert(!MRI.getRegBankOrNull(CopyDst));
          MRI.setRegBank(CopyDst, *RBSHelper.getRegBankToAssign(CopyDst));
        }
        continue;
      }

      // Everything below applies to generic (G_) instructions only.
      if (!Inst.isPreISelOpcode())
        continue;

      // All vregs on a G_ instruction must end up with a register bank.
      // Situations that can occur at this point:
      //  (1) vreg with neither class nor bank, as def or use
      //  (2) vreg with a register class, as def
      //  (3) vreg defined by a G_ instruction, as use
      //  (4) vreg defined by a pre-inst-selected instruction, as use

      // Walking the defs of G_ instructions covers (1)-(3): (1) is a plain
      // setRegBank, (2) and (3) go through reAssignRegBankOnDef.
      for (MachineOperand &Def : Inst.defs()) {
        Register DstReg = getVReg(Def);
        if (!DstReg.isValid())
          continue;

        const RegisterBank *Bank = RBSHelper.getRegBankToAssign(DstReg);
        if (MRI.getRegClassOrNull(DstReg)) {
          RBSHelper.reAssignRegBankOnDef(Inst, Def, Bank);
          continue;
        }

        assert(!MRI.getRegBankOrNull(DstReg));
        MRI.setRegBank(DstReg, *Bank);
      }

      // Pre-inst-selected instructions are never rewritten by this pass, so
      // case (4) is resolved by a COPY inserted via constrainRegBankUse.
      for (MachineOperand &Use : Inst.uses()) {
        Register SrcReg = getVReg(Use);
        if (!SrcReg.isValid())
          continue;

        // Only a use that has a register class and whose definition is not a
        // generic instruction needs the COPY; anything else is case (3) or
        // already handled.
        const bool HasRegClass = MRI.getRegClassOrNull(SrcReg) != nullptr;
        if (HasRegClass && !MRI.getVRegDef(SrcReg)->isPreISelOpcode()) {
          const RegisterBank *Bank = RBSHelper.getRegBankToAssign(SrcReg);
          RBSHelper.constrainRegBankUse(Inst, Use, Bank);
        }
      }
    }
  }

  return true;
}
0 commit comments