@@ -50,14 +50,23 @@ class SILowerSGPRSpills : public MachineFunctionPass {
   SILowerSGPRSpills() : MachineFunctionPass(ID) {}
 
   void calculateSaveRestoreBlocks(MachineFunction &MF);
-  bool spillCalleeSavedRegs(MachineFunction &MF);
+  bool spillCalleeSavedRegs(MachineFunction &MF,
+                            SmallVectorImpl<int> &CalleeSavedFIs);
+  void extendWWMVirtRegLiveness(MachineFunction &MF, LiveIntervals *LIS);
 
   bool runOnMachineFunction(MachineFunction &MF) override;
 
   void getAnalysisUsage(AnalysisUsage &AU) const override {
     AU.setPreservesAll();
     MachineFunctionPass::getAnalysisUsage(AU);
   }
+
+  MachineFunctionProperties getClearedProperties() const override {
+    // SILowerSGPRSpills introduces new Virtual VGPRs for spilling SGPRs.
+    return MachineFunctionProperties()
+        .set(MachineFunctionProperties::Property::IsSSA)
+        .set(MachineFunctionProperties::Property::NoVRegs);
+  }
 };
 
 } // end anonymous namespace
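Editorial note on the new getClearedProperties() override: clearing IsSSA and NoVRegs tells the pass manager that, after this pass, the function again contains virtual registers (the WWM spill VGPRs) and is not in SSA form. A minimal sketch, assuming only the standard MachineFunctionProperties API, of how later code could observe this; it is not part of the patch:

// Sketch only: after SILowerSGPRSpills runs, NoVRegs no longer holds, so a
// register allocator run is still required for the new WWM spill VGPRs.
if (!MF.getProperties().hasProperty(
        MachineFunctionProperties::Property::NoVRegs)) {
  // Virtual registers remain; do not assume physical registers only.
}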
@@ -197,7 +206,8 @@ static void updateLiveness(MachineFunction &MF, ArrayRef<CalleeSavedInfo> CSI) {
   EntryBB.sortUniqueLiveIns();
 }
 
-bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
+bool SILowerSGPRSpills::spillCalleeSavedRegs(
+    MachineFunction &MF, SmallVectorImpl<int> &CalleeSavedFIs) {
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const Function &F = MF.getFunction();
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
@@ -228,6 +238,7 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
                                          TRI->getSpillAlign(*RC), true);
 
       CSI.push_back(CalleeSavedInfo(Reg, JunkFI));
+      CalleeSavedFIs.push_back(JunkFI);
     }
   }
 
@@ -248,6 +259,50 @@ bool SILowerSGPRSpills::spillCalleeSavedRegs(MachineFunction &MF) {
   return false;
 }
 
+void SILowerSGPRSpills::extendWWMVirtRegLiveness(MachineFunction &MF,
+                                                 LiveIntervals *LIS) {
+  // TODO: This is a workaround to avoid the unmodelled liveness computed with
+  // whole-wave virtual registers when allocated together with the regular VGPR
+  // virtual registers. Presently, the liveness computed during regalloc is
+  // only uniform (or single-lane aware) and it doesn't take into account the
+  // divergent control flow that exists for our GPUs. Since the WWM registers
+  // can modify inactive lanes, the wave-aware liveness should be computed for
+  // the virtual registers to accurately model their interferences. Without
+  // having the divergent CFG for the function, it is difficult to implement
+  // wave-aware liveness info. Until then, we conservatively extend the liveness
+  // of the WWM registers into the entire function so that they won't be reused
+  // without first spilling/splitting their live ranges.
+  SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
+
+  // Insert an IMPLICIT_DEF for the WWM registers in the entry blocks.
+  for (auto Reg : MFI->getSGPRSpillVGPRs()) {
+    for (MachineBasicBlock *SaveBlock : SaveBlocks) {
+      MachineBasicBlock::iterator InsertBefore = SaveBlock->begin();
+      auto MIB = BuildMI(*SaveBlock, *InsertBefore, InsertBefore->getDebugLoc(),
+                         TII->get(AMDGPU::IMPLICIT_DEF), Reg);
+      MFI->setFlag(Reg, AMDGPU::VirtRegFlag::WWM_REG);
+      if (LIS) {
+        LIS->InsertMachineInstrInMaps(*MIB);
+      }
+    }
+  }
+
+  // Insert a KILL in the return blocks to extend the liveness until the end
+  // of the function. Insert a separate KILL for each VGPR.
+  for (MachineBasicBlock *RestoreBlock : RestoreBlocks) {
+    MachineBasicBlock::iterator InsertBefore =
+        RestoreBlock->getFirstTerminator();
+    for (auto Reg : MFI->getSGPRSpillVGPRs()) {
+      auto MIB =
+          BuildMI(*RestoreBlock, *InsertBefore, InsertBefore->getDebugLoc(),
+                  TII->get(TargetOpcode::KILL));
+      MIB.addReg(Reg);
+      if (LIS)
+        LIS->InsertMachineInstrInMaps(*MIB);
+    }
+  }
+}
+
 bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
   TII = ST.getInstrInfo();
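For orientation on extendWWMVirtRegLiveness() above: the IMPLICIT_DEF in each save block and the KILL before each restore block's terminator pin the WWM spill VGPRs' live ranges to roughly the whole function, so the intervals recomputed later in runOnMachineFunction (see the hunks below) become function-wide. A debug-only sketch, assuming the pass's existing FuncInfo, LIS, and TRI members and that LIS is available; it is not part of the patch:

#ifndef NDEBUG
  if (LIS) {
    // Sketch: dump each WWM spill VGPR's recomputed interval; it is expected
    // to stretch from the entry block to the return blocks.
    for (Register Reg : FuncInfo->getSGPRSpillVGPRs()) {
      const LiveInterval &LI = LIS->getInterval(Reg);
      LLVM_DEBUG(dbgs() << "WWM spill VGPR " << printReg(Reg, TRI)
                        << " live range: " << LI << '\n');
    }
  }
#endif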
@@ -261,7 +316,8 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
   // First, expose any CSR SGPR spills. This is mostly the same as what PEI
   // does, but somewhat simpler.
   calculateSaveRestoreBlocks(MF);
-  bool HasCSRs = spillCalleeSavedRegs(MF);
+  SmallVector<int> CalleeSavedFIs;
+  bool HasCSRs = spillCalleeSavedRegs(MF, CalleeSavedFIs);
 
   MachineFrameInfo &MFI = MF.getFrameInfo();
   MachineRegisterInfo &MRI = MF.getRegInfo();
@@ -275,6 +331,7 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
 
   bool MadeChange = false;
   bool NewReservedRegs = false;
+  bool SpilledToVirtVGPRLanes = false;
 
   // TODO: CSR VGPRs will never be spilled to AGPRs. These can probably be
   // handled as SpilledToReg in regular PrologEpilogInserter.
@@ -297,23 +354,53 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
 
         int FI = TII->getNamedOperand(MI, AMDGPU::OpName::addr)->getIndex();
         assert(MFI.getStackID(FI) == TargetStackID::SGPRSpill);
-        if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
-          NewReservedRegs = true;
-          bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
-              MI, FI, nullptr, Indexes, LIS);
-          (void)Spilled;
-          assert(Spilled && "failed to spill SGPR to VGPR when allocated");
-          SpillFIs.set(FI);
+
+        bool IsCalleeSaveSGPRSpill =
+            std::find(CalleeSavedFIs.begin(), CalleeSavedFIs.end(), FI) !=
+            CalleeSavedFIs.end();
+        if (IsCalleeSaveSGPRSpill) {
+          // Spill callee-saved SGPRs into physical VGPR lanes.
+
+          // TODO: This is to ensure the CFIs are static for efficient frame
+          // unwinding in the debugger. Spilling them into virtual VGPR lanes
+          // involves regalloc allocating the physical VGPRs, which might
+          // cause intermediate spills/splits of such live ranges for a
+          // successful allocation. This would result in broken CFI encoding
+          // unless regalloc-aware CFI generation, which inserts new CFIs
+          // along with the intermediate spills, is implemented. No such
+          // support currently exists in the LLVM compiler.
+          if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI, true)) {
+            NewReservedRegs = true;
+            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
+                MI, FI, nullptr, Indexes, LIS, true);
+            if (!Spilled)
+              llvm_unreachable(
+                  "failed to spill SGPR to physical VGPR lane when allocated");
+          }
+        } else {
+          if (FuncInfo->allocateSGPRSpillToVGPRLane(MF, FI)) {
+            bool Spilled = TRI->eliminateSGPRToVGPRSpillFrameIndex(
+                MI, FI, nullptr, Indexes, LIS);
+            if (!Spilled)
+              llvm_unreachable(
+                  "failed to spill SGPR to virtual VGPR lane when allocated");
+            SpillFIs.set(FI);
+            SpilledToVirtVGPRLanes = true;
+          }
         }
       }
     }
 
-    // FIXME: Adding to live-ins redundant with reserving registers.
-    for (MachineBasicBlock &MBB : MF) {
-      for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
-        MBB.addLiveIn(Reg);
-      MBB.sortUniqueLiveIns();
+    if (SpilledToVirtVGPRLanes) {
+      extendWWMVirtRegLiveness(MF, LIS);
+      if (LIS) {
+        // Compute the LiveInterval for the newly created virtual registers.
+        for (auto Reg : FuncInfo->getSGPRSpillVGPRs())
+          LIS->createAndComputeVirtRegInterval(Reg);
+      }
+    }
 
+    for (MachineBasicBlock &MBB : MF) {
       // FIXME: The dead frame indices are replaced with a null register from
       // the debug value instructions. We should instead, update it with the
      // correct register value. But not sure the register value alone is
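A side note on the IsCalleeSaveSGPRSpill check introduced above: the iterator-based std::find test could equivalently be written with llvm::is_contained from llvm/ADT/STLExtras.h. A hedged alternative, not part of the patch:

// Equivalent membership test over the callee-saved frame indices.
bool IsCalleeSaveSGPRSpill = llvm::is_contained(CalleeSavedFIs, FI);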
@@ -334,6 +421,10 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
     // lane".
     FuncInfo->removeDeadFrameIndices(MFI, /*ResetSGPRSpillStackIDs*/ false);
 
+    MadeChange = true;
+  }
+
+  if (SpilledToVirtVGPRLanes) {
     const TargetRegisterClass *RC = TRI->getWaveMaskRegClass();
     // Shift back the reserved SGPR for EXEC copy into the lowest range.
     // This SGPR is reserved to handle the whole-wave spill/copy operations
@@ -342,20 +433,21 @@ bool SILowerSGPRSpills::runOnMachineFunction(MachineFunction &MF) {
     if (UnusedLowSGPR && TRI->getHWRegIndex(UnusedLowSGPR) <
                              TRI->getHWRegIndex(FuncInfo->getSGPRForEXECCopy()))
       FuncInfo->setSGPRForEXECCopy(UnusedLowSGPR);
-
-    MadeChange = true;
   } else {
-    // No SGPR spills and hence there won't be any WWM spills/copies. Reset the
-    // SGPR reserved for EXEC copy.
+    // No SGPR spills to virtual VGPR lanes and hence there won't be any WWM
+    // spills/copies. Reset the SGPR reserved for EXEC copy.
     FuncInfo->setSGPRForEXECCopy(AMDGPU::NoRegister);
   }
 
   SaveBlocks.clear();
   RestoreBlocks.clear();
 
-  // Updated the reserved registers with any VGPRs added for SGPR spills.
-  if (NewReservedRegs)
-    MRI.freezeReservedRegs(MF);
+  // Update the reserved registers with any physical VGPRs added for SGPR
+  // spills.
+  if (NewReservedRegs) {
+    for (Register Reg : FuncInfo->getWWMReservedRegs())
+      MRI.reserveReg(Reg, TRI);
+  }
 
   return MadeChange;
 }
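On the last hunk: reserving only the registers reported by FuncInfo->getWWMReservedRegs() adds the newly taken physical VGPRs to the already-frozen reserved set, whereas the old MRI.freezeReservedRegs(MF) recomputed the whole set from TRI->getReservedRegs(MF). A small sketch of the API difference, using AMDGPU::VGPR255 purely as a placeholder register; not part of the patch:

// Sketch: reserveReg() marks one physical register (and its aliases) reserved
// in the existing frozen set; isReserved() then reflects it immediately.
MRI.reserveReg(AMDGPU::VGPR255, TRI); // placeholder physreg for illustration
assert(MRI.isReserved(AMDGPU::VGPR255) && "register should now be reserved");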