Skip to content

Commit 10c10f2

Browse files
committed
[AMDGPU] Fix assertion failure in SIInsertHardClauses
This new pass failed an assertion whenever there were s_nops after the end of clause. Differential Revision: https://reviews.llvm.org/D80007
1 parent 0ee04e6 commit 10c10f2

File tree

2 files changed

+42
-7
lines changed

2 files changed

+42
-7
lines changed

llvm/lib/Target/AMDGPU/SIInsertHardClauses.cpp

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ HardClauseType getHardClauseType(const MachineInstr &MI) {
7676

7777
// Don't form VALU clauses. It's not clear what benefit they give, if any.
7878

79-
// In practice s_nop is the only internal instructions we're likely to see.
79+
// In practice s_nop is the only internal instruction we're likely to see.
8080
// It's safe to treat the rest as illegal.
8181
if (MI.getOpcode() == AMDGPU::S_NOP)
8282
return HARDCLAUSE_INTERNAL;
@@ -103,23 +103,25 @@ class SIInsertHardClauses : public MachineFunctionPass {
103103
// The last non-internal instruction in the clause.
104104
MachineInstr *Last = nullptr;
105105
// The length of the clause including any internal instructions in the
106-
// middle.
106+
// middle or after the end of the clause.
107107
unsigned Length = 0;
108108
// The base operands of *Last.
109109
SmallVector<const MachineOperand *, 4> BaseOps;
110110
};
111111

112112
bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
113-
assert(CI.Length ==
114-
std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1);
115-
if (CI.Length < 2)
113+
// Get the size of the clause excluding any internal instructions at the
114+
// end.
115+
unsigned Size =
116+
std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1;
117+
if (Size < 2)
116118
return false;
117-
assert(CI.Length <= 64 && "Hard clause is too long!");
119+
assert(Size <= 64 && "Hard clause is too long!");
118120

119121
auto &MBB = *CI.First->getParent();
120122
auto ClauseMI =
121123
BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
122-
.addImm(CI.Length - 1);
124+
.addImm(Size - 1);
123125
finalizeBundle(MBB, ClauseMI->getIterator(),
124126
std::next(CI.Last->getIterator()));
125127
return true;

llvm/test/CodeGen/AMDGPU/hard-clauses.mir

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,39 @@
11
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
22
# RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -run-pass si-insert-hard-clauses %s -o - | FileCheck %s
33

4+
---
5+
name: nop1
6+
tracksRegLiveness: true
7+
body: |
8+
bb.0:
9+
liveins: $sgpr0_sgpr1
10+
; CHECK-LABEL: name: nop1
11+
; CHECK: liveins: $sgpr0_sgpr1
12+
; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
13+
; CHECK: S_NOP 2
14+
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
15+
S_NOP 2
16+
...
17+
18+
---
19+
name: nop2
20+
tracksRegLiveness: true
21+
body: |
22+
bb.0:
23+
liveins: $sgpr0_sgpr1
24+
; CHECK-LABEL: name: nop2
25+
; CHECK: liveins: $sgpr0_sgpr1
26+
; CHECK: BUNDLE implicit-def $sgpr2, implicit-def $sgpr2_lo16, implicit-def $sgpr2_hi16, implicit-def $sgpr3, implicit-def $sgpr3_lo16, implicit-def $sgpr3_hi16, implicit $sgpr0_sgpr1 {
27+
; CHECK: S_CLAUSE 2
28+
; CHECK: $sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
29+
; CHECK: S_NOP 2
30+
; CHECK: $sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0, 0
31+
; CHECK: }
32+
$sgpr2 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
33+
S_NOP 2
34+
$sgpr3 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 4, 0, 0
35+
...
36+
437
---
538
name: long_clause
639
tracksRegLiveness: true

0 commit comments

Comments
 (0)