Skip to content

Commit b4419f9

Browse files
author
Balaram Makam
committed
[AArch64] Refine Falkor Machine Model - Part 3
This concludes the refinements to the Falkor Machine Model. It adds SchedPredicates for immediate zero and LSL Fast. Forwarding logic is modeled only for vector multiply-and-accumulate. llvm-svn: 299810
1 parent bfb2a9d commit b4419f9

File tree

5 files changed

+135
-26
lines changed

5 files changed

+135
-26
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -763,6 +763,17 @@ bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
763763
llvm_unreachable("Unknown opcode to check as cheap as a move!");
764764
}
765765

766+
bool AArch64InstrInfo::isFalkorLSLFast(const MachineInstr &MI) const {
767+
if (MI.getNumOperands() < 4)
768+
return false;
769+
unsigned ShOpVal = MI.getOperand(3).getImm();
770+
unsigned ShImm = AArch64_AM::getShiftValue(ShOpVal);
771+
if (AArch64_AM::getShiftType(ShOpVal) == AArch64_AM::LSL &&
772+
ShImm < 4)
773+
return true;
774+
return false;
775+
}
776+
766777
bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
767778
unsigned &SrcReg, unsigned &DstReg,
768779
unsigned &SubIdx) const {

llvm/lib/Target/AArch64/AArch64InstrInfo.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -268,7 +268,9 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
268268
MachineBasicBlock::iterator &It,
269269
MachineFunction &MF,
270270
bool IsTailCall) const override;
271-
271+
/// Returns true if \p MI has an LSL shift by an immediate less than 4, which
272+
/// Falkor can execute in one cycle less than the general shifted form.
273+
bool isFalkorLSLFast(const MachineInstr &MI) const;
272274
private:
273275

274276
/// \brief Sets the offsets on outlined instructions in \p MBB which use SP

llvm/lib/Target/AArch64/AArch64SchedFalkor.td

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
// instruction cost model.
1818

1919
def FalkorModel : SchedMachineModel {
20-
let IssueWidth = 4; // 4-wide issue for expanded uops.
20+
let IssueWidth = 8; // 8 uops are dispatched per cycle.
2121
let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer.
2222
let LoopMicroOpBufferSize = 16;
2323
let LoadLatency = 3; // Optimistic load latency.
@@ -71,11 +71,11 @@ def : WriteRes<WriteExtr, [FalkorUnitXYZ, FalkorUnitXYZ]>
7171
{ let Latency = 2; let NumMicroOps = 2; }
7272
def : WriteRes<WriteIS, [FalkorUnitXYZ]> { let Latency = 1; }
7373
def : WriteRes<WriteID32, [FalkorUnitX, FalkorUnitZ]>
74-
{ let Latency = 8; let NumMicroOps = 1; } // Fragent -1
74+
{ let Latency = 8; let NumMicroOps = 2; }
7575
def : WriteRes<WriteID64, [FalkorUnitX, FalkorUnitZ]>
76-
{ let Latency = 8; let NumMicroOps = 1; } // Fragent -1
76+
{ let Latency = 16; let NumMicroOps = 2; }
7777
def : WriteRes<WriteIM32, [FalkorUnitX]> { let Latency = 4; }
78-
def : WriteRes<WriteIM64, [FalkorUnitX]> { let Latency = 4; }
78+
def : WriteRes<WriteIM64, [FalkorUnitX]> { let Latency = 5; }
7979
def : WriteRes<WriteBr, [FalkorUnitB]> { let Latency = 1; }
8080
def : WriteRes<WriteBrReg, [FalkorUnitB]> { let Latency = 1; }
8181
def : WriteRes<WriteLD, [FalkorUnitLD]> { let Latency = 3; }

llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td

Lines changed: 66 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -41,12 +41,11 @@ def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "^FADDP(v2i16p|v2i32p|v2i64p|v2
4141
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
4242
def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTXNv1i64)>;
4343
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i32|v4i16)(_shift)?$")>;
44-
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
4544

46-
def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(FML(A|S)|FMUL|FMULX)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
45+
def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
4746
def : InstRW<[FalkorWr_1VXVY_5cyc], (instrs FMULX16, FMULX32)>;
4847

49-
def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^(FML(A|S)|FMUL|FMULX)v1i64_indexed$")>;
48+
def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^(FMUL|FMULX)v1i64_indexed$")>;
5049
def : InstRW<[FalkorWr_1VXVY_6cyc], (instrs FMULX64)>;
5150

5251
def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "^(FABS|FNEG)(v2f64|v4f32|v8f16)$")>;
@@ -62,18 +61,24 @@ def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^(FABD|FADD(P)?|FSUB)(v2f64|v4
6261
def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVT(N|M|P|Z|A)(S|U)(v2f64|v4f32|v8f16)$")>;
6362
def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(FCVTL|FCVTL2)(v2i32|v4i16|v4i32|v8i16)$")>;
6463
def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^FCVTZ(S|U)(v2i64|v4i32|v8i16)(_shift)?$")>;
65-
def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
6664

67-
def : InstRW<[FalkorWr_2VXVY_5cyc], (instregex "^(FML(A|S)|FMUL|FMULX)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
65+
def : InstRW<[FalkorWr_2VXVY_5cyc], (instregex "^(FMUL|FMULX)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
6866

69-
def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(FML(A|S)|FMUL|FMULX)v2i64_indexed$")>;
67+
def : InstRW<[FalkorWr_2VXVY_6cyc], (instregex "^(FMUL|FMULX)v2i64_indexed$")>;
7068

7169
def : InstRW<[FalkorWr_3VXVY_4cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v1i32|v1i64|v1f16|v2f32|v4f16)$")>;
7270

7371
def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^(FCVTX?N|FCVTX?N2)(v2i32|v4i16|v4i32|v8i16|v4f32)$")>;
7472

7573
def : InstRW<[FalkorWr_2VX_2VY_2cyc], (instregex "^(FDIV|FSQRT)(v2f64|v4f32|v8f16)$")>;
7674

75+
def : InstRW<[FalkorWr_1VXVY_4cyc, FalkorReadVMA],(instregex "^ML(A|S)(v8i8|v4i16|v2i32)(_indexed)?$")>;
76+
def : InstRW<[FalkorWr_2VXVY_4cyc, FalkorReadVMA],(instregex "^ML(A|S)(v16i8|v8i16|v4i32|v2i64)(_indexed)?$")>;
77+
78+
def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^FML(A|S)(v2f32|v4f16|(v1i16_indexed|v4i16_indexed|v1i32_indexed|v2i32_indexed))$")>;
79+
def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^FML(A|S)v1i64_indexed$")>;
80+
def : InstRW<[FalkorWr_2VXVY_5cyc, FalkorReadFMA],(instregex "^FML(A|S)(v2f64|v4f32|v8f16|v8i16_indexed|v4i32_indexed)$")>;
81+
def : InstRW<[FalkorWr_2VXVY_6cyc, FalkorReadFMA],(instregex "^FML(A|S)v2i64_indexed$")>;
7782
// SIMD Integer Instructions
7883
// -----------------------------------------------------------------------------
7984
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^ADD(v1i64|v2i32|v4i16|v8i8)$")>;
@@ -116,7 +121,7 @@ def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^(S|U)?(MAX|MIN)V(v8i8v|v8i16v
116121
def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs ADDVv8i8v)>;
117122
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^MUL(v2i32|v4i16|v8i8)(_indexed)?$")>;
118123
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQR?DMULH(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
119-
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQDMU?L(A|S)?L()v.*$")>;
124+
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQDMULL(i16|i32)$")>;
120125
def : InstRW<[FalkorWr_1VXVY_4cyc], (instregex "^SQRDML(A|S)?H(v8i8|v4i16|v1i32|v2i32|v1i16)(_indexed)?$")>;
121126

122127
def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^(S|U)?(MAX|MIN)Vv16i8v$")>;
@@ -165,7 +170,7 @@ def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^S(L|R)I(v16i8|v8i16|v4i32|v2i
165170
def : InstRW<[FalkorWr_2VXVY_3cyc], (instregex "^SQ(ABS|NEG)(v16i8|v8i16|v4i32|v2i64)$")>;
166171

167172
def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(MUL|SQR?DMULH)(v16i8|v8i16|v4i32)(_indexed)?$")>;
168-
def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^SQD(MLAL|MLSL|MULL)v.*$")>;
173+
def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^SQDMULLv.*$")>;
169174
def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^SQRDML(A|S)H(v16i8|v8i16|v4i32)(_indexed)?$")>;
170175

171176
def : InstRW<[FalkorWr_3VXVY_3cyc], (instregex "^(S|U)ADDLVv4i32v$")>;
@@ -180,6 +185,8 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instregex "^(S|U)ABALv.*$")>;
180185

181186
def : InstRW<[FalkorWr_4VXVY_4cyc], (instregex "^(S|U)ABA(v16i8|v8i16|v4i32)$")>;
182187

188+
def : InstRW<[FalkorWr_1VXVY_4cyc, FalkorReadVMA],(instregex "^SQD(MLAL|MLSL)(i16|i32)$")>;
189+
def : InstRW<[FalkorWr_2VXVY_4cyc, FalkorReadVMA],(instregex "^SQD(MLAL|MLSL)v.*$")>;
183190
// SIMD Load Instructions
184191
// -----------------------------------------------------------------------------
185192
def : InstRW<[WriteVLD], (instregex "^LD1(i64|Onev(8b|4h|2s|1d|16b|8h|4s|2d))$")>;
@@ -257,19 +264,57 @@ def : InstRW<[FalkorWr_2LD_2VXVY_2LD_1XYZ_2VXVY_4cyc, WriteAdr],(instregex "^LD4
257264

258265
// Arithmetic and Logical Instructions
259266
// -----------------------------------------------------------------------------
260-
def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^ADD(S)?(W|X)r(s|x)$")>;
267+
def : InstRW<[FalkorWr_ADD], (instregex "^ADD(S)?(W|X)r(s|x)$")>;
261268
def : InstRW<[FalkorWr_2XYZ_2cyc], (instregex "^SUB(S)?(W|X)r(s|x)$")>;
262269

263270
// SIMD Miscellaneous Instructions
264271
// -----------------------------------------------------------------------------
272+
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^DUP(v8i8|v4i16|v2i32)(gpr|lane)$")>;
273+
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^DUP(v16i8|v8i16)(gpr|lane)$")>;
274+
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^INSv(i8|i16)(gpr|lane)$")>;
275+
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^(S|U)MOVv.*$")>;
276+
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(BIF|BIT|BSL)v8i8$")>;
277+
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs EXTv8i8)>;
278+
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "(MOVI|MVNI)(D|v8b_ns|v2i32|v4i16|v2s_msl)$")>;
265279
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs TBLv8i8One)>;
280+
def : InstRW<[FalkorWr_1VXVY_1cyc], (instrs NOTv8i8)>;
281+
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^REV(16|32|64)v.*$")>;
282+
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^(TRN1|TRN2|ZIP1|UZP1|UZP2|ZIP2|XTN|XTN2)(v2i32|v2i64|v4i16|v4i32|v8i8|v8i16|v16i8)$")>;
283+
284+
def : InstRW<[FalkorWr_1VXVY_2cyc], (instregex "^(CLS|CLZ|CNT|RBIT)(v4i32|v8i16|v16i8)$")>;
285+
286+
def : InstRW<[FalkorWr_1VXVY_3cyc], (instregex "(S|U)QXTU?Nv.*$")>;
287+
def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPEv1i32, FRECPEv1i64, FRSQRTEv1i32, FRSQRTEv1i64, FRECPEv2f32, FRSQRTEv2f32)>;
288+
def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FRECPXv1i32, FRECPXv1i64)>;
289+
def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs URECPEv2i32, URSQRTEv2i32)>;
290+
291+
def : InstRW<[FalkorWr_1VXVY_5cyc], (instrs FRECPS32, FRSQRTS32, FRECPSv2f32, FRSQRTSv2f32)>;
292+
293+
def : InstRW<[FalkorWr_1VXVY_6cyc], (instrs FRECPS64, FRSQRTS64)>;
294+
295+
def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^DUP(v4i32|v2i64)(gpr|lane)$")>;
296+
def : InstRW<[FalkorWr_2GTOV_1cyc], (instregex "^INSv(i32|i64)(gpr|lane)$")>;
297+
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs EXTv16i8)>;
298+
def : InstRW<[FalkorWr_2VXVY_1cyc], (instregex "(MOVI|MVNI)(v2d_ns|v16b_ns|v4i32|v8i16|v4s_msl)$")>;
299+
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs NOTv16i8)>;
266300
def : InstRW<[FalkorWr_2VXVY_1cyc], (instrs TBLv16i8One)>;
301+
302+
def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs FRECPEv2f64, FRECPEv4f32, FRSQRTEv2f64, FRSQRTEv4f32)>;
303+
def : InstRW<[FalkorWr_2VXVY_3cyc], (instrs URECPEv4i32, URSQRTEv4i32)>;
304+
267305
def : InstRW<[FalkorWr_2VXVY_4cyc], (instrs TBLv8i8Two)>;
268306
def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^TBX(v8|v16)i8One$")>;
307+
308+
def : InstRW<[FalkorWr_2VXVY_5cyc], (instrs FRECPSv4f32, FRSQRTSv4f32)>;
309+
310+
def : InstRW<[FalkorWr_2VXVY_6cyc], (instrs FRECPSv2f64, FRSQRTSv2f64)>;
311+
269312
def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBL(v8i8Three|v16i8Two)$")>;
270313
def : InstRW<[FalkorWr_3VXVY_5cyc], (instregex "^TBX(v8i8Two|v16i8Two)$")>;
314+
271315
def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBL(v8i8Four|v16i8Three)$")>;
272316
def : InstRW<[FalkorWr_4VXVY_6cyc], (instregex "^TBX(v8i8Three|v16i8Three)$")>;
317+
273318
def : InstRW<[FalkorWr_5VXVY_7cyc], (instrs TBLv16i8Four)>;
274319
def : InstRW<[FalkorWr_5VXVY_7cyc], (instregex "^TBX(v8i8Four|v16i8Four)$")>;
275320

@@ -334,7 +379,7 @@ def : InstRW<[FalkorWr_4VXVY_3cyc], (instrs SHA256SU1rrr)>;
334379
def : InstRW<[WriteLD], (instregex "^LDR((Q|D|S|H|B)ui|(Q|D|S)l)$")>;
335380
def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(Q|D|S|H|B)(post|pre)$")>;
336381
def : InstRW<[WriteLD], (instregex "^LDUR(Q|D|S|H|B)i$")>;
337-
def : InstRW<[FalkorWr_1XYZ_1LD_4cyc],(instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
382+
def : InstRW<[FalkorWr_LDR], (instregex "^LDR(Q|D|H|S|B)ro(W|X)$")>;
338383
def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDNPQi)>;
339384
def : InstRW<[FalkorWr_2LD_3cyc, WriteLDHi],(instrs LDPQi)>;
340385
def : InstRW<[FalkorWr_1LD_1none_3cyc, WriteLDHi],(instregex "LDNP(D|S)i$")>;
@@ -361,18 +406,18 @@ def : InstRW<[FalkorWr_1VXVY_3cyc], (instrs FCVTSHr, FCVTDHr)>;
361406

362407
def : InstRW<[FalkorWr_1VXVY_4cyc], (instrs FCVTSDr, FCVTDSr)>;
363408

364-
def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>;
365409
def : InstRW<[FalkorWr_1VXVY_5cyc], (instregex "^F(N)?MUL(H|S)rr$")>;
366410

367-
def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^F(N)?M(ADD|SUB)Drrr$")>;
368411
def : InstRW<[FalkorWr_1VXVY_6cyc], (instregex "^F(N)?MULDrr$")>;
369412

370413
def : InstRW<[FalkorWr_1VX_1VY_10cyc],(instregex "^FDIV(H|S|D)rr$")>;
371414
def : InstRW<[FalkorWr_1VX_1VY_2cyc], (instregex "^FSQRT(H|S|D)r$")>;
372415

416+
def : InstRW<[FalkorWr_1VXVY_5cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)(H|S)rrr$")>;
417+
def : InstRW<[FalkorWr_1VXVY_6cyc, FalkorReadFMA],(instregex "^F(N)?M(ADD|SUB)Drrr$")>;
373418
// FP Miscellaneous Instructions
374419
// -----------------------------------------------------------------------------
375-
def : InstRW<[FalkorWr_1GTOV_1cyc], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>;
420+
def : InstRW<[FalkorWr_FMOV], (instregex "^FMOV(HW|HX|SW|DX|DXHigh)r$")>;
376421
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FCVTZ(S|U)(S|U)(W|X)(D|S)ri?$")>;
377422
def : InstRW<[FalkorWr_1VTOG_1cyc], (instregex "^FMOV(WH|WS|XH|XD|XDHigh)r$")>;
378423
def : InstRW<[FalkorWr_1VXVY_1cyc], (instregex "^FMOV(Hi|Hr|S0|Si|Sr|D0|Di|Dr|v.*_ns)$")>;
@@ -388,8 +433,8 @@ def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)CVTF(v2i64|v4i32|v8i16|v
388433
def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFMui, PRFMl)>;
389434
def : InstRW<[FalkorWr_1ST_0cyc], (instrs PRFUMi)>;
390435

391-
def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
392-
def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDP(W|X)i$")>;
436+
def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDNP(W|X)i$")>;
437+
def : InstRW<[WriteLD, WriteLDHi], (instregex "^LDP(W|X)i$")>;
393438
def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(B|H|W|X)ui$")>;
394439
def : InstRW<[WriteLD, WriteAdr], (instregex "^LDR(B|H|W|X)(post|pre)$")>;
395440
def : InstRW<[FalkorWr_1LD_3cyc], (instregex "^LDR(W|X)l$")>;
@@ -401,10 +446,10 @@ def : InstRW<[FalkorWr_1LD_4cyc], (instrs LDRSWl)>;
401446
def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDTRS(BW|BX|HW|HX|W)i$")>;
402447
def : InstRW<[FalkorWr_1LD_4cyc], (instregex "^LDURS(BW|BX|HW|HX|W)i$")>;
403448

404-
def : InstRW<[FalkorWr_1XYZ_1ST_4cyc],(instregex "^PRFMro(W|X)$")>;
405-
def : InstRW<[FalkorWr_1XYZ_1LD_4cyc],(instregex "^LDR(B|H|W|X)ro(W|X)$")>;
449+
def : InstRW<[FalkorWr_PRFM], (instregex "^PRFMro(W|X)$")>;
450+
def : InstRW<[FalkorWr_LDR], (instregex "^LDR(B|H|W|X)ro(W|X)$")>;
406451

407-
def : InstRW<[FalkorWr_1XYZ_1LD_5cyc],(instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
452+
def : InstRW<[FalkorWr_LDRS], (instregex "^LDRS(BW|BX|HW|HX|W)ro(W|X)$")>;
408453

409454
def : InstRW<[FalkorWr_1LD_4cyc, WriteAdr],(instregex "^LDRS(BW|BX|HW|HX|W)(post|pre)$")>;
410455
def : InstRW<[WriteLD, WriteLDHi, WriteAdr],(instregex "^LDP(W|X)(post|pre)$")>;
@@ -435,7 +480,7 @@ def : InstRW<[FalkorWr_2VXVY_4cyc], (instregex "^(S|U)(MLAL|MLSL|MULL)v.*$")>;
435480
def : InstRW<[FalkorWr_1XYZ_1cyc], (instregex "^(LSLV|LSRV|ASRV|RORV|MOVK)(W|X).*")>;
436481
def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^ADRP?$")>;
437482
def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVN(W|X)i$")>;
438-
def : InstRW<[FalkorWr_1XYZB_1cyc], (instregex "^MOVZ(W|X)i$")>;
483+
def : InstRW<[FalkorWr_MOVZ], (instregex "^MOVZ(W|X)i$")>;
439484

440485
// Other Instructions
441486
// -----------------------------------------------------------------------------
@@ -467,6 +512,8 @@ def : InstRW<[WriteVST, WriteVST], (instrs STNPQi)>;
467512
// -----------------------------------------------------------------------------
468513
def : InstRW<[WriteVST], (instregex "^STP(D|S)(i|post|pre)$")>;
469514
def : InstRW<[WriteST], (instregex "^STP(W|X)(i|post|pre)$")>;
515+
def : InstRW<[WriteST], (instregex "^STR(Q|D|S|BB|HH)ui$")>;
516+
def : InstRW<[WriteST], (instregex "^STUR(Q|D|S|BB|HH)i$")>;
470517
def : InstRW<[WriteST], (instregex "^STR(B|H|W|X)(post|pre|ui)$")>;
471518
def : InstRW<[WriteST], (instregex "^STTR(B|H|W|X)i$")>;
472519
def : InstRW<[WriteST], (instregex "^STUR(B|H|W|X)i$")>;

llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,10 @@
1919
// down one Z pipe, six SD pipes, four VX pipes and the total latency is
2020
// six cycles.
2121
//
22+
// Contains all of the Falkor-specific ReadAdvance types for forwarding logic.
23+
//
24+
// Contains all of the Falkor-specific WriteVariant types for immediate zero
25+
// and LSLFast.
2226
//===----------------------------------------------------------------------===//
2327

2428
//===----------------------------------------------------------------------===//
@@ -47,6 +51,7 @@ def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
4751

4852
def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; }
4953
def FalkorWr_1ST_0cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 0; }
54+
def FalkorWr_1ST_3cyc : SchedWriteRes<[FalkorUnitST]> { let Latency = 3; }
5055

5156
def FalkorWr_1GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 1; }
5257
def FalkorWr_1GTOV_4cyc : SchedWriteRes<[FalkorUnitGTOV]>{ let Latency = 4; }
@@ -113,6 +118,11 @@ def FalkorWr_1VX_1VY_10cyc : SchedWriteRes<[FalkorUnitVX, FalkorUnitVY]> {
113118
let NumMicroOps = 2;
114119
}
115120

121+
def FalkorWr_2GTOV_1cyc : SchedWriteRes<[FalkorUnitGTOV, FalkorUnitGTOV]> {
122+
let Latency = 1;
123+
let NumMicroOps = 2;
124+
}
125+
116126
def FalkorWr_1XYZ_1ST_4cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST]> {
117127
let Latency = 4;
118128
let NumMicroOps = 2;
@@ -134,12 +144,12 @@ def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
134144

135145
def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
136146
let Latency = 8;
137-
let NumMicroOps = 2;
147+
let ResourceCycles = [2, 8];
138148
}
139149

140150
def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
141151
let Latency = 16;
142-
let NumMicroOps = 2;
152+
let ResourceCycles = [2, 16];
143153
}
144154

145155
def FalkorWr_1LD_1Z_3cyc : SchedWriteRes<[FalkorUnitLD, FalkorUnitZ]> {
@@ -305,3 +315,42 @@ def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
305315
let Latency = 4;
306316
let NumMicroOps = 9;
307317
}
318+
319+
// Forwarding logic is modeled for vector multiply and accumulate
320+
// -----------------------------------------------------------------------------
321+
// Read advance of 2 cycles that applies only when the producing write is one
// of the two listed `_4cyc` VXVY writes. Paired with the integer vector
// multiply-accumulate InstRW entries (ML(A|S), SQD(MLAL|MLSL)).
def FalkorReadVMA : SchedReadAdvance<2, [FalkorWr_1VXVY_4cyc,
322+
FalkorWr_2VXVY_4cyc]>;
323+
// Read advance of 3 cycles that applies only when the producing write is one
// of the four listed `_5cyc`/`_6cyc` VXVY writes. Paired with the
// floating-point multiply-accumulate InstRW entries (FML(A|S), F(N)M(ADD|SUB)).
def FalkorReadFMA : SchedReadAdvance<3, [FalkorWr_1VXVY_5cyc,
324+
FalkorWr_1VXVY_6cyc,
325+
FalkorWr_2VXVY_5cyc,
326+
FalkorWr_2VXVY_6cyc]>;
327+
328+
// SchedPredicates and WriteVariants for Immediate Zero and LSLFast
329+
// -----------------------------------------------------------------------------
330+
// Holds when the target instruction-info hook isGPRZero() returns true for
// the instruction being scheduled.
def FalkorImmZPred : SchedPredicate<[{TII->isGPRZero(*MI)}]>;
331+
// Holds when the target instruction-info hook isFalkorLSLFast() returns true
// for the instruction being scheduled.
def FalkorLSLFastPred : SchedPredicate<[{TII->isFalkorLSLFast(*MI)}]>;
332+
333+
// Write variant for the GPR-to-FP FMOV forms: uses FalkorWr_1none_0cyc when
// FalkorImmZPred holds, and FalkorWr_1GTOV_1cyc otherwise (NoSchedPred is the
// default entry).
def FalkorWr_FMOV : SchedWriteVariant<[
334+
SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
335+
SchedVar<NoSchedPred, [FalkorWr_1GTOV_1cyc]>]>;
336+
337+
// Write variant for MOVZ: uses FalkorWr_1none_0cyc when FalkorImmZPred holds,
// and FalkorWr_1XYZB_1cyc otherwise (NoSchedPred is the default entry).
def FalkorWr_MOVZ : SchedWriteVariant<[
338+
SchedVar<FalkorImmZPred, [FalkorWr_1none_0cyc]>,
339+
SchedVar<NoSchedPred, [FalkorWr_1XYZB_1cyc]>]>;
340+
341+
// Write variant for register-offset LDR: when FalkorLSLFastPred holds the
// load-only FalkorWr_1LD_3cyc is used; otherwise the XYZ+LD write
// FalkorWr_1XYZ_1LD_4cyc is used (NoSchedPred is the default entry).
def FalkorWr_LDR : SchedWriteVariant<[
342+
SchedVar<FalkorLSLFastPred, [FalkorWr_1LD_3cyc]>,
343+
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_4cyc]>]>;
344+
345+
// Write variant for shifted/extended-register ADD: FalkorLSLFastPred and
// FalkorImmZPred both select FalkorWr_1XYZ_1cyc; every other case uses
// FalkorWr_2XYZ_2cyc (NoSchedPred is the default entry).
def FalkorWr_ADD : SchedWriteVariant<[
346+
SchedVar<FalkorLSLFastPred, [FalkorWr_1XYZ_1cyc]>,
347+
SchedVar<FalkorImmZPred, [FalkorWr_1XYZ_1cyc]>,
348+
SchedVar<NoSchedPred, [FalkorWr_2XYZ_2cyc]>]>;
349+
350+
// Write variant for register-offset PRFM: when FalkorLSLFastPred holds the
// store-pipe-only FalkorWr_1ST_3cyc is used; otherwise the XYZ+ST write
// FalkorWr_1XYZ_1ST_4cyc is used (NoSchedPred is the default entry).
def FalkorWr_PRFM : SchedWriteVariant<[
351+
SchedVar<FalkorLSLFastPred, [FalkorWr_1ST_3cyc]>,
352+
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1ST_4cyc]>]>;
353+
354+
// Write variant for register-offset sign-extending loads (LDRS*): when
// FalkorLSLFastPred holds the load-only FalkorWr_1LD_4cyc is used; otherwise
// the XYZ+LD write FalkorWr_1XYZ_1LD_5cyc is used (NoSchedPred is the default
// entry).
def FalkorWr_LDRS : SchedWriteVariant<[
355+
SchedVar<FalkorLSLFastPred, [FalkorWr_1LD_4cyc]>,
356+
SchedVar<NoSchedPred, [FalkorWr_1XYZ_1LD_5cyc]>]>;

0 commit comments

Comments
 (0)