Skip to content

Commit 4400018

Browse files
committed
[RISCV] Model all 3 arithmetic sources of vector FMA at MC layer.
For the most part, the MC versions of vector instructions don't model the case where the destination is also a source. This primarily occurs for the mask/tail undisturbed policies. The MC layer can't see the policy bits, so this kind of makes sense. We also lumped the FMA instructions into this, but the destination of an FMA is an arithmetic source, not just an undisturbed value. This needs to be correct for llvm-mca to understand the dependencies of the FMA instructions, though every other instruction is still wrong for tail/mask undisturbed. This patch models the FMA instructions correctly at the MC layer. This necessitates changes to the assembler to offset operand numbers. I've also added the extra sched class operand and fixed the operand order for the scalar read class. Reviewed By: rogfer01 Differential Revision: https://reviews.llvm.org/D151850
1 parent df6b35e commit 4400018

File tree

5 files changed

+96
-58
lines changed

5 files changed

+96
-58
lines changed

llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3352,16 +3352,21 @@ bool RISCVAsmParser::validateInstruction(MCInst &Inst,
33523352
}
33533353

33543354
unsigned DestReg = Inst.getOperand(0).getReg();
3355+
unsigned Offset = 0;
3356+
int TiedOp = MCID.getOperandConstraint(1, MCOI::TIED_TO);
3357+
if (TiedOp == 0)
3358+
Offset = 1;
3359+
33553360
// Operands[1] will be the first operand, DestReg.
33563361
SMLoc Loc = Operands[1]->getStartLoc();
33573362
if (MCID.TSFlags & RISCVII::VS2Constraint) {
3358-
unsigned CheckReg = Inst.getOperand(1).getReg();
3363+
unsigned CheckReg = Inst.getOperand(Offset + 1).getReg();
33593364
if (DestReg == CheckReg)
33603365
return Error(Loc, "The destination vector register group cannot overlap"
33613366
" the source vector register group.");
33623367
}
3363-
if ((MCID.TSFlags & RISCVII::VS1Constraint) && (Inst.getOperand(2).isReg())) {
3364-
unsigned CheckReg = Inst.getOperand(2).getReg();
3368+
if ((MCID.TSFlags & RISCVII::VS1Constraint) && Inst.getOperand(Offset + 2).isReg()) {
3369+
unsigned CheckReg = Inst.getOperand(Offset + 2).getReg();
33653370
if (DestReg == CheckReg)
33663371
return Error(Loc, "The destination vector register group cannot overlap"
33673372
" the source vector register group.");

llvm/lib/Target/RISCV/RISCVInstrInfoV.td

Lines changed: 51 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,9 @@ class SchedBinaryMC<string write, string read0, string read1,
162162
class SchedTernary<string write, string read0, string read1, string read2,
163163
string mx, int sew = 0, bit forceMasked = 0>:
164164
SchedNary<write, [read0, read1, read2], mx, sew, forceMasked>;
165+
class SchedTernaryMC<string write, string read0, string read1, string read2,
166+
int sew = 0, bit forceMasked = 1>:
167+
SchedNary<write, [read0, read1, read2], "WorstCase", sew, forceMasked>;
165168

166169
// For reduction instructions.
167170
class SchedReduction<string write, string read, string mx, int sew>:
@@ -438,10 +441,14 @@ class VALUmVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
438441
}
439442

440443
// op vd, vs1, vs2, vm (reverse the order of vs1 and vs2)
441-
class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
442-
: RVInstVV<funct6, opv, (outs VR:$vd),
443-
(ins VR:$vs1, VR:$vs2, VMaskOp:$vm),
444-
opcodestr, "$vd, $vs1, $vs2$vm">;
444+
class VALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr,
445+
bit EarlyClobber = 0>
446+
: RVInstVV<funct6, opv, (outs VR:$vd_wb),
447+
(ins VR:$vd, VR:$vs1, VR:$vs2, VMaskOp:$vm),
448+
opcodestr, "$vd, $vs1, $vs2$vm"> {
449+
let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
450+
"$vd = $vd_wb");
451+
}
445452

446453
// op vd, vs2, vs1
447454
class VALUVVNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
@@ -466,10 +473,14 @@ class VALUmVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
466473
}
467474

468475
// op vd, rs1, vs2, vm (reverse the order of rs1 and vs2)
469-
class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
470-
: RVInstVX<funct6, opv, (outs VR:$vd),
471-
(ins GPR:$rs1, VR:$vs2, VMaskOp:$vm),
472-
opcodestr, "$vd, $rs1, $vs2$vm">;
476+
class VALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr,
477+
bit EarlyClobber = 0>
478+
: RVInstVX<funct6, opv, (outs VR:$vd_wb),
479+
(ins VR:$vd, GPR:$rs1, VR:$vs2, VMaskOp:$vm),
480+
opcodestr, "$vd, $rs1, $vs2$vm"> {
481+
let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
482+
"$vd = $vd_wb");
483+
}
473484

474485
// op vd, vs1, vs2
475486
class VALUVXNoVm<bits<6> funct6, RISCVVFormat opv, string opcodestr>
@@ -508,10 +519,14 @@ class VALUVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
508519
opcodestr, "$vd, $vs2, $rs1$vm">;
509520

510521
// op vd, rs1, vs2, vm (Float) (with mask, reverse the order of rs1 and vs2)
511-
class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr>
512-
: RVInstVX<funct6, opv, (outs VR:$vd),
513-
(ins FPR32:$rs1, VR:$vs2, VMaskOp:$vm),
514-
opcodestr, "$vd, $rs1, $vs2$vm">;
522+
class VALUrVF<bits<6> funct6, RISCVVFormat opv, string opcodestr,
523+
bit EarlyClobber = 0>
524+
: RVInstVX<funct6, opv, (outs VR:$vd_wb),
525+
(ins VR:$vd, FPR32:$rs1, VR:$vs2, VMaskOp:$vm),
526+
opcodestr, "$vd, $rs1, $vs2$vm"> {
527+
let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
528+
"$vd = $vd_wb");
529+
}
515530

516531
// op vd, vs2, vm (use vs1 as instruction encoding)
517532
class VALUVs2<bits<6> funct6, bits<5> vs1, RISCVVFormat opv, string opcodestr>
@@ -590,20 +605,26 @@ multiclass VALU_MV_V_X<string opcodestr, bits<6> funct6, string vw> {
590605

591606
multiclass VMAC_MV_V_X<string opcodestr, bits<6> funct6> {
592607
def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv">,
593-
SchedBinaryMC<"WriteVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV">;
608+
SchedTernaryMC<"WriteVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV",
609+
"ReadVIMulAddV">;
594610
def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">,
595-
SchedBinaryMC<"WriteVIMulAddX", "ReadVIMulAddV", "ReadVIMulAddX">;
611+
SchedTernaryMC<"WriteVIMulAddX", "ReadVIMulAddV", "ReadVIMulAddX",
612+
"ReadVIMulAddV">;
596613
}
597614

598615
multiclass VWMAC_MV_X<string opcodestr, bits<6> funct6> {
616+
let RVVConstraint = WidenV in
599617
def X : VALUrVX<funct6, OPMVX, opcodestr # ".vx">,
600-
SchedBinaryMC<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX">;
618+
SchedTernaryMC<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX",
619+
"ReadVIWMulAddV">;
601620
}
602621

603622
multiclass VWMAC_MV_V_X<string opcodestr, bits<6> funct6>
604623
: VWMAC_MV_X<opcodestr, funct6> {
605-
def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv">,
606-
SchedBinaryMC<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV">;
624+
let RVVConstraint = WidenV in
625+
def V : VALUrVV<funct6, OPMVV, opcodestr # ".vv", /*EarlyClobber*/1>,
626+
SchedTernaryMC<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV",
627+
"ReadVIWMulAddV">;
607628
}
608629

609630
multiclass VALU_MV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
@@ -693,16 +714,22 @@ multiclass VWMUL_FV_V_F<string opcodestr, bits<6> funct6> {
693714

694715
multiclass VMAC_FV_V_F<string opcodestr, bits<6> funct6> {
695716
def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv">,
696-
SchedBinaryMC<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV">;
717+
SchedTernaryMC<"WriteVFMulAddV", "ReadVFMulAddV", "ReadVFMulAddV",
718+
"ReadVFMulAddV">;
697719
def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf">,
698-
SchedBinaryMC<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF">;
720+
SchedTernaryMC<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
721+
"ReadVFMulAddV">;
699722
}
700723

701724
multiclass VWMAC_FV_V_F<string opcodestr, bits<6> funct6> {
702-
def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv">,
703-
SchedBinaryMC<"WriteVFWMulAddV", "ReadVFWMulAddV", "ReadVFWMulAddV">;
704-
def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf">,
705-
SchedBinaryMC<"WriteVFWMulAddF", "ReadVFWMulAddV", "ReadVFWMulAddF">;
725+
let RVVConstraint = WidenV in {
726+
def V : VALUrVV<funct6, OPFVV, opcodestr # ".vv", /*EarlyClobber*/1>,
727+
SchedTernaryMC<"WriteVFWMulAddV", "ReadVFWMulAddV", "ReadVFWMulAddV",
728+
"ReadVFWMulAddV">;
729+
def F : VALUrVF<funct6, OPFVF, opcodestr # ".vf", /*EarlyClobber*/1>,
730+
SchedTernaryMC<"WriteVFWMulAddF", "ReadVFWMulAddV", "ReadVFWMulAddF",
731+
"ReadVFWMulAddV">;
732+
}
706733
}
707734

708735
multiclass VSQR_FV_VS2<string opcodestr, bits<6> funct6, bits<5> vs1> {
@@ -1289,12 +1316,10 @@ defm VMADD_V : VMAC_MV_V_X<"vmadd", 0b101001>;
12891316
defm VNMSUB_V : VMAC_MV_V_X<"vnmsub", 0b101011>;
12901317

12911318
// Vector Widening Integer Multiply-Add Instructions
1292-
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV in {
12931319
defm VWMACCU_V : VWMAC_MV_V_X<"vwmaccu", 0b111100>;
12941320
defm VWMACC_V : VWMAC_MV_V_X<"vwmacc", 0b111101>;
12951321
defm VWMACCSU_V : VWMAC_MV_V_X<"vwmaccsu", 0b111111>;
12961322
defm VWMACCUS_V : VWMAC_MV_X<"vwmaccus", 0b111110>;
1297-
} // Constraints = "@earlyclobber $vd", RVVConstraint = WidenV
12981323

12991324
// Vector Integer Merge Instructions
13001325
defm VMERGE_V : VMRG_IV_V_X_I<"vmerge", 0b010111>;
@@ -1394,8 +1419,7 @@ defm VFNMSUB_V : VMAC_FV_V_F<"vfnmsub", 0b101011>;
13941419
}
13951420

13961421
// Vector Widening Floating-Point Fused Multiply-Add Instructions
1397-
let Constraints = "@earlyclobber $vd", RVVConstraint = WidenV,
1398-
Uses = [FRM], mayRaiseFPException = true in {
1422+
let Uses = [FRM], mayRaiseFPException = true in {
13991423
defm VFWMACC_V : VWMAC_FV_V_F<"vfwmacc", 0b111100>;
14001424
defm VFWNMACC_V : VWMAC_FV_V_F<"vfwnmacc", 0b111101>;
14011425
defm VFWMSAC_V : VWMAC_FV_V_F<"vfwmsac", 0b111110>;

llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3138,8 +3138,8 @@ multiclass VPseudoVMAC_VV_VX_AAXA<string Constraint = ""> {
31383138
SchedTernary<"WriteVIMulAddV", "ReadVIMulAddV", "ReadVIMulAddV",
31393139
"ReadVIMulAddV", mx>;
31403140
defm "" : VPseudoTernaryV_VX_AAXA<m, Constraint>,
3141-
SchedTernary<"WriteVIMulAddX", "ReadVIMulAddV", "ReadVIMulAddV",
3142-
"ReadVIMulAddX", mx>;
3141+
SchedTernary<"WriteVIMulAddX", "ReadVIMulAddV", "ReadVIMulAddX",
3142+
"ReadVIMulAddV", mx>;
31433143
}
31443144
}
31453145

@@ -3153,8 +3153,8 @@ multiclass VPseudoVMAC_VV_VF_AAXA<string Constraint = ""> {
31533153
foreach f = FPList in {
31543154
foreach m = f.MxList in {
31553155
defm "" : VPseudoTernaryV_VF_AAXA<m, f, Constraint>,
3156-
SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddV",
3157-
"ReadVFMulAddF", m.MX>;
3156+
SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
3157+
"ReadVFMulAddV", m.MX>;
31583158
}
31593159
}
31603160
}
@@ -3169,8 +3169,8 @@ multiclass VPseudoVMAC_VV_VF_AAXA_RM<string Constraint = ""> {
31693169
foreach f = FPList in {
31703170
foreach m = f.MxList in {
31713171
defm "" : VPseudoTernaryV_VF_AAXA_RM<m, f, Constraint>,
3172-
SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddV",
3173-
"ReadVFMulAddF", m.MX>;
3172+
SchedTernary<"WriteVFMulAddF", "ReadVFMulAddV", "ReadVFMulAddF",
3173+
"ReadVFMulAddV", m.MX>;
31743174
}
31753175
}
31763176
}
@@ -3193,16 +3193,16 @@ multiclass VPseudoVWMAC_VV_VX {
31933193
SchedTernary<"WriteVIWMulAddV", "ReadVIWMulAddV", "ReadVIWMulAddV",
31943194
"ReadVIWMulAddV", mx>;
31953195
defm "" : VPseudoTernaryW_VX<m>,
3196-
SchedTernary<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddV",
3197-
"ReadVIWMulAddX", mx>;
3196+
SchedTernary<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX",
3197+
"ReadVIWMulAddV", mx>;
31983198
}
31993199
}
32003200

32013201
multiclass VPseudoVWMAC_VX {
32023202
foreach m = MxListW in {
32033203
defm "" : VPseudoTernaryW_VX<m>,
3204-
SchedTernary<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddV",
3205-
"ReadVIWMulAddX", m.MX>;
3204+
SchedTernary<"WriteVIWMulAddX", "ReadVIWMulAddV", "ReadVIWMulAddX",
3205+
"ReadVIWMulAddV", m.MX>;
32063206
}
32073207
}
32083208

@@ -3217,7 +3217,7 @@ multiclass VPseudoVWMAC_VV_VF_RM {
32173217
foreach m = f.MxListFW in {
32183218
defm "" : VPseudoTernaryW_VF_RM<m, f>,
32193219
SchedTernary<"WriteVFWMulAddF", "ReadVFWMulAddV",
3220-
"ReadVFWMulAddV", "ReadVFWMulAddF", m.MX>;
3220+
"ReadVFWMulAddF", "ReadVFWMulAddV", m.MX>;
32213221
}
32223222
}
32233223
}
@@ -3242,7 +3242,7 @@ multiclass VPseudoVWMAC_VV_VF_BF_RM {
32423242

32433243
defm "" : VPseudoTernaryW_VF_BF_RM<m, f>,
32443244
Sched<[WriteVFWMulAddF_MX, ReadVFWMulAddV_MX,
3245-
ReadVFWMulAddV_MX, ReadVFWMulAddF_MX, ReadVMask]>;
3245+
ReadVFWMulAddF_MX, ReadVFWMulAddV_MX, ReadVMask]>;
32463246
}
32473247
}
32483248
}

llvm/lib/Target/RISCV/RISCVInstrInfoXTHead.td

Lines changed: 23 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -56,16 +56,24 @@ class THInstVdotVX<bits<6> funct6, RISCVVFormat opv, dag outs, dag ins,
5656

5757
let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
5858
// op vd, vs1, vs2, vm (reverse the order of vs1 and vs2)
59-
class THVdotALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr>
60-
: THInstVdotVV<funct6, opv, (outs VR:$vd),
61-
(ins VR:$vs1, VR:$vs2, VMaskOp:$vm),
62-
opcodestr, "$vd, $vs1, $vs2$vm">;
59+
class THVdotALUrVV<bits<6> funct6, RISCVVFormat opv, string opcodestr,
60+
bit EarlyClobber>
61+
: THInstVdotVV<funct6, opv, (outs VR:$vd_wb),
62+
(ins VR:$vd, VR:$vs1, VR:$vs2, VMaskOp:$vm),
63+
opcodestr, "$vd, $vs1, $vs2$vm"> {
64+
let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
65+
"$vd = $vd_wb");
66+
}
6367

6468
// op vd, rs1, vs2, vm (reverse the order of rs1 and vs2)
65-
class THVdotALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr>
66-
: THInstVdotVX<funct6, opv, (outs VR:$vd),
67-
(ins GPR:$rs1, VR:$vs2, VMaskOp:$vm),
68-
opcodestr, "$vd, $rs1, $vs2$vm">;
69+
class THVdotALUrVX<bits<6> funct6, RISCVVFormat opv, string opcodestr,
70+
bit EarlyClobber>
71+
: THInstVdotVX<funct6, opv, (outs VR:$vd_wb),
72+
(ins VR:$vd, GPR:$rs1, VR:$vs2, VMaskOp:$vm),
73+
opcodestr, "$vd, $rs1, $vs2$vm"> {
74+
let Constraints = !if(EarlyClobber, "@earlyclobber $vd_wb, $vd = $vd_wb",
75+
"$vd = $vd_wb");
76+
}
6977
} // hasSideEffects = 0, mayLoad = 0, mayStore = 0
7078

7179
let Predicates = [HasVendorXTHeadBa], DecoderNamespace = "XTHeadBa",
@@ -227,12 +235,14 @@ class THStoreUpdate<bits<5> funct5, string opcodestr>
227235
//===----------------------------------------------------------------------===//
228236

229237
multiclass THVdotVMAQA_VX<string opcodestr, bits<6> funct6> {
230-
def _VX : THVdotALUrVX<funct6, OPMVX, opcodestr # ".vx">;
238+
let RVVConstraint = WidenV in
239+
def _VX : THVdotALUrVX<funct6, OPMVX, opcodestr # ".vx", /*EarlyClobber*/1>;
231240
}
232241

233-
multiclass THVdotVMAQA<string opcodestr, bits<6> funct6> {
234-
def _VV : THVdotALUrVV<funct6, OPMVX, opcodestr # ".vv">;
235-
defm "" : THVdotVMAQA_VX<opcodestr, funct6>;
242+
multiclass THVdotVMAQA<string opcodestr, bits<6> funct6>
243+
: THVdotVMAQA_VX<opcodestr, funct6> {
244+
let RVVConstraint = WidenV in
245+
def _VV : THVdotALUrVV<funct6, OPMVX, opcodestr # ".vv", /*EarlyClobber*/1>;
236246
}
237247

238248
//===----------------------------------------------------------------------===//
@@ -448,9 +458,7 @@ def TH_FSURD : THStoreIndexed<FPR64, 0b01110, "th.fsurd">,
448458
Sched<[WriteFST64, ReadFStoreData, ReadFMemBase]>;
449459
}
450460

451-
let Predicates = [HasVendorXTHeadVdot],
452-
Constraints = "@earlyclobber $vd",
453-
RVVConstraint = WidenV in {
461+
let Predicates = [HasVendorXTHeadVdot] in {
454462
defm THVdotVMAQA : THVdotVMAQA<"th.vmaqa", 0b100000>;
455463
defm THVdotVMAQAU : THVdotVMAQA<"th.vmaqau", 0b100010>;
456464
defm THVdotVMAQASU : THVdotVMAQA<"th.vmaqasu", 0b100100>;

llvm/lib/Target/RISCV/RISCVInstrInfoZvfbf.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@ let Uses = [FRM] in
2525
defm VFNCVTBF16_F_F_W : VNCVTF_FV_VS2<"vfncvtbf16.f.f.w", 0b010010, 0b11101>;
2626
}
2727

28-
let Predicates = [HasStdExtZvfbfwma], Constraints = "@earlyclobber $vd",
28+
let Predicates = [HasStdExtZvfbfwma],
29+
Constraints = "@earlyclobber $vd_wb, $vd = $vd_wb",
2930
RVVConstraint = WidenV, Uses = [FRM], mayRaiseFPException = true in {
3031
defm VFWMACCBF16_V : VWMAC_FV_V_F<"vfwmaccbf16", 0b111011>;
3132
}

0 commit comments

Comments
 (0)