Skip to content

Commit c69af70

Browse files
red1bluelost authored and davemgreen committed
[AArch64] Adds SUBS and ADDS instructions to the MIPeepholeOpt.
Implements ADDS/SUBS 24-bit immediate optimization using the MIPeepholeOpt pass. This follows the pattern: Optimize ([adds|subs] r, imm) -> ([ADDS|SUBS] ([ADD|SUB] r, #imm0, lsl #12), #imm1), if imm == (imm0<<12)+imm1, and both imm0 and imm1 are non-zero 12-bit unsigned integers. Optimize ([adds|subs] r, imm) -> ([SUBS|ADDS] ([SUB|ADD] r, #imm0, lsl #12), #imm1), if imm == -(imm0<<12)-imm1, and both imm0 and imm1 are non-zero 12-bit unsigned integers. The SplitAndOpcFunc type had to change the return type to an Opcode pair so that the first add/sub is the regular instruction and the second is the flag-setting instruction. This required updating the code in the AND case. Testing: I ran a two-stage bootstrap with this code. Using the second-stage compiler, I verified that the negation of an ADDS to SUBS or vice versa is a valid optimization. Example V == -0x111111. Reviewed By: dmgreen Differential Revision: https://reviews.llvm.org/D118663
1 parent b8801ba commit c69af70

File tree

5 files changed

+452
-72
lines changed

5 files changed

+452
-72
lines changed

llvm/lib/Target/AArch64/AArch64InstrInfo.cpp

+10-32
Original file line numberDiff line numberDiff line change
@@ -1547,27 +1547,6 @@ findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
15471547
}
15481548
}
15491549

1550-
namespace {
1551-
1552-
struct UsedNZCV {
1553-
bool N = false;
1554-
bool Z = false;
1555-
bool C = false;
1556-
bool V = false;
1557-
1558-
UsedNZCV() = default;
1559-
1560-
UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1561-
this->N |= UsedFlags.N;
1562-
this->Z |= UsedFlags.Z;
1563-
this->C |= UsedFlags.C;
1564-
this->V |= UsedFlags.V;
1565-
return *this;
1566-
}
1567-
};
1568-
1569-
} // end anonymous namespace
1570-
15711550
/// Find a condition code used by the instruction.
15721551
/// Returns AArch64CC::Invalid if either the instruction does not use condition
15731552
/// codes or we don't optimize CmpInstr in the presence of such instructions.
@@ -1622,15 +1601,15 @@ static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
16221601
return UsedFlags;
16231602
}
16241603

1625-
/// \returns Conditions flags used after \p CmpInstr in its MachineBB if they
1626-
/// are not containing C or V flags and NZCV flags are not alive in successors
1627-
/// of the same \p CmpInstr and \p MI parent. \returns None otherwise.
1604+
/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
1605+
/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1606+
/// \returns None otherwise.
16281607
///
16291608
/// Collect instructions using those flags in \p CCUseInstrs if provided.
1630-
static Optional<UsedNZCV>
1631-
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1632-
const TargetRegisterInfo &TRI,
1633-
SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
1609+
Optional<UsedNZCV>
1610+
llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1611+
const TargetRegisterInfo &TRI,
1612+
SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
16341613
MachineBasicBlock *CmpParent = CmpInstr.getParent();
16351614
if (MI.getParent() != CmpParent)
16361615
return None;
@@ -1652,8 +1631,6 @@ examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
16521631
if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
16531632
break;
16541633
}
1655-
if (NZCVUsedAfterCmp.C || NZCVUsedAfterCmp.V)
1656-
return None;
16571634
return NZCVUsedAfterCmp;
16581635
}
16591636

@@ -1684,7 +1661,8 @@ static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
16841661
if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
16851662
return false;
16861663

1687-
if (!examineCFlagsUse(MI, CmpInstr, TRI))
1664+
Optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1665+
if (!NZVCUsed || NZVCUsed->C || NZVCUsed->V)
16881666
return false;
16891667

16901668
AccessKind AccessToCheck = AK_Write;
@@ -1773,7 +1751,7 @@ static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
17731751
examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
17741752
// Condition flags are not used in CmpInstr basic block successors and only
17751753
// Z or N flags allowed to be used after CmpInstr within its basic block
1776-
if (!NZCVUsedAfterCmp)
1754+
if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
17771755
return false;
17781756
// Z or N flag used after CmpInstr must correspond to the flag used in MI
17791757
if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||

llvm/lib/Target/AArch64/AArch64InstrInfo.h

+27
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,33 @@ class AArch64InstrInfo final : public AArch64GenInstrInfo {
362362
const MachineRegisterInfo *MRI) const;
363363
};
364364

365+
struct UsedNZCV {
366+
bool N = false;
367+
bool Z = false;
368+
bool C = false;
369+
bool V = false;
370+
371+
UsedNZCV() = default;
372+
373+
UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
374+
this->N |= UsedFlags.N;
375+
this->Z |= UsedFlags.Z;
376+
this->C |= UsedFlags.C;
377+
this->V |= UsedFlags.V;
378+
return *this;
379+
}
380+
};
381+
382+
/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
383+
/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
384+
/// \returns None otherwise.
385+
///
386+
/// Collect instructions using those flags in \p CCUseInstrs if provided.
387+
Optional<UsedNZCV>
388+
examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
389+
const TargetRegisterInfo &TRI,
390+
SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr);
391+
365392
/// Return true if there is an instruction /after/ \p DefMI and before \p UseMI
366393
/// which either reads or clobbers NZCV.
367394
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

+120-30
Original file line numberDiff line numberDiff line change
@@ -60,12 +60,13 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
6060
MachineLoopInfo *MLI;
6161
MachineRegisterInfo *MRI;
6262

63+
using OpcodePair = std::pair<unsigned, unsigned>;
6364
template <typename T>
6465
using SplitAndOpcFunc =
65-
std::function<Optional<unsigned>(T, unsigned, T &, T &)>;
66+
std::function<Optional<OpcodePair>(T, unsigned, T &, T &)>;
6667
using BuildMIFunc =
67-
std::function<void(MachineInstr &, unsigned, unsigned, unsigned, Register,
68-
Register, Register)>;
68+
std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
69+
Register, Register, Register)>;
6970

7071
/// For instructions where an immediate operand could be split into two
7172
/// separate immediate instructions, use the splitTwoPartImm to handle the
@@ -93,6 +94,10 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
9394
bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
9495
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
9596
template <typename T>
97+
bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI,
98+
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
99+
100+
template <typename T>
96101
bool visitAND(unsigned Opc, MachineInstr &MI,
97102
SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
98103
bool visitORR(MachineInstr &MI,
@@ -171,20 +176,20 @@ bool AArch64MIPeepholeOpt::visitAND(
171176

172177
return splitTwoPartImm<T>(
173178
MI, ToBeRemoved,
174-
[Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<unsigned> {
179+
[Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
175180
if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1))
176-
return Opc;
181+
return std::make_pair(Opc, Opc);
177182
return None;
178183
},
179-
[&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
184+
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
180185
unsigned Imm1, Register SrcReg, Register NewTmpReg,
181186
Register NewDstReg) {
182187
DebugLoc DL = MI.getDebugLoc();
183188
MachineBasicBlock *MBB = MI.getParent();
184-
BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
189+
BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
185190
.addReg(SrcReg)
186191
.addImm(Imm0);
187-
BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
192+
BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
188193
.addReg(NewTmpReg)
189194
.addImm(Imm1);
190195
});
@@ -273,23 +278,64 @@ bool AArch64MIPeepholeOpt::visitADDSUB(
273278
return splitTwoPartImm<T>(
274279
MI, ToBeRemoved,
275280
[PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
276-
T &Imm1) -> Optional<unsigned> {
281+
T &Imm1) -> Optional<OpcodePair> {
277282
if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
278-
return PosOpc;
283+
return std::make_pair(PosOpc, PosOpc);
279284
if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
280-
return NegOpc;
285+
return std::make_pair(NegOpc, NegOpc);
281286
return None;
282287
},
283-
[&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
288+
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
289+
unsigned Imm1, Register SrcReg, Register NewTmpReg,
290+
Register NewDstReg) {
291+
DebugLoc DL = MI.getDebugLoc();
292+
MachineBasicBlock *MBB = MI.getParent();
293+
BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
294+
.addReg(SrcReg)
295+
.addImm(Imm0)
296+
.addImm(12);
297+
BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
298+
.addReg(NewTmpReg)
299+
.addImm(Imm1)
300+
.addImm(0);
301+
});
302+
}
303+
304+
template <typename T>
305+
bool AArch64MIPeepholeOpt::visitADDSSUBS(
306+
OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI,
307+
SmallSetVector<MachineInstr *, 8> &ToBeRemoved) {
308+
// Try the same transformation as ADDSUB but with additional requirement
309+
// that the condition code usages are only for Equal and Not Equal
310+
return splitTwoPartImm<T>(
311+
MI, ToBeRemoved,
312+
[PosOpcs, NegOpcs, &MI, &TRI = TRI, &MRI = MRI](
313+
T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
314+
OpcodePair OP;
315+
if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
316+
OP = PosOpcs;
317+
else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
318+
OP = NegOpcs;
319+
else
320+
return None;
321+
// Check conditional uses last since it is expensive for scanning
322+
// subsequent instructions
323+
MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
324+
Optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
325+
if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
326+
return None;
327+
return OP;
328+
},
329+
[&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
284330
unsigned Imm1, Register SrcReg, Register NewTmpReg,
285331
Register NewDstReg) {
286332
DebugLoc DL = MI.getDebugLoc();
287333
MachineBasicBlock *MBB = MI.getParent();
288-
BuildMI(*MBB, MI, DL, TII->get(Opcode), NewTmpReg)
334+
BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
289335
.addReg(SrcReg)
290336
.addImm(Imm0)
291337
.addImm(12);
292-
BuildMI(*MBB, MI, DL, TII->get(Opcode), NewDstReg)
338+
BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
293339
.addReg(NewTmpReg)
294340
.addImm(Imm1)
295341
.addImm(0);
@@ -357,33 +403,57 @@ bool AArch64MIPeepholeOpt::splitTwoPartImm(
357403
// number since it was sign extended when we assign to the 64-bit Imm.
358404
if (SubregToRegMI)
359405
Imm &= 0xFFFFFFFF;
360-
unsigned Opcode;
406+
OpcodePair Opcode;
361407
if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
362408
Opcode = R.getValue();
363409
else
364410
return false;
365411

366-
// Create new ADD/SUB MIs.
412+
// Create new MIs using the first and second opcodes. Opcodes might differ for
413+
// flag setting operations that should only set flags on second instruction.
414+
// NewTmpReg = Opcode.first SrcReg Imm0
415+
// NewDstReg = Opcode.second NewTmpReg Imm1
416+
417+
// Determine register classes for destinations and register operands
367418
MachineFunction *MF = MI.getMF();
368-
const TargetRegisterClass *RC =
369-
TII->getRegClass(TII->get(Opcode), 0, TRI, *MF);
370-
const TargetRegisterClass *ORC =
371-
TII->getRegClass(TII->get(Opcode), 1, TRI, *MF);
419+
const TargetRegisterClass *FirstInstrDstRC =
420+
TII->getRegClass(TII->get(Opcode.first), 0, TRI, *MF);
421+
const TargetRegisterClass *FirstInstrOperandRC =
422+
TII->getRegClass(TII->get(Opcode.first), 1, TRI, *MF);
423+
const TargetRegisterClass *SecondInstrDstRC =
424+
(Opcode.first == Opcode.second)
425+
? FirstInstrDstRC
426+
: TII->getRegClass(TII->get(Opcode.second), 0, TRI, *MF);
427+
const TargetRegisterClass *SecondInstrOperandRC =
428+
(Opcode.first == Opcode.second)
429+
? FirstInstrOperandRC
430+
: TII->getRegClass(TII->get(Opcode.second), 1, TRI, *MF);
431+
432+
// Get old registers destinations and new register destinations
372433
Register DstReg = MI.getOperand(0).getReg();
373434
Register SrcReg = MI.getOperand(1).getReg();
374-
Register NewTmpReg = MRI->createVirtualRegister(RC);
375-
Register NewDstReg = MRI->createVirtualRegister(RC);
376-
377-
MRI->constrainRegClass(SrcReg, RC);
378-
MRI->constrainRegClass(NewTmpReg, ORC);
379-
MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
380-
435+
Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
436+
// In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
437+
// reuse that same destination register.
438+
Register NewDstReg = DstReg.isVirtual()
439+
? MRI->createVirtualRegister(SecondInstrDstRC)
440+
: DstReg;
441+
442+
// Constrain registers based on their new uses
443+
MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
444+
MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
445+
if (DstReg != NewDstReg)
446+
MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));
447+
448+
// Call the delegating operation to build the instruction
381449
BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
382450

383-
MRI->replaceRegWith(DstReg, NewDstReg);
384451
// replaceRegWith changes MI's definition register. Keep it for SSA form until
385-
// deleting MI.
386-
MI.getOperand(0).setReg(DstReg);
452+
// deleting MI. Only if we made a new destination register.
453+
if (DstReg != NewDstReg) {
454+
MRI->replaceRegWith(DstReg, NewDstReg);
455+
MI.getOperand(0).setReg(DstReg);
456+
}
387457

388458
// Record the MIs need to be removed.
389459
ToBeRemoved.insert(&MI);
@@ -439,6 +509,26 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
439509
Changed = visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI,
440510
ToBeRemoved);
441511
break;
512+
case AArch64::ADDSWrr:
513+
Changed = visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
514+
{AArch64::SUBWri, AArch64::SUBSWri},
515+
MI, ToBeRemoved);
516+
break;
517+
case AArch64::SUBSWrr:
518+
Changed = visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
519+
{AArch64::ADDWri, AArch64::ADDSWri},
520+
MI, ToBeRemoved);
521+
break;
522+
case AArch64::ADDSXrr:
523+
Changed = visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
524+
{AArch64::SUBXri, AArch64::SUBSXri},
525+
MI, ToBeRemoved);
526+
break;
527+
case AArch64::SUBSXrr:
528+
Changed = visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
529+
{AArch64::ADDXri, AArch64::ADDSXri},
530+
MI, ToBeRemoved);
531+
break;
442532
}
443533
}
444534
}

0 commit comments

Comments
 (0)