
Commit 9bf6bef

[AArch64] Optimize add/sub with immediate
Optimize ([add|sub] r, imm) -> ([ADD|SUB] ([ADD|SUB] r, #imm0, lsl #12), #imm1),
if imm == (imm0 << 12) + imm1, and both imm0 and imm1 are non-zero 12-bit
unsigned integers.

Optimize ([add|sub] r, imm) -> ([SUB|ADD] ([SUB|ADD] r, #imm0, lsl #12), #imm1),
if imm == -(imm0 << 12) - imm1, and both imm0 and imm1 are non-zero 12-bit
unsigned integers.

Reviewed By: jaykang10, dmgreen

Differential Revision: https://reviews.llvm.org/D111034
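To make the first condition concrete: the rewrite applies only when the immediate fits in 24 bits and both 12-bit halves are non-zero, so one half can feed the shifted ADD/SUB and the other the unshifted one. Below is a minimal, standalone C++ sketch of that check; the helper name and the main() driver are illustrative only, not code from this patch, and the in-tree helper additionally rejects immediates that a single MOV can already materialize.

#include <cassert>
#include <cstdint>

// Illustrative split check: can Imm be written as (Imm0 << 12) + Imm1 with
// both halves being non-zero 12-bit values?
static bool splitsInto12BitPair(uint64_t Imm, uint64_t &Imm0, uint64_t &Imm1) {
  // Reject if either 12-bit half is zero or if any bit above bit 23 is set.
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 || (Imm >> 24) != 0)
    return false;
  Imm0 = (Imm >> 12) & 0xfff; // goes into the "lsl #12" ADD/SUB
  Imm1 = Imm & 0xfff;         // goes into the plain ADD/SUB
  return true;
}

int main() {
  uint64_t Imm0, Imm1;
  // 0x123456 == (0x123 << 12) + 0x456, so `add x0, x0, #0x123456` can become
  // `add x0, x0, #0x123, lsl #12` followed by `add x0, x0, #0x456`.
  assert(splitsInto12BitPair(0x123456, Imm0, Imm1));
  assert(Imm0 == 0x123 && Imm1 == 0x456);
  // 0x1000000 needs bits above bit 23, so this form does not apply.
  assert(!splitsInto12BitPair(0x1000000, Imm0, Imm1));
  return 0;
}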

File tree

3 files changed: +229, -60 lines

llvm/lib/Target/AArch64/AArch64MIPeepholeOpt.cpp

+161-27
@@ -11,10 +11,17 @@
 // 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
 //    MOVi64imm + ANDXrr ==> ANDXri + ANDXri
 //
+// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
+//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
+//
+// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
+//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
+//
 // The mov pseudo instruction could be expanded to multiple mov instructions
 // later. In this case, we could try to split the constant operand of mov
-// instruction into two bitmask immediates. It makes two AND instructions
-// intead of multiple `mov` + `and` instructions.
+// instruction into two immediates which can be directly encoded into
+// *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
+// multiple `mov` + `and/add/sub` instructions.
 //===----------------------------------------------------------------------===//
 
 #include "AArch64ExpandImm.h"
@@ -41,6 +48,13 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
   MachineLoopInfo *MLI;
   MachineRegisterInfo *MRI;
 
+  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
+                        MachineInstr *&SubregToRegMI);
+
+  template <typename T>
+  bool visitADDSUB(MachineInstr &MI,
+                   SmallSetVector<MachineInstr *, 8> &ToBeRemoved, bool IsAdd);
+
   template <typename T>
   bool visitAND(MachineInstr &MI,
                 SmallSetVector<MachineInstr *, 8> &ToBeRemoved);
@@ -119,31 +133,9 @@ bool AArch64MIPeepholeOpt::visitAND(
   assert((RegSize == 32 || RegSize == 64) &&
          "Invalid RegSize for AND bitmask peephole optimization");
 
-  // Check whether AND's MBB is in loop and the AND is loop invariant.
-  MachineBasicBlock *MBB = MI.getParent();
-  MachineLoop *L = MLI->getLoopFor(MBB);
-  if (L && !L->isLoopInvariant(MI))
-    return false;
-
-  // Check whether AND's operand is MOV with immediate.
-  MachineInstr *MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
-  MachineInstr *SubregToRegMI = nullptr;
-  // If it is SUBREG_TO_REG, check its operand.
-  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
-    SubregToRegMI = MovMI;
-    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
-  }
-
-  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
-      MovMI->getOpcode() != AArch64::MOVi64imm)
-    return false;
-
-  // If the MOV has multiple uses, do not split the immediate because it causes
-  // more instructions.
-  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
-    return false;
-
-  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
+  // Perform several essential checks against current MI.
+  MachineInstr *MovMI, *SubregToRegMI;
+  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
     return false;
 
   // Split the bitmask immediate into two.
@@ -160,6 +152,7 @@ bool AArch64MIPeepholeOpt::visitAND(
 
   // Create new AND MIs.
   DebugLoc DL = MI.getDebugLoc();
+  MachineBasicBlock *MBB = MI.getParent();
   const TargetRegisterClass *ANDImmRC =
       (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
   Register DstReg = MI.getOperand(0).getReg();
@@ -185,6 +178,135 @@ bool AArch64MIPeepholeOpt::visitAND(
   return true;
 }
 
+template <typename T>
+static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
+  // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
+  // imm0 and imm1 are non-zero 12-bit unsigned int.
+  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
+      (Imm & ~static_cast<T>(0xffffff)) != 0)
+    return false;
+
+  // The immediate can not be composed via a single instruction.
+  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
+  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
+  if (Insn.size() == 1)
+    return false;
+
+  // Split Imm into (Imm0 << 12) + Imm1;
+  Imm0 = (Imm >> 12) & 0xfff;
+  Imm1 = Imm & 0xfff;
+  return true;
+}
+
+template <typename T>
+bool AArch64MIPeepholeOpt::visitADDSUB(
+    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
+    bool IsAdd) {
+  // Try below transformation.
+  //
+  // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
+  // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
+  //
+  // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
+  // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
+  //
+  // The mov pseudo instruction could be expanded to multiple mov instructions
+  // later. Let's try to split the constant operand of mov instruction into two
+  // legal add/sub immediates. It makes only two ADD/SUB instructions instead
+  // of multiple `mov` + `add/sub` instructions.
+
+  unsigned RegSize = sizeof(T) * 8;
+  assert((RegSize == 32 || RegSize == 64) &&
+         "Invalid RegSize for legal add/sub immediate peephole optimization");
+
+  // Perform several essential checks against current MI.
+  MachineInstr *MovMI, *SubregToRegMI;
+  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
+    return false;
+
+  // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
+  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
+  unsigned Opcode;
+  if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) {
+    if (IsAdd)
+      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
+    else
+      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
+  } else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) {
+    if (IsAdd)
+      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
+    else
+      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
+  } else {
+    return false;
+  }
+
+  // Create new ADD/SUB MIs.
+  DebugLoc DL = MI.getDebugLoc();
+  MachineBasicBlock *MBB = MI.getParent();
+  const TargetRegisterClass *RC =
+      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
+  Register DstReg = MI.getOperand(0).getReg();
+  Register SrcReg = MI.getOperand(1).getReg();
+  Register TmpReg = MRI->createVirtualRegister(RC);
+
+  MRI->constrainRegClass(SrcReg, RC);
+  BuildMI(*MBB, MI, DL, TII->get(Opcode), TmpReg)
+      .addReg(SrcReg)
+      .addImm(Imm0)
+      .addImm(12);
+
+  MRI->constrainRegClass(DstReg, RC);
+  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
+      .addReg(TmpReg)
+      .addImm(Imm1)
+      .addImm(0);
+
+  // Record the MIs that need to be removed.
+  ToBeRemoved.insert(&MI);
+  if (SubregToRegMI)
+    ToBeRemoved.insert(SubregToRegMI);
+  ToBeRemoved.insert(MovMI);
+
+  return true;
+}
+
+// Checks if the corresponding MOV immediate instruction is applicable for
+// this peephole optimization.
+bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
+                                            MachineInstr *&MovMI,
+                                            MachineInstr *&SubregToRegMI) {
+  // Check whether current MI is in loop and is loop invariant.
+  MachineBasicBlock *MBB = MI.getParent();
+  MachineLoop *L = MLI->getLoopFor(MBB);
+  if (L && !L->isLoopInvariant(MI))
+    return false;
+
+  // Check whether current MI's operand is MOV with immediate.
+  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
+  SubregToRegMI = nullptr;
+  // If it is SUBREG_TO_REG, check its operand.
+  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
+    SubregToRegMI = MovMI;
+    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
+  }
+
+  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
+      MovMI->getOpcode() != AArch64::MOVi64imm)
+    return false;
+
+  // If the MOV has multiple uses, do not split the immediate because it causes
+  // more instructions.
+  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
+    return false;
+
+  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
+    return false;
+
+  // It is OK to perform this peephole optimization.
+  return true;
+}
+
 bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
   if (skipFunction(MF.getFunction()))
     return false;
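The second form from the commit message is what the splitAddSubImm(-Imm, ...) branch of visitADDSUB above handles: when the immediate itself does not split but its negation does, the pass emits the opposite operation, since adding -C is the same as subtracting C. A rough standalone sketch of that decision follows; the helper names and the 64-bit-only simplification are assumptions for illustration, not the patch's code.

#include <cassert>
#include <cstdint>
#include <string>

// Simplified split test: 24-bit immediate with both 12-bit halves non-zero.
static bool splits(uint64_t Imm) {
  return (Imm & 0xfff000) != 0 && (Imm & 0xfff) != 0 && (Imm >> 24) == 0;
}

// Mirror of the opcode choice: keep the operation if Imm splits directly,
// flip it if -Imm splits instead.
static std::string pickOp(bool IsAdd, int64_t Imm) {
  if (splits(static_cast<uint64_t>(Imm)))
    return IsAdd ? "ADDXri pair" : "SUBXri pair";
  if (splits(static_cast<uint64_t>(-Imm)))
    return IsAdd ? "SUBXri pair" : "ADDXri pair"; // flipped ADD <-> SUB
  return "not optimized";
}

int main() {
  // add x0, x0, #0x123456  ->  two ADDXri instructions
  assert(pickOp(/*IsAdd=*/true, 0x123456) == "ADDXri pair");
  // add x0, x0, #-0x123456 ->  two SUBXri instructions via the negated split
  assert(pickOp(/*IsAdd=*/true, -0x123456) == "SUBXri pair");
  return 0;
}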
@@ -210,6 +332,18 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
       case AArch64::ANDXrr:
         Changed = visitAND<uint64_t>(MI, ToBeRemoved);
         break;
+      case AArch64::ADDWrr:
+        Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, true);
+        break;
+      case AArch64::SUBWrr:
+        Changed = visitADDSUB<uint32_t>(MI, ToBeRemoved, false);
+        break;
+      case AArch64::ADDXrr:
+        Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, true);
+        break;
+      case AArch64::SUBXrr:
+        Changed = visitADDSUB<uint64_t>(MI, ToBeRemoved, false);
+        break;
       }
     }
   }
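One profitability detail in splitAddSubImm above is the Insn.size() == 1 bail-out: if AArch64_IMM::expandMOVImm reports that the constant fits a single MOV, then `mov` + `add` is already only two instructions and splitting cannot save anything. The sketch below illustrates that cost comparison with a simplified "fits one MOVZ" predicate standing in for expandMOVImm; the stand-in is an assumption for illustration (the real expansion also considers MOVN and bitmask forms), not the patch's logic verbatim.

#include <cassert>
#include <cstdint>

// Stand-in for the single-instruction check: a value whose set bits all fall
// in one aligned 16-bit chunk fits a single MOVZ.
static bool fitsSingleMovz(uint64_t Imm) {
  for (unsigned Shift = 0; Shift < 64; Shift += 16)
    if ((Imm & ~(0xffffULL << Shift)) == 0)
      return true;
  return false;
}

// Instruction count for keeping the mov + register add, versus splitting the
// immediate into two add-with-immediate instructions.
static unsigned costKeepMov(uint64_t Imm) { return fitsSingleMovz(Imm) ? 2 : 3; }
static unsigned costSplit() { return 2; }

int main() {
  // 0x1001 fits one MOVZ, so mov+add is already 2 instructions: no gain.
  assert(costKeepMov(0x1001) == costSplit());
  // 0x123456 needs MOVZ+MOVK, so mov+mov+add is 3 and the 2-instruction split wins.
  assert(costKeepMov(0x123456) > costSplit());
  return 0;
}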
