11
11
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12
12
// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
13
13
//
14
+ // 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
15
+ // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
16
+ //
17
+ // 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
18
+ // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
19
+ //
14
20
// The mov pseudo instruction could be expanded to multiple mov instructions
15
21
// later. In this case, we could try to split the constant operand of mov
16
- // instruction into two bitmask immediates. It makes two AND instructions
17
- // intead of multiple `mov` + `and` instructions.
22
+ // instruction into two immediates which can be directly encoded into
23
+ // *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
24
+ // multiple `mov` + `and/add/sub` instructions.
18
25
// ===----------------------------------------------------------------------===//
19
26
20
27
#include " AArch64ExpandImm.h"
@@ -41,6 +48,13 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
41
48
MachineLoopInfo *MLI;
42
49
MachineRegisterInfo *MRI;
43
50
51
+ bool checkMovImmInstr (MachineInstr &MI, MachineInstr *&MovMI,
52
+ MachineInstr *&SubregToRegMI);
53
+
54
+ template <typename T>
55
+ bool visitADDSUB (MachineInstr &MI,
56
+ SmallSetVector<MachineInstr *, 8 > &ToBeRemoved, bool IsAdd);
57
+
44
58
template <typename T>
45
59
bool visitAND (MachineInstr &MI,
46
60
SmallSetVector<MachineInstr *, 8 > &ToBeRemoved);
@@ -119,31 +133,9 @@ bool AArch64MIPeepholeOpt::visitAND(
119
133
assert ((RegSize == 32 || RegSize == 64 ) &&
120
134
" Invalid RegSize for AND bitmask peephole optimization" );
121
135
122
- // Check whether AND's MBB is in loop and the AND is loop invariant.
123
- MachineBasicBlock *MBB = MI.getParent ();
124
- MachineLoop *L = MLI->getLoopFor (MBB);
125
- if (L && !L->isLoopInvariant (MI))
126
- return false ;
127
-
128
- // Check whether AND's operand is MOV with immediate.
129
- MachineInstr *MovMI = MRI->getUniqueVRegDef (MI.getOperand (2 ).getReg ());
130
- MachineInstr *SubregToRegMI = nullptr ;
131
- // If it is SUBREG_TO_REG, check its operand.
132
- if (MovMI->getOpcode () == TargetOpcode::SUBREG_TO_REG) {
133
- SubregToRegMI = MovMI;
134
- MovMI = MRI->getUniqueVRegDef (MovMI->getOperand (2 ).getReg ());
135
- }
136
-
137
- if (MovMI->getOpcode () != AArch64::MOVi32imm &&
138
- MovMI->getOpcode () != AArch64::MOVi64imm)
139
- return false ;
140
-
141
- // If the MOV has multiple uses, do not split the immediate because it causes
142
- // more instructions.
143
- if (!MRI->hasOneUse (MovMI->getOperand (0 ).getReg ()))
144
- return false ;
145
-
146
- if (SubregToRegMI && !MRI->hasOneUse (SubregToRegMI->getOperand (0 ).getReg ()))
136
+ // Perform several essential checks against current MI.
137
+ MachineInstr *MovMI, *SubregToRegMI;
138
+ if (!checkMovImmInstr (MI, MovMI, SubregToRegMI))
147
139
return false ;
148
140
149
141
// Split the bitmask immediate into two.
@@ -160,6 +152,7 @@ bool AArch64MIPeepholeOpt::visitAND(
160
152
161
153
// Create new AND MIs.
162
154
DebugLoc DL = MI.getDebugLoc ();
155
+ MachineBasicBlock *MBB = MI.getParent ();
163
156
const TargetRegisterClass *ANDImmRC =
164
157
(RegSize == 32 ) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
165
158
Register DstReg = MI.getOperand (0 ).getReg ();
@@ -185,6 +178,135 @@ bool AArch64MIPeepholeOpt::visitAND(
185
178
return true ;
186
179
}
187
180
181
+ template <typename T>
182
+ static bool splitAddSubImm (T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
183
+ // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
184
+ // imm0 and imm1 are non-zero 12-bit unsigned int.
185
+ if ((Imm & 0xfff000 ) == 0 || (Imm & 0xfff ) == 0 ||
186
+ (Imm & ~static_cast <T>(0xffffff )) != 0 )
187
+ return false ;
188
+
189
+ // The immediate can not be composed via a single instruction.
190
+ SmallVector<AArch64_IMM::ImmInsnModel, 4 > Insn;
191
+ AArch64_IMM::expandMOVImm (Imm, RegSize, Insn);
192
+ if (Insn.size () == 1 )
193
+ return false ;
194
+
195
+ // Split Imm into (Imm0 << 12) + Imm1;
196
+ Imm0 = (Imm >> 12 ) & 0xfff ;
197
+ Imm1 = Imm & 0xfff ;
198
+ return true ;
199
+ }
200
+
201
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(
    MachineInstr &MI, SmallSetVector<MachineInstr *, 8> &ToBeRemoved,
    bool IsAdd) {
  // Try below transformation.
  //
  // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
  // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
  //
  // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
  // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of mov instruction into two
  // legal add/sub immediates. It makes only two ADD/SUB instructions instead
  // of multiple `mov` + `add/sub` instructions.

  // T is uint32_t or uint64_t, selecting the W or X register forms.
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal add/sub immediate peephole optimization");

  // Perform several essential checks against current MI (loop invariance and
  // that operand 2 is a single-use MOVi32imm/MOVi64imm, possibly behind a
  // SUBREG_TO_REG).
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  unsigned Opcode;
  if (splitAddSubImm(Imm, RegSize, Imm0, Imm1)) {
    if (IsAdd)
      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
    else
      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
  } else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1)) {
    // The negated immediate splits instead: flip ADD<->SUB so the overall
    // arithmetic result is unchanged.
    if (IsAdd)
      Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
    else
      Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
  } else {
    return false;
  }

  // Create new ADD/SUB MIs. The first instruction applies Imm0 with an LSL
  // #12 shift (high 12 bits), the second applies Imm1 unshifted (low 12
  // bits).
  DebugLoc DL = MI.getDebugLoc();
  MachineBasicBlock *MBB = MI.getParent();
  const TargetRegisterClass *RC =
      (RegSize == 32) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register TmpReg = MRI->createVirtualRegister(RC);

  // Constrain to the sp-capable class required by the *ri add/sub forms.
  MRI->constrainRegClass(SrcReg, RC);
  BuildMI(*MBB, MI, DL, TII->get(Opcode), TmpReg)
      .addReg(SrcReg)
      .addImm(Imm0)
      .addImm(12);

  MRI->constrainRegClass(DstReg, RC);
  BuildMI(*MBB, MI, DL, TII->get(Opcode), DstReg)
      .addReg(TmpReg)
      .addImm(Imm1)
      .addImm(0);

  // Record the MIs that need to be removed (the original rr instruction, the
  // optional SUBREG_TO_REG, and the now-dead MOV).
  ToBeRemoved.insert(&MI);
  if (SubregToRegMI)
    ToBeRemoved.insert(SubregToRegMI);
  ToBeRemoved.insert(MovMI);

  return true;
}
273
+
274
// Checks if the corresponding MOV immediate instruction is applicable for
// this peephole optimization. On success, \p MovMI is set to the defining
// MOVi32imm/MOVi64imm and \p SubregToRegMI to the intervening SUBREG_TO_REG
// (or nullptr if there is none). The outparams are only meaningful when true
// is returned.
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) {
  // Check whether current MI is in loop and is loop invariant. Bail out on
  // loop-variant instructions so we do not add work inside a loop body.
  MachineBasicBlock *MBB = MI.getParent();
  MachineLoop *L = MLI->getLoopFor(MBB);
  if (L && !L->isLoopInvariant(MI))
    return false;

  // Check whether current MI's second source operand is defined by a MOV with
  // immediate.
  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  SubregToRegMI = nullptr;
  // If it is SUBREG_TO_REG, look through it to the instruction defining its
  // inserted subregister.
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;
    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
  }

  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
      MovMI->getOpcode() != AArch64::MOVi64imm)
    return false;

  // If the MOV has multiple uses, do not split the immediate because it causes
  // more instructions.
  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
    return false;

  // Same single-use requirement for the SUBREG_TO_REG result, if present.
  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
    return false;

  // It is OK to perform this peephole optimization.
  return true;
}
309
+
188
310
bool AArch64MIPeepholeOpt::runOnMachineFunction (MachineFunction &MF) {
189
311
if (skipFunction (MF.getFunction ()))
190
312
return false ;
@@ -210,6 +332,18 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
210
332
case AArch64::ANDXrr:
211
333
Changed = visitAND<uint64_t >(MI, ToBeRemoved);
212
334
break ;
335
+ case AArch64::ADDWrr:
336
+ Changed = visitADDSUB<uint32_t >(MI, ToBeRemoved, true );
337
+ break ;
338
+ case AArch64::SUBWrr:
339
+ Changed = visitADDSUB<uint32_t >(MI, ToBeRemoved, false );
340
+ break ;
341
+ case AArch64::ADDXrr:
342
+ Changed = visitADDSUB<uint64_t >(MI, ToBeRemoved, true );
343
+ break ;
344
+ case AArch64::SUBXrr:
345
+ Changed = visitADDSUB<uint64_t >(MI, ToBeRemoved, false );
346
+ break ;
213
347
}
214
348
}
215
349
}
0 commit comments