11
11
// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri
12
12
// MOVi64imm + ANDXrr ==> ANDXri + ANDXri
13
13
//
14
+ // 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
15
+ // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
16
+ //
17
+ // 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
18
+ // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
19
+ //
14
20
// The mov pseudo instruction could be expanded to multiple mov instructions
15
21
// later. In this case, we could try to split the constant operand of mov
16
- // instruction into two bitmask immediates. It makes two AND instructions
17
- // intead of multiple `mov` + `and` instructions.
22
+ // instruction into two immediates which can be directly encoded into
23
+ // *Wri/*Xri instructions. It makes two AND/ADD/SUB instructions instead of
24
+ // multiple `mov` + `and/add/sub` instructions.
18
25
//
19
- // 2 . Remove redundant ORRWrs which is generated by zero-extend.
26
+ // 4. Remove redundant ORRWrs which is generated by zero-extend.
20
27
//
21
28
// %3:gpr32 = ORRWrs $wzr, %2, 0
22
29
// %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
@@ -51,6 +58,12 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
51
58
MachineLoopInfo *MLI;
52
59
MachineRegisterInfo *MRI;
53
60
61
+ bool checkMovImmInstr (MachineInstr &MI, MachineInstr *&MovMI,
62
+ MachineInstr *&SubregToRegMI);
63
+
64
+ template <typename T>
65
+ bool visitADDSUB (MachineInstr &MI,
66
+ SmallSetVector<MachineInstr *, 8 > &ToBeRemoved, bool IsAdd);
54
67
template <typename T>
55
68
bool visitAND (MachineInstr &MI,
56
69
SmallSetVector<MachineInstr *, 8 > &ToBeRemoved);
@@ -131,36 +144,9 @@ bool AArch64MIPeepholeOpt::visitAND(
131
144
assert ((RegSize == 32 || RegSize == 64 ) &&
132
145
" Invalid RegSize for AND bitmask peephole optimization" );
133
146
134
- // Check whether AND's MBB is in loop and the AND is loop invariant.
135
- MachineBasicBlock *MBB = MI.getParent ();
136
- MachineLoop *L = MLI->getLoopFor (MBB);
137
- if (L && !L->isLoopInvariant (MI))
138
- return false ;
139
-
140
- // Check whether AND's operand is MOV with immediate.
141
- MachineInstr *MovMI = MRI->getUniqueVRegDef (MI.getOperand (2 ).getReg ());
142
- if (!MovMI)
143
- return false ;
144
-
145
- MachineInstr *SubregToRegMI = nullptr ;
146
- // If it is SUBREG_TO_REG, check its operand.
147
- if (MovMI->getOpcode () == TargetOpcode::SUBREG_TO_REG) {
148
- SubregToRegMI = MovMI;
149
- MovMI = MRI->getUniqueVRegDef (MovMI->getOperand (2 ).getReg ());
150
- if (!MovMI)
151
- return false ;
152
- }
153
-
154
- if (MovMI->getOpcode () != AArch64::MOVi32imm &&
155
- MovMI->getOpcode () != AArch64::MOVi64imm)
156
- return false ;
157
-
158
- // If the MOV has multiple uses, do not split the immediate because it causes
159
- // more instructions.
160
- if (!MRI->hasOneUse (MovMI->getOperand (0 ).getReg ()))
161
- return false ;
162
-
163
- if (SubregToRegMI && !MRI->hasOneUse (SubregToRegMI->getOperand (0 ).getReg ()))
147
+ // Perform several essential checks against current MI.
148
+ MachineInstr *MovMI = nullptr , *SubregToRegMI = nullptr ;
149
+ if (!checkMovImmInstr (MI, MovMI, SubregToRegMI))
164
150
return false ;
165
151
166
152
// Split the bitmask immediate into two.
@@ -177,6 +163,7 @@ bool AArch64MIPeepholeOpt::visitAND(
177
163
178
164
// Create new AND MIs.
179
165
DebugLoc DL = MI.getDebugLoc ();
166
+ MachineBasicBlock *MBB = MI.getParent ();
180
167
const TargetRegisterClass *ANDImmRC =
181
168
(RegSize == 32 ) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
182
169
Register DstReg = MI.getOperand (0 ).getReg ();
@@ -251,6 +238,145 @@ bool AArch64MIPeepholeOpt::visitORR(
251
238
return true ;
252
239
}
253
240
241
+ template <typename T>
242
+ static bool splitAddSubImm (T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
243
+ // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
244
+ // imm0 and imm1 are non-zero 12-bit unsigned int.
245
+ if ((Imm & 0xfff000 ) == 0 || (Imm & 0xfff ) == 0 ||
246
+ (Imm & ~static_cast <T>(0xffffff )) != 0 )
247
+ return false ;
248
+
249
+ // The immediate can not be composed via a single instruction.
250
+ SmallVector<AArch64_IMM::ImmInsnModel, 4 > Insn;
251
+ AArch64_IMM::expandMOVImm (Imm, RegSize, Insn);
252
+ if (Insn.size () == 1 )
253
+ return false ;
254
+
255
+ // Split Imm into (Imm0 << 12) + Imm1;
256
+ Imm0 = (Imm >> 12 ) & 0xfff ;
257
+ Imm1 = Imm & 0xfff ;
258
+ return true ;
259
+ }
260
+
261
+ template <typename T>
262
+ bool AArch64MIPeepholeOpt::visitADDSUB (
263
+ MachineInstr &MI, SmallSetVector<MachineInstr *, 8 > &ToBeRemoved,
264
+ bool IsAdd) {
265
+ // Try below transformation.
266
+ //
267
+ // MOVi32imm + ADDWrr ==> ADDWri + ADDWri
268
+ // MOVi64imm + ADDXrr ==> ADDXri + ADDXri
269
+ //
270
+ // MOVi32imm + SUBWrr ==> SUBWri + SUBWri
271
+ // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
272
+ //
273
+ // The mov pseudo instruction could be expanded to multiple mov instructions
274
+ // later. Let's try to split the constant operand of mov instruction into two
275
+ // legal add/sub immediates. It makes only two ADD/SUB instructions intead of
276
+ // multiple `mov` + `and/sub` instructions.
277
+
278
+ unsigned RegSize = sizeof (T) * 8 ;
279
+ assert ((RegSize == 32 || RegSize == 64 ) &&
280
+ " Invalid RegSize for legal add/sub immediate peephole optimization" );
281
+
282
+ // Perform several essential checks against current MI.
283
+ MachineInstr *MovMI, *SubregToRegMI;
284
+ if (!checkMovImmInstr (MI, MovMI, SubregToRegMI))
285
+ return false ;
286
+
287
+ // Split the immediate to Imm0 and Imm1, and calculate the Opcode.
288
+ T Imm = static_cast <T>(MovMI->getOperand (1 ).getImm ()), Imm0, Imm1;
289
+ unsigned Opcode;
290
+ if (splitAddSubImm (Imm, RegSize, Imm0, Imm1)) {
291
+ if (IsAdd)
292
+ Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
293
+ else
294
+ Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
295
+ } else if (splitAddSubImm (-Imm, RegSize, Imm0, Imm1)) {
296
+ if (IsAdd)
297
+ Opcode = RegSize == 32 ? AArch64::SUBWri : AArch64::SUBXri;
298
+ else
299
+ Opcode = RegSize == 32 ? AArch64::ADDWri : AArch64::ADDXri;
300
+ } else {
301
+ return false ;
302
+ }
303
+
304
+ // Create new ADD/SUB MIs.
305
+ DebugLoc DL = MI.getDebugLoc ();
306
+ MachineBasicBlock *MBB = MI.getParent ();
307
+ const TargetRegisterClass *RC =
308
+ (RegSize == 32 ) ? &AArch64::GPR32spRegClass : &AArch64::GPR64spRegClass;
309
+ Register DstReg = MI.getOperand (0 ).getReg ();
310
+ Register SrcReg = MI.getOperand (1 ).getReg ();
311
+ Register NewTmpReg = MRI->createVirtualRegister (RC);
312
+ Register NewDstReg = MRI->createVirtualRegister (RC);
313
+
314
+ MRI->constrainRegClass (SrcReg, RC);
315
+ BuildMI (*MBB, MI, DL, TII->get (Opcode), NewTmpReg)
316
+ .addReg (SrcReg)
317
+ .addImm (Imm0)
318
+ .addImm (12 );
319
+
320
+ MRI->constrainRegClass (NewDstReg, MRI->getRegClass (DstReg));
321
+ BuildMI (*MBB, MI, DL, TII->get (Opcode), NewDstReg)
322
+ .addReg (NewTmpReg)
323
+ .addImm (Imm1)
324
+ .addImm (0 );
325
+
326
+ MRI->replaceRegWith (DstReg, NewDstReg);
327
+ // replaceRegWith changes MI's definition register. Keep it for SSA form until
328
+ // deleting MI.
329
+ MI.getOperand (0 ).setReg (DstReg);
330
+
331
+ // Record the MIs need to be removed.
332
+ ToBeRemoved.insert (&MI);
333
+ if (SubregToRegMI)
334
+ ToBeRemoved.insert (SubregToRegMI);
335
+ ToBeRemoved.insert (MovMI);
336
+
337
+ return true ;
338
+ }
339
+
340
+ // Checks if the corresponding MOV immediate instruction is applicable for
341
+ // this peephole optimization.
342
+ bool AArch64MIPeepholeOpt::checkMovImmInstr (MachineInstr &MI,
343
+ MachineInstr *&MovMI,
344
+ MachineInstr *&SubregToRegMI) {
345
+ // Check whether current MBB is in loop and the AND is loop invariant.
346
+ MachineBasicBlock *MBB = MI.getParent ();
347
+ MachineLoop *L = MLI->getLoopFor (MBB);
348
+ if (L && !L->isLoopInvariant (MI))
349
+ return false ;
350
+
351
+ // Check whether current MI's operand is MOV with immediate.
352
+ MovMI = MRI->getUniqueVRegDef (MI.getOperand (2 ).getReg ());
353
+ if (!MovMI)
354
+ return false ;
355
+
356
+ // If it is SUBREG_TO_REG, check its operand.
357
+ SubregToRegMI = nullptr ;
358
+ if (MovMI->getOpcode () == TargetOpcode::SUBREG_TO_REG) {
359
+ SubregToRegMI = MovMI;
360
+ MovMI = MRI->getUniqueVRegDef (MovMI->getOperand (2 ).getReg ());
361
+ if (!MovMI)
362
+ return false ;
363
+ }
364
+
365
+ if (MovMI->getOpcode () != AArch64::MOVi32imm &&
366
+ MovMI->getOpcode () != AArch64::MOVi64imm)
367
+ return false ;
368
+
369
+ // If the MOV has multiple uses, do not split the immediate because it causes
370
+ // more instructions.
371
+ if (!MRI->hasOneUse (MovMI->getOperand (0 ).getReg ()))
372
+ return false ;
373
+ if (SubregToRegMI && !MRI->hasOneUse (SubregToRegMI->getOperand (0 ).getReg ()))
374
+ return false ;
375
+
376
+ // It is OK to perform this peephole optimization.
377
+ return true ;
378
+ }
379
+
254
380
bool AArch64MIPeepholeOpt::runOnMachineFunction (MachineFunction &MF) {
255
381
if (skipFunction (MF.getFunction ()))
256
382
return false ;
@@ -278,6 +404,18 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
278
404
case AArch64::ORRWrs:
279
405
Changed = visitORR (MI, ToBeRemoved);
280
406
break ;
407
+ case AArch64::ADDWrr:
408
+ Changed = visitADDSUB<uint32_t >(MI, ToBeRemoved, true );
409
+ break ;
410
+ case AArch64::SUBWrr:
411
+ Changed = visitADDSUB<uint32_t >(MI, ToBeRemoved, false );
412
+ break ;
413
+ case AArch64::ADDXrr:
414
+ Changed = visitADDSUB<uint64_t >(MI, ToBeRemoved, true );
415
+ break ;
416
+ case AArch64::SUBXrr:
417
+ Changed = visitADDSUB<uint64_t >(MI, ToBeRemoved, false );
418
+ break ;
281
419
}
282
420
}
283
421
}
0 commit comments