@@ -60,12 +60,13 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
60
60
MachineLoopInfo *MLI;
61
61
MachineRegisterInfo *MRI;
62
62
63
+ using OpcodePair = std::pair<unsigned , unsigned >;
63
64
template <typename T>
64
65
using SplitAndOpcFunc =
65
- std::function<Optional<unsigned >(T, unsigned , T &, T &)>;
66
+ std::function<Optional<OpcodePair >(T, unsigned , T &, T &)>;
66
67
using BuildMIFunc =
67
- std::function<void (MachineInstr &, unsigned , unsigned , unsigned , Register ,
68
- Register, Register)>;
68
+ std::function<void (MachineInstr &, OpcodePair , unsigned , unsigned ,
69
+ Register, Register, Register )>;
69
70
70
71
// / For instructions where an immediate operand could be split into two
71
72
// / separate immediate instructions, use the splitTwoPartImm to handle the
@@ -93,6 +94,10 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
93
94
bool visitADDSUB (unsigned PosOpc, unsigned NegOpc, MachineInstr &MI,
94
95
SmallSetVector<MachineInstr *, 8 > &ToBeRemoved);
95
96
template <typename T>
97
+ bool visitADDSSUBS (OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI,
98
+ SmallSetVector<MachineInstr *, 8 > &ToBeRemoved);
99
+
100
+ template <typename T>
96
101
bool visitAND (unsigned Opc, MachineInstr &MI,
97
102
SmallSetVector<MachineInstr *, 8 > &ToBeRemoved);
98
103
bool visitORR (MachineInstr &MI,
@@ -171,20 +176,20 @@ bool AArch64MIPeepholeOpt::visitAND(
171
176
172
177
return splitTwoPartImm<T>(
173
178
MI, ToBeRemoved,
174
- [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<unsigned > {
179
+ [Opc](T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair > {
175
180
if (splitBitmaskImm (Imm, RegSize, Imm0, Imm1))
176
- return Opc;
181
+ return std::make_pair ( Opc, Opc) ;
177
182
return None;
178
183
},
179
- [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
184
+ [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
180
185
unsigned Imm1, Register SrcReg, Register NewTmpReg,
181
186
Register NewDstReg) {
182
187
DebugLoc DL = MI.getDebugLoc ();
183
188
MachineBasicBlock *MBB = MI.getParent ();
184
- BuildMI (*MBB, MI, DL, TII->get (Opcode), NewTmpReg)
189
+ BuildMI (*MBB, MI, DL, TII->get (Opcode. first ), NewTmpReg)
185
190
.addReg (SrcReg)
186
191
.addImm (Imm0);
187
- BuildMI (*MBB, MI, DL, TII->get (Opcode), NewDstReg)
192
+ BuildMI (*MBB, MI, DL, TII->get (Opcode. second ), NewDstReg)
188
193
.addReg (NewTmpReg)
189
194
.addImm (Imm1);
190
195
});
@@ -273,23 +278,64 @@ bool AArch64MIPeepholeOpt::visitADDSUB(
273
278
return splitTwoPartImm<T>(
274
279
MI, ToBeRemoved,
275
280
[PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
276
- T &Imm1) -> Optional<unsigned > {
281
+ T &Imm1) -> Optional<OpcodePair > {
277
282
if (splitAddSubImm (Imm, RegSize, Imm0, Imm1))
278
- return PosOpc;
283
+ return std::make_pair ( PosOpc, PosOpc) ;
279
284
if (splitAddSubImm (-Imm, RegSize, Imm0, Imm1))
280
- return NegOpc;
285
+ return std::make_pair ( NegOpc, NegOpc) ;
281
286
return None;
282
287
},
283
- [&TII = TII](MachineInstr &MI, unsigned Opcode, unsigned Imm0,
288
+ [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
289
+ unsigned Imm1, Register SrcReg, Register NewTmpReg,
290
+ Register NewDstReg) {
291
+ DebugLoc DL = MI.getDebugLoc ();
292
+ MachineBasicBlock *MBB = MI.getParent ();
293
+ BuildMI (*MBB, MI, DL, TII->get (Opcode.first ), NewTmpReg)
294
+ .addReg (SrcReg)
295
+ .addImm (Imm0)
296
+ .addImm (12 );
297
+ BuildMI (*MBB, MI, DL, TII->get (Opcode.second ), NewDstReg)
298
+ .addReg (NewTmpReg)
299
+ .addImm (Imm1)
300
+ .addImm (0 );
301
+ });
302
+ }
303
+
304
+ template <typename T>
305
+ bool AArch64MIPeepholeOpt::visitADDSSUBS (
306
+ OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI,
307
+ SmallSetVector<MachineInstr *, 8 > &ToBeRemoved) {
308
+ // Try the same transformation as ADDSUB but with additional requirement
309
+ // that the condition code usages are only for Equal and Not Equal
310
+ return splitTwoPartImm<T>(
311
+ MI, ToBeRemoved,
312
+ [PosOpcs, NegOpcs, &MI, &TRI = TRI, &MRI = MRI](
313
+ T Imm, unsigned RegSize, T &Imm0, T &Imm1) -> Optional<OpcodePair> {
314
+ OpcodePair OP;
315
+ if (splitAddSubImm (Imm, RegSize, Imm0, Imm1))
316
+ OP = PosOpcs;
317
+ else if (splitAddSubImm (-Imm, RegSize, Imm0, Imm1))
318
+ OP = NegOpcs;
319
+ else
320
+ return None;
321
+ // Check conditional uses last since scanning the subsequent
322
+ // instructions is expensive
323
+ MachineInstr &SrcMI = *MRI->getUniqueVRegDef (MI.getOperand (1 ).getReg ());
324
+ Optional<UsedNZCV> NZCVUsed = examineCFlagsUse (SrcMI, MI, *TRI);
325
+ if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V )
326
+ return None;
327
+ return OP;
328
+ },
329
+ [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
284
330
unsigned Imm1, Register SrcReg, Register NewTmpReg,
285
331
Register NewDstReg) {
286
332
DebugLoc DL = MI.getDebugLoc ();
287
333
MachineBasicBlock *MBB = MI.getParent ();
288
- BuildMI (*MBB, MI, DL, TII->get (Opcode), NewTmpReg)
334
+ BuildMI (*MBB, MI, DL, TII->get (Opcode. first ), NewTmpReg)
289
335
.addReg (SrcReg)
290
336
.addImm (Imm0)
291
337
.addImm (12 );
292
- BuildMI (*MBB, MI, DL, TII->get (Opcode), NewDstReg)
338
+ BuildMI (*MBB, MI, DL, TII->get (Opcode. second ), NewDstReg)
293
339
.addReg (NewTmpReg)
294
340
.addImm (Imm1)
295
341
.addImm (0 );
@@ -357,33 +403,57 @@ bool AArch64MIPeepholeOpt::splitTwoPartImm(
357
403
// number since it was sign extended when we assign to the 64-bit Imm.
358
404
if (SubregToRegMI)
359
405
Imm &= 0xFFFFFFFF ;
360
- unsigned Opcode;
406
+ OpcodePair Opcode;
361
407
if (auto R = SplitAndOpc (Imm, RegSize, Imm0, Imm1))
362
408
Opcode = R.getValue ();
363
409
else
364
410
return false ;
365
411
366
- // Create new ADD/SUB MIs.
412
+ // Create new MIs using the first and second opcodes. Opcodes might differ for
413
+ // flag setting operations that should only set flags on second instruction.
414
+ // NewTmpReg = Opcode.first SrcReg Imm0
415
+ // NewDstReg = Opcode.second NewTmpReg Imm1
416
+
417
+ // Determine register classes for destinations and register operands
367
418
MachineFunction *MF = MI.getMF ();
368
- const TargetRegisterClass *RC =
369
- TII->getRegClass (TII->get (Opcode), 0 , TRI, *MF);
370
- const TargetRegisterClass *ORC =
371
- TII->getRegClass (TII->get (Opcode), 1 , TRI, *MF);
419
+ const TargetRegisterClass *FirstInstrDstRC =
420
+ TII->getRegClass (TII->get (Opcode.first ), 0 , TRI, *MF);
421
+ const TargetRegisterClass *FirstInstrOperandRC =
422
+ TII->getRegClass (TII->get (Opcode.first ), 1 , TRI, *MF);
423
+ const TargetRegisterClass *SecondInstrDstRC =
424
+ (Opcode.first == Opcode.second )
425
+ ? FirstInstrDstRC
426
+ : TII->getRegClass (TII->get (Opcode.second ), 0 , TRI, *MF);
427
+ const TargetRegisterClass *SecondInstrOperandRC =
428
+ (Opcode.first == Opcode.second )
429
+ ? FirstInstrOperandRC
430
+ : TII->getRegClass (TII->get (Opcode.second ), 1 , TRI, *MF);
431
+
432
+ // Get old register destinations and new register destinations
372
433
Register DstReg = MI.getOperand (0 ).getReg ();
373
434
Register SrcReg = MI.getOperand (1 ).getReg ();
374
- Register NewTmpReg = MRI->createVirtualRegister (RC);
375
- Register NewDstReg = MRI->createVirtualRegister (RC);
376
-
377
- MRI->constrainRegClass (SrcReg, RC);
378
- MRI->constrainRegClass (NewTmpReg, ORC);
379
- MRI->constrainRegClass (NewDstReg, MRI->getRegClass (DstReg));
380
-
435
+ Register NewTmpReg = MRI->createVirtualRegister (FirstInstrDstRC);
436
+ // In the situation that DstReg is not Virtual (likely WZR or XZR), we want to
437
+ // reuse that same destination register.
438
+ Register NewDstReg = DstReg.isVirtual ()
439
+ ? MRI->createVirtualRegister (SecondInstrDstRC)
440
+ : DstReg;
441
+
442
+ // Constrain registers based on their new uses
443
+ MRI->constrainRegClass (SrcReg, FirstInstrOperandRC);
444
+ MRI->constrainRegClass (NewTmpReg, SecondInstrOperandRC);
445
+ if (DstReg != NewDstReg)
446
+ MRI->constrainRegClass (NewDstReg, MRI->getRegClass (DstReg));
447
+
448
+ // Call the delegating operation to build the instruction
381
449
BuildInstr (MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);
382
450
383
- MRI->replaceRegWith (DstReg, NewDstReg);
384
451
// replaceRegWith changes MI's definition register. Keep it for SSA form until
385
- // deleting MI.
386
- MI.getOperand (0 ).setReg (DstReg);
452
+ // deleting MI. This is only needed if we made a new destination register.
453
+ if (DstReg != NewDstReg) {
454
+ MRI->replaceRegWith (DstReg, NewDstReg);
455
+ MI.getOperand (0 ).setReg (DstReg);
456
+ }
387
457
388
458
// Record the MIs that need to be removed.
389
459
ToBeRemoved.insert (&MI);
@@ -439,6 +509,26 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
439
509
Changed = visitADDSUB<uint64_t >(AArch64::SUBXri, AArch64::ADDXri, MI,
440
510
ToBeRemoved);
441
511
break ;
512
+ case AArch64::ADDSWrr:
513
+ Changed = visitADDSSUBS<uint32_t >({AArch64::ADDWri, AArch64::ADDSWri},
514
+ {AArch64::SUBWri, AArch64::SUBSWri},
515
+ MI, ToBeRemoved);
516
+ break ;
517
+ case AArch64::SUBSWrr:
518
+ Changed = visitADDSSUBS<uint32_t >({AArch64::SUBWri, AArch64::SUBSWri},
519
+ {AArch64::ADDWri, AArch64::ADDSWri},
520
+ MI, ToBeRemoved);
521
+ break ;
522
+ case AArch64::ADDSXrr:
523
+ Changed = visitADDSSUBS<uint64_t >({AArch64::ADDXri, AArch64::ADDSXri},
524
+ {AArch64::SUBXri, AArch64::SUBSXri},
525
+ MI, ToBeRemoved);
526
+ break ;
527
+ case AArch64::SUBSXrr:
528
+ Changed = visitADDSSUBS<uint64_t >({AArch64::SUBXri, AArch64::SUBSXri},
529
+ {AArch64::ADDXri, AArch64::ADDSXri},
530
+ MI, ToBeRemoved);
531
+ break ;
442
532
}
443
533
}
444
534
}
0 commit comments