@@ -2521,9 +2521,104 @@ bool SwingSchedulerDAG::schedulePipeline(SMSchedule &Schedule) {
2521
2521
return scheduleFound && Schedule.getMaxStageCount () > 0 ;
2522
2522
}
2523
2523
2524
+ static Register findUniqueOperandDefinedInLoop (const MachineInstr &MI) {
2525
+ const MachineRegisterInfo &MRI = MI.getParent ()->getParent ()->getRegInfo ();
2526
+ Register Result;
2527
+ for (const MachineOperand &Use : MI.all_uses ()) {
2528
+ Register Reg = Use.getReg ();
2529
+ if (!Reg.isVirtual ())
2530
+ return Register ();
2531
+ if (MRI.getVRegDef (Reg)->getParent () != MI.getParent ())
2532
+ continue ;
2533
+ if (Result)
2534
+ return Register ();
2535
+ Result = Reg;
2536
+ }
2537
+ return Result;
2538
+ }
2539
+
2540
+ // / When Op is a value that is incremented recursively in a loop and there is a
2541
+ // / unique instruction that increments it, returns true and sets Value.
2542
+ static bool findLoopIncrementValue (const MachineOperand &Op, int &Value) {
2543
+ if (!Op.isReg () || !Op.getReg ().isVirtual ())
2544
+ return false ;
2545
+
2546
+ Register OrgReg = Op.getReg ();
2547
+ Register CurReg = OrgReg;
2548
+ const MachineBasicBlock *LoopBB = Op.getParent ()->getParent ();
2549
+ const MachineRegisterInfo &MRI = LoopBB->getParent ()->getRegInfo ();
2550
+
2551
+ const TargetInstrInfo *TII =
2552
+ LoopBB->getParent ()->getSubtarget ().getInstrInfo ();
2553
+ const TargetRegisterInfo *TRI =
2554
+ LoopBB->getParent ()->getSubtarget ().getRegisterInfo ();
2555
+
2556
+ MachineInstr *Phi = nullptr ;
2557
+ MachineInstr *Increment = nullptr ;
2558
+
2559
+ // Traverse definitions until it reaches Op or an instruction that does not
2560
+ // satisfy the condition.
2561
+ // Acceptable example:
2562
+ // bb.0:
2563
+ // %0 = PHI %3, %bb.0, ...
2564
+ // %2 = ADD %0, Value
2565
+ // ... = LOAD %2(Op)
2566
+ // %3 = COPY %2
2567
+ while (true ) {
2568
+ if (!CurReg.isValid () || !CurReg.isVirtual ())
2569
+ return false ;
2570
+ MachineInstr *Def = MRI.getVRegDef (CurReg);
2571
+ if (Def->getParent () != LoopBB)
2572
+ return false ;
2573
+
2574
+ if (Def->isCopy ()) {
2575
+ // Ignore copy instructions unless they contain subregisters
2576
+ if (Def->getOperand (0 ).getSubReg () || Def->getOperand (1 ).getSubReg ())
2577
+ return false ;
2578
+ CurReg = Def->getOperand (1 ).getReg ();
2579
+ } else if (Def->isPHI ()) {
2580
+ // There must be just one Phi
2581
+ if (Phi)
2582
+ return false ;
2583
+ Phi = Def;
2584
+ CurReg = getLoopPhiReg (*Def, LoopBB);
2585
+ } else if (TII->getIncrementValue (*Def, Value)) {
2586
+ // Potentially a unique increment
2587
+ if (Increment)
2588
+ // Multiple increments exist
2589
+ return false ;
2590
+
2591
+ const MachineOperand *BaseOp;
2592
+ int64_t Offset;
2593
+ bool OffsetIsScalable;
2594
+ if (TII->getMemOperandWithOffset (*Def, BaseOp, Offset, OffsetIsScalable,
2595
+ TRI)) {
2596
+ // Pre/post increment instruction
2597
+ CurReg = BaseOp->getReg ();
2598
+ } else {
2599
+ // If only one of the operands is defined within the loop, it is assumed
2600
+ // to be an incremented value.
2601
+ CurReg = findUniqueOperandDefinedInLoop (*Def);
2602
+ if (!CurReg.isValid ())
2603
+ return false ;
2604
+ }
2605
+ Increment = Def;
2606
+ } else {
2607
+ return false ;
2608
+ }
2609
+ if (CurReg == OrgReg)
2610
+ break ;
2611
+ }
2612
+
2613
+ if (!Phi || !Increment)
2614
+ return false ;
2615
+
2616
+ return true ;
2617
+ }
2618
+
2524
2619
// / Return true if we can compute the amount the instruction changes
2525
2620
// / during each iteration. Set Delta to the amount of the change.
2526
- bool SwingSchedulerDAG::computeDelta (MachineInstr &MI, unsigned &Delta) const {
2621
+ bool SwingSchedulerDAG::computeDelta (const MachineInstr &MI, int &Delta) const {
2527
2622
const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2528
2623
const MachineOperand *BaseOp;
2529
2624
int64_t Offset;
@@ -2538,24 +2633,7 @@ bool SwingSchedulerDAG::computeDelta(MachineInstr &MI, unsigned &Delta) const {
2538
2633
if (!BaseOp->isReg ())
2539
2634
return false ;
2540
2635
2541
- Register BaseReg = BaseOp->getReg ();
2542
-
2543
- MachineRegisterInfo &MRI = MF.getRegInfo ();
2544
- // Check if there is a Phi. If so, get the definition in the loop.
2545
- MachineInstr *BaseDef = MRI.getVRegDef (BaseReg);
2546
- if (BaseDef && BaseDef->isPHI ()) {
2547
- BaseReg = getLoopPhiReg (*BaseDef, MI.getParent ());
2548
- BaseDef = MRI.getVRegDef (BaseReg);
2549
- }
2550
- if (!BaseDef)
2551
- return false ;
2552
-
2553
- int D = 0 ;
2554
- if (!TII->getIncrementValue (*BaseDef, D) && D >= 0 )
2555
- return false ;
2556
-
2557
- Delta = D;
2558
- return true ;
2636
+ return findLoopIncrementValue (*BaseOp, Delta);
2559
2637
}
2560
2638
2561
2639
// / Check if we can change the instruction to use an offset value from the
@@ -2673,6 +2751,100 @@ MachineInstr *SwingSchedulerDAG::findDefInLoop(Register Reg) {
2673
2751
return Def;
2674
2752
}
2675
2753
2754
+ // / Return false if there is no overlap between the region accessed by BaseMI in
2755
+ // / an iteration and the region accessed by OtherMI in subsequent iterations.
2756
+ bool SwingSchedulerDAG::mayOverlapInLaterIter (
2757
+ const MachineInstr *BaseMI, const MachineInstr *OtherMI) const {
2758
+ int DeltaB, DeltaO, Delta;
2759
+ if (!computeDelta (*BaseMI, DeltaB) || !computeDelta (*OtherMI, DeltaO) ||
2760
+ DeltaB != DeltaO)
2761
+ return true ;
2762
+ Delta = DeltaB;
2763
+
2764
+ const MachineOperand *BaseOpB, *BaseOpO;
2765
+ int64_t OffsetB, OffsetO;
2766
+ bool OffsetBIsScalable, OffsetOIsScalable;
2767
+ const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2768
+ if (!TII->getMemOperandWithOffset (*BaseMI, BaseOpB, OffsetB,
2769
+ OffsetBIsScalable, TRI) ||
2770
+ !TII->getMemOperandWithOffset (*OtherMI, BaseOpO, OffsetO,
2771
+ OffsetOIsScalable, TRI))
2772
+ return true ;
2773
+
2774
+ if (OffsetBIsScalable || OffsetOIsScalable)
2775
+ return true ;
2776
+
2777
+ if (!BaseOpB->isIdenticalTo (*BaseOpO)) {
2778
+ // Pass cases with different base operands but same initial values.
2779
+ // Typically for when pre/post increment is used.
2780
+
2781
+ if (!BaseOpB->isReg () || !BaseOpO->isReg ())
2782
+ return true ;
2783
+ Register RegB = BaseOpB->getReg (), RegO = BaseOpO->getReg ();
2784
+ if (!RegB.isVirtual () || !RegO.isVirtual ())
2785
+ return true ;
2786
+
2787
+ MachineInstr *DefB = MRI.getVRegDef (BaseOpB->getReg ());
2788
+ MachineInstr *DefO = MRI.getVRegDef (BaseOpO->getReg ());
2789
+ if (!DefB || !DefO || !DefB->isPHI () || !DefO->isPHI ())
2790
+ return true ;
2791
+
2792
+ unsigned InitValB = 0 ;
2793
+ unsigned LoopValB = 0 ;
2794
+ unsigned InitValO = 0 ;
2795
+ unsigned LoopValO = 0 ;
2796
+ getPhiRegs (*DefB, BB, InitValB, LoopValB);
2797
+ getPhiRegs (*DefO, BB, InitValO, LoopValO);
2798
+ MachineInstr *InitDefB = MRI.getVRegDef (InitValB);
2799
+ MachineInstr *InitDefO = MRI.getVRegDef (InitValO);
2800
+
2801
+ if (!InitDefB->isIdenticalTo (*InitDefO))
2802
+ return true ;
2803
+ }
2804
+
2805
+ LocationSize AccessSizeB = (*BaseMI->memoperands_begin ())->getSize ();
2806
+ LocationSize AccessSizeO = (*OtherMI->memoperands_begin ())->getSize ();
2807
+
2808
+ // This is the main test, which checks the offset values and the loop
2809
+ // increment value to determine if the accesses may be loop carried.
2810
+ if (!AccessSizeB.hasValue () || !AccessSizeO.hasValue ())
2811
+ return true ;
2812
+
2813
+ LLVM_DEBUG ({
2814
+ dbgs () << " Overlap check:\n " ;
2815
+ dbgs () << " BaseMI: " ;
2816
+ BaseMI->dump ();
2817
+ dbgs () << " Base + " << OffsetB << " + I * " << Delta
2818
+ << " , Len: " << AccessSizeB.getValue () << " \n " ;
2819
+ dbgs () << " OtherMI: " ;
2820
+ OtherMI->dump ();
2821
+ dbgs () << " Base + " << OffsetO << " + I * " << Delta
2822
+ << " , Len: " << AccessSizeO.getValue () << " \n " ;
2823
+ });
2824
+
2825
+ // Excessive overlap may be detected in strided patterns.
2826
+ // For example, the memory addresses of the store and the load in
2827
+ // for (i=0; i<n; i+=2) a[i+1] = a[i];
2828
+ // are assumed to overlap.
2829
+ if (Delta < 0 ) {
2830
+ int64_t BaseMinAddr = OffsetB;
2831
+ int64_t OhterNextIterMaxAddr = OffsetO + Delta + AccessSizeO.getValue () - 1 ;
2832
+ if (BaseMinAddr > OhterNextIterMaxAddr) {
2833
+ LLVM_DEBUG (dbgs () << " Result: No overlap\n " );
2834
+ return false ;
2835
+ }
2836
+ } else {
2837
+ int64_t BaseMaxAddr = OffsetB + AccessSizeB.getValue () - 1 ;
2838
+ int64_t OtherNextIterMinAddr = OffsetO + Delta;
2839
+ if (BaseMaxAddr < OtherNextIterMinAddr) {
2840
+ LLVM_DEBUG (dbgs () << " Result: No overlap\n " );
2841
+ return false ;
2842
+ }
2843
+ }
2844
+ LLVM_DEBUG (dbgs () << " Result: Overlap\n " );
2845
+ return true ;
2846
+ }
2847
+
2676
2848
// / Return true for an order or output dependence that is loop carried
2677
2849
// / potentially. A dependence is loop carried if the destination defines a value
2678
2850
// / that may be used or defined by the source in a subsequent iteration.
@@ -2704,61 +2876,7 @@ bool SwingSchedulerDAG::isLoopCarriedDep(
2704
2876
// The conservative assumption is that a dependence between memory operations
2705
2877
// may be loop carried. The following code checks when it can be proved that
2706
2878
// there is no loop carried dependence.
2707
- unsigned DeltaS, DeltaD;
2708
- if (!computeDelta (*SI, DeltaS) || !computeDelta (*DI, DeltaD))
2709
- return true ;
2710
-
2711
- const MachineOperand *BaseOpS, *BaseOpD;
2712
- int64_t OffsetS, OffsetD;
2713
- bool OffsetSIsScalable, OffsetDIsScalable;
2714
- const TargetRegisterInfo *TRI = MF.getSubtarget ().getRegisterInfo ();
2715
- if (!TII->getMemOperandWithOffset (*SI, BaseOpS, OffsetS, OffsetSIsScalable,
2716
- TRI) ||
2717
- !TII->getMemOperandWithOffset (*DI, BaseOpD, OffsetD, OffsetDIsScalable,
2718
- TRI))
2719
- return true ;
2720
-
2721
- assert (!OffsetSIsScalable && !OffsetDIsScalable &&
2722
- " Expected offsets to be byte offsets" );
2723
-
2724
- MachineInstr *DefS = MRI.getVRegDef (BaseOpS->getReg ());
2725
- MachineInstr *DefD = MRI.getVRegDef (BaseOpD->getReg ());
2726
- if (!DefS || !DefD || !DefS->isPHI () || !DefD->isPHI ())
2727
- return true ;
2728
-
2729
- unsigned InitValS = 0 ;
2730
- unsigned LoopValS = 0 ;
2731
- unsigned InitValD = 0 ;
2732
- unsigned LoopValD = 0 ;
2733
- getPhiRegs (*DefS, BB, InitValS, LoopValS);
2734
- getPhiRegs (*DefD, BB, InitValD, LoopValD);
2735
- MachineInstr *InitDefS = MRI.getVRegDef (InitValS);
2736
- MachineInstr *InitDefD = MRI.getVRegDef (InitValD);
2737
-
2738
- if (!InitDefS->isIdenticalTo (*InitDefD))
2739
- return true ;
2740
-
2741
- // Check that the base register is incremented by a constant value for each
2742
- // iteration.
2743
- MachineInstr *LoopDefS = MRI.getVRegDef (LoopValS);
2744
- int D = 0 ;
2745
- if (!LoopDefS || !TII->getIncrementValue (*LoopDefS, D))
2746
- return true ;
2747
-
2748
- LocationSize AccessSizeS = (*SI->memoperands_begin ())->getSize ();
2749
- LocationSize AccessSizeD = (*DI->memoperands_begin ())->getSize ();
2750
-
2751
- // This is the main test, which checks the offset values and the loop
2752
- // increment value to determine if the accesses may be loop carried.
2753
- if (!AccessSizeS.hasValue () || !AccessSizeD.hasValue ())
2754
- return true ;
2755
-
2756
- if (DeltaS != DeltaD || DeltaS < AccessSizeS.getValue () ||
2757
- DeltaD < AccessSizeD.getValue ())
2758
- return true ;
2759
-
2760
- return (OffsetS + (int64_t )AccessSizeS.getValue () <
2761
- OffsetD + (int64_t )AccessSizeD.getValue ());
2879
+ return mayOverlapInLaterIter (DI, SI);
2762
2880
}
2763
2881
2764
2882
void SwingSchedulerDAG::postProcessDAG () {
0 commit comments