@@ -411,10 +411,10 @@ static bool hasIrregularType(Type *Ty, const DataLayout &DL) {
411
411
/// 3) Returns upper bound estimate if known, and if \p CanUseConstantMax.
412
412
/// 4) Returns std::nullopt if all of the above failed.
413
413
static std::optional<unsigned >
414
- getSmallBestKnownTC (ScalarEvolution &SE , Loop *L,
414
+ getSmallBestKnownTC (PredicatedScalarEvolution &PSE , Loop *L,
415
415
bool CanUseConstantMax = true ) {
416
416
// Check if exact trip count is known.
417
- if (unsigned ExpectedTC = SE. getSmallConstantTripCount (L))
417
+ if (unsigned ExpectedTC = PSE. getSE ()-> getSmallConstantTripCount (L))
418
418
return ExpectedTC;
419
419
420
420
// Check if there is an expected trip count available from profile data.
@@ -426,7 +426,7 @@ getSmallBestKnownTC(ScalarEvolution &SE, Loop *L,
426
426
return std::nullopt;
427
427
428
428
// Check if upper bound estimate is known.
429
- if (unsigned ExpectedTC = SE .getSmallConstantMaxTripCount (L ))
429
+ if (unsigned ExpectedTC = PSE .getSmallConstantMaxTripCount ())
430
430
return ExpectedTC;
431
431
432
432
return std::nullopt;
@@ -1789,12 +1789,15 @@ class GeneratedRTChecks {
1789
1789
1790
1790
Loop *OuterLoop = nullptr ;
1791
1791
1792
+ PredicatedScalarEvolution &PSE;
1793
+
1792
1794
public:
1793
- GeneratedRTChecks (ScalarEvolution &SE, DominatorTree *DT, LoopInfo *LI,
1794
- TargetTransformInfo *TTI, const DataLayout &DL,
1795
- bool AddBranchWeights)
1796
- : DT(DT), LI(LI), TTI(TTI), SCEVExp(SE, DL, " scev.check" ),
1797
- MemCheckExp (SE, DL, " scev.check" ), AddBranchWeights(AddBranchWeights) {}
1795
+ GeneratedRTChecks (PredicatedScalarEvolution &PSE, DominatorTree *DT,
1796
+ LoopInfo *LI, TargetTransformInfo *TTI,
1797
+ const DataLayout &DL, bool AddBranchWeights)
1798
+ : DT(DT), LI(LI), TTI(TTI), SCEVExp(*PSE.getSE(), DL, " scev.check" ),
1799
+ MemCheckExp (*PSE.getSE(), DL, "scev.check"),
1800
+ AddBranchWeights(AddBranchWeights), PSE(PSE) {}
1798
1801
1799
1802
/// Generate runtime checks in SCEVCheckBlock and MemCheckBlock, so we can
1800
1803
/// accurately estimate the cost of the runtime checks. The blocks are
@@ -1941,7 +1944,7 @@ class GeneratedRTChecks {
1941
1944
1942
1945
// Get the best known TC estimate.
1943
1946
if (auto EstimatedTC = getSmallBestKnownTC (
1944
- *SE , OuterLoop, /* CanUseConstantMax = */ false ))
1947
+ PSE , OuterLoop, /* CanUseConstantMax = */ false ))
1945
1948
BestTripCount = *EstimatedTC;
1946
1949
1947
1950
BestTripCount = std::max (BestTripCount, 1U );
@@ -2272,8 +2275,7 @@ static bool isIndvarOverflowCheckKnownFalse(
2272
2275
// We know the runtime overflow check is known false iff the (max) trip-count
2273
2276
// is known and (max) trip-count + (VF * UF) does not overflow in the type of
2274
2277
// the vector loop induction variable.
2275
- if (unsigned TC =
2276
- Cost->PSE .getSE ()->getSmallConstantMaxTripCount (Cost->TheLoop )) {
2278
+ if (unsigned TC = Cost->PSE .getSmallConstantMaxTripCount ()) {
2277
2279
uint64_t MaxVF = VF.getKnownMinValue ();
2278
2280
if (VF.isScalable ()) {
2279
2281
std::optional<unsigned > MaxVScale =
@@ -3962,8 +3964,10 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) {
3962
3964
}
3963
3965
3964
3966
unsigned TC = PSE.getSE ()->getSmallConstantTripCount (TheLoop);
3965
- unsigned MaxTC = PSE.getSE ()-> getSmallConstantMaxTripCount (TheLoop );
3967
+ unsigned MaxTC = PSE.getSmallConstantMaxTripCount ();
3966
3968
LLVM_DEBUG (dbgs () << " LV: Found trip count: " << TC << ' \n ' );
3969
+ if (TC != MaxTC)
3970
+ LLVM_DEBUG (dbgs () << " LV: Found maximum trip count: " << MaxTC << ' \n ' );
3967
3971
if (TC == 1 ) {
3968
3972
reportVectorizationFailure (" Single iteration (non) loop" ,
3969
3973
" loop trip count is one, irrelevant for vectorization" ,
@@ -4257,7 +4261,7 @@ bool LoopVectorizationPlanner::isMoreProfitable(
4257
4261
InstructionCost CostA = A.Cost ;
4258
4262
InstructionCost CostB = B.Cost ;
4259
4263
4260
- unsigned MaxTripCount = PSE.getSE ()-> getSmallConstantMaxTripCount (OrigLoop );
4264
+ unsigned MaxTripCount = PSE.getSmallConstantMaxTripCount ();
4261
4265
4262
4266
// Improve estimate for the vector width if it is scalable.
4263
4267
unsigned EstimatedWidthA = A.Width .getKnownMinValue ();
@@ -4852,7 +4856,7 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF,
4852
4856
if (!Legal->isSafeForAnyVectorWidth ())
4853
4857
return 1 ;
4854
4858
4855
- auto BestKnownTC = getSmallBestKnownTC (* PSE. getSE () , TheLoop);
4859
+ auto BestKnownTC = getSmallBestKnownTC (PSE, TheLoop);
4856
4860
const bool HasReductions = !Legal->getReductionVars ().empty ();
4857
4861
4858
4862
// If we did not calculate the cost for VF (because the user selected the VF)
@@ -9618,8 +9622,8 @@ static bool processLoopInVPlanNativePath(
9618
9622
{
9619
9623
bool AddBranchWeights =
9620
9624
hasBranchWeightMD (*L->getLoopLatch ()->getTerminator ());
9621
- GeneratedRTChecks Checks (* PSE. getSE () , DT, LI, TTI,
9622
- F-> getDataLayout (), AddBranchWeights);
9625
+ GeneratedRTChecks Checks (PSE, DT, LI, TTI, F-> getDataLayout () ,
9626
+ AddBranchWeights);
9623
9627
InnerLoopVectorizer LB (L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width ,
9624
9628
VF.Width , 1 , LVL, &CM, BFI, PSI, Checks);
9625
9629
LLVM_DEBUG (dbgs () << " Vectorizing outer loop in \" "
@@ -9683,7 +9687,7 @@ static void checkMixedPrecision(Loop *L, OptimizationRemarkEmitter *ORE) {
9683
9687
static bool areRuntimeChecksProfitable (GeneratedRTChecks &Checks,
9684
9688
VectorizationFactor &VF,
9685
9689
std::optional<unsigned > VScale, Loop *L,
9686
- ScalarEvolution &SE ,
9690
+ PredicatedScalarEvolution &PSE ,
9687
9691
ScalarEpilogueLowering SEL) {
9688
9692
InstructionCost CheckCost = Checks.getCost ();
9689
9693
if (!CheckCost.isValid ())
@@ -9768,7 +9772,7 @@ static bool areRuntimeChecksProfitable(GeneratedRTChecks &Checks,
9768
9772
9769
9773
// Skip vectorization if the expected trip count is less than the minimum
9770
9774
// required trip count.
9771
- if (auto ExpectedTC = getSmallBestKnownTC (SE , L)) {
9775
+ if (auto ExpectedTC = getSmallBestKnownTC (PSE , L)) {
9772
9776
if (ElementCount::isKnownLT (ElementCount::getFixed (*ExpectedTC),
9773
9777
VF.MinProfitableTripCount )) {
9774
9778
LLVM_DEBUG (dbgs () << " LV: Vectorization is not beneficial: expected "
@@ -9875,7 +9879,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
9875
9879
9876
9880
// Check the loop for a trip count threshold: vectorize loops with a tiny trip
9877
9881
// count by optimizing for size, to minimize overheads.
9878
- auto ExpectedTC = getSmallBestKnownTC (*SE , L);
9882
+ auto ExpectedTC = getSmallBestKnownTC (PSE , L);
9879
9883
if (ExpectedTC && *ExpectedTC < TinyTripCountVectorThreshold) {
9880
9884
LLVM_DEBUG (dbgs () << " LV: Found a loop with a very small trip count. "
9881
9885
<< " This loop is worth vectorizing only if no scalar "
@@ -9973,8 +9977,8 @@ bool LoopVectorizePass::processLoop(Loop *L) {
9973
9977
9974
9978
bool AddBranchWeights =
9975
9979
hasBranchWeightMD (*L->getLoopLatch ()->getTerminator ());
9976
- GeneratedRTChecks Checks (* PSE. getSE () , DT, LI, TTI,
9977
- F-> getDataLayout (), AddBranchWeights);
9980
+ GeneratedRTChecks Checks (PSE, DT, LI, TTI, F-> getDataLayout () ,
9981
+ AddBranchWeights);
9978
9982
if (LVP.hasPlanWithVF (VF.Width )) {
9979
9983
// Select the interleave count.
9980
9984
IC = CM.selectInterleaveCount (VF.Width , VF.Cost );
@@ -9990,7 +9994,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
9990
9994
Hints.getForce () == LoopVectorizeHints::FK_Enabled;
9991
9995
if (!ForceVectorization &&
9992
9996
!areRuntimeChecksProfitable (Checks, VF, getVScaleForTuning (L, *TTI), L,
9993
- * PSE. getSE () , SEL)) {
9997
+ PSE, SEL)) {
9994
9998
ORE->emit ([&]() {
9995
9999
return OptimizationRemarkAnalysisAliasing (
9996
10000
DEBUG_TYPE, " CantReorderMemOps" , L->getStartLoc (),
0 commit comments