Skip to content

Commit 27ae17a

Browse files
committed
[SimplifyCFG] use profile metadata to refine merging branch conditions
This is one step towards solving: https://llvm.org/PR49336 In that example, we disregard the recommended usage of builtin_expect, so an expensive (unpredictable) branch is folded into another branch that is guarding it. Here, we read the profile metadata to see if the 1st (predecessor) condition is likely to cause execution to bypass the 2nd (successor) condition before merging conditions by using logic ops. Differential Revision: https://reviews.llvm.org/D98898
1 parent 424bf5d commit 27ae17a

File tree

3 files changed

+89
-44
lines changed

3 files changed

+89
-44
lines changed

llvm/lib/Transforms/Utils/SimplifyCFG.cpp

+41-18
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,7 @@
6363
#include "llvm/IR/User.h"
6464
#include "llvm/IR/Value.h"
6565
#include "llvm/IR/ValueHandle.h"
66+
#include "llvm/Support/BranchProbability.h"
6667
#include "llvm/Support/Casting.h"
6768
#include "llvm/Support/CommandLine.h"
6869
#include "llvm/Support/Debug.h"
@@ -2840,38 +2841,60 @@ static bool extractPredSuccWeights(BranchInst *PBI, BranchInst *BI,
28402841
}
28412842
}
28422843

2843-
// Determine if the two branches share a common destination,
2844-
// and deduce a glue that we need to use to join branch's conditions
2845-
// to arrive at the common destination.
2844+
/// Determine if the two branches share a common destination and deduce a glue
2845+
/// that joins branch's conditions to arrive at the common destination if that
2846+
/// would be profitable.
28462847
static Optional<std::pair<Instruction::BinaryOps, bool>>
2847-
CheckIfCondBranchesShareCommonDestination(BranchInst *BI, BranchInst *PBI) {
2848+
shouldFoldCondBranchesToCommonDestination(BranchInst *BI, BranchInst *PBI,
2849+
const TargetTransformInfo *TTI) {
28482850
assert(BI && PBI && BI->isConditional() && PBI->isConditional() &&
28492851
"Both blocks must end with a conditional branches.");
28502852
assert(is_contained(predecessors(BI->getParent()), PBI->getParent()) &&
28512853
"PredBB must be a predecessor of BB.");
28522854

2853-
if (PBI->getSuccessor(0) == BI->getSuccessor(0))
2854-
return {{Instruction::Or, false}};
2855-
else if (PBI->getSuccessor(1) == BI->getSuccessor(1))
2856-
return {{Instruction::And, false}};
2857-
else if (PBI->getSuccessor(0) == BI->getSuccessor(1))
2858-
return {{Instruction::And, true}};
2859-
else if (PBI->getSuccessor(1) == BI->getSuccessor(0))
2860-
return {{Instruction::Or, true}};
2855+
// We have the potential to fold the conditions together, but if the
2856+
// predecessor branch is predictable, we may not want to merge them.
2857+
uint64_t PTWeight, PFWeight;
2858+
BranchProbability PBITrueProb, Likely;
2859+
if (PBI->extractProfMetadata(PTWeight, PFWeight) &&
2860+
(PTWeight + PFWeight) != 0) {
2861+
PBITrueProb =
2862+
BranchProbability::getBranchProbability(PTWeight, PTWeight + PFWeight);
2863+
Likely = TTI->getPredictableBranchThreshold();
2864+
}
2865+
2866+
if (PBI->getSuccessor(0) == BI->getSuccessor(0)) {
2867+
// Speculate the 2nd condition unless the 1st is probably true.
2868+
if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
2869+
return {{Instruction::Or, false}};
2870+
} else if (PBI->getSuccessor(1) == BI->getSuccessor(1)) {
2871+
// Speculate the 2nd condition unless the 1st is probably false.
2872+
if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
2873+
return {{Instruction::And, false}};
2874+
} else if (PBI->getSuccessor(0) == BI->getSuccessor(1)) {
2875+
// Speculate the 2nd condition unless the 1st is probably true.
2876+
if (PBITrueProb.isUnknown() || PBITrueProb < Likely)
2877+
return {{Instruction::And, true}};
2878+
} else if (PBI->getSuccessor(1) == BI->getSuccessor(0)) {
2879+
// Speculate the 2nd condition unless the 1st is probably false.
2880+
if (PBITrueProb.isUnknown() || PBITrueProb.getCompl() < Likely)
2881+
return {{Instruction::Or, true}};
2882+
}
28612883
return None;
28622884
}
28632885

2864-
static bool PerformBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
2886+
static bool performBranchToCommonDestFolding(BranchInst *BI, BranchInst *PBI,
28652887
DomTreeUpdater *DTU,
2866-
MemorySSAUpdater *MSSAU) {
2888+
MemorySSAUpdater *MSSAU,
2889+
const TargetTransformInfo *TTI) {
28672890
BasicBlock *BB = BI->getParent();
28682891
BasicBlock *PredBlock = PBI->getParent();
28692892

28702893
// Determine if the two branches share a common destination.
28712894
Instruction::BinaryOps Opc;
28722895
bool InvertPredCond;
28732896
std::tie(Opc, InvertPredCond) =
2874-
*CheckIfCondBranchesShareCommonDestination(BI, PBI);
2897+
*shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI);
28752898

28762899
LLVM_DEBUG(dbgs() << "FOLDING BRANCH TO COMMON DEST:\n" << *PBI << *BB);
28772900

@@ -3059,8 +3082,8 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
30593082
// Determine if the two branches share a common destination.
30603083
Instruction::BinaryOps Opc;
30613084
bool InvertPredCond;
3062-
if (auto Recepie = CheckIfCondBranchesShareCommonDestination(BI, PBI))
3063-
std::tie(Opc, InvertPredCond) = *Recepie;
3085+
if (auto Recipe = shouldFoldCondBranchesToCommonDestination(BI, PBI, TTI))
3086+
std::tie(Opc, InvertPredCond) = *Recipe;
30643087
else
30653088
continue;
30663089

@@ -3077,7 +3100,7 @@ bool llvm::FoldBranchToCommonDest(BranchInst *BI, DomTreeUpdater *DTU,
30773100
continue;
30783101
}
30793102

3080-
return PerformBranchToCommonDestFolding(BI, PBI, DTU, MSSAU);
3103+
return performBranchToCommonDestFolding(BI, PBI, DTU, MSSAU, TTI);
30813104
}
30823105
return Changed;
30833106
}

llvm/test/Transforms/PGOProfile/chr.ll

+7-7
Original file line numberDiff line numberDiff line change
@@ -1277,11 +1277,12 @@ define i32 @test_chr_14(i32* %i, i32* %j, i32 %sum0, i1 %pred, i32 %z) !prof !14
12771277
; CHECK-LABEL: @test_chr_14(
12781278
; CHECK-NEXT: entry:
12791279
; CHECK-NEXT: [[I0:%.*]] = load i32, i32* [[I:%.*]], align 4
1280-
; CHECK-NEXT: [[V1:%.*]] = icmp ne i32 [[Z:%.*]], 1
1280+
; CHECK-NEXT: [[V1:%.*]] = icmp eq i32 [[Z:%.*]], 1
1281+
; CHECK-NEXT: br i1 [[V1]], label [[BB1:%.*]], label [[ENTRY_SPLIT_NONCHR:%.*]], !prof !15
1282+
; CHECK: entry.split.nonchr:
12811283
; CHECK-NEXT: [[V0:%.*]] = icmp eq i32 [[Z]], 0
12821284
; CHECK-NEXT: [[V3_NONCHR:%.*]] = and i1 [[V0]], [[PRED:%.*]]
1283-
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[V1]], [[V3_NONCHR]]
1284-
; CHECK-NEXT: br i1 [[OR_COND]], label [[BB0_NONCHR:%.*]], label [[BB1:%.*]], !prof !19
1285+
; CHECK-NEXT: br i1 [[V3_NONCHR]], label [[BB0_NONCHR:%.*]], label [[BB1]], !prof !16
12851286
; CHECK: bb0.nonchr:
12861287
; CHECK-NEXT: call void @foo()
12871288
; CHECK-NEXT: br label [[BB1]]
@@ -1912,7 +1913,7 @@ define i32 @test_chr_21(i64 %i, i64 %k, i64 %j) !prof !14 {
19121913
; CHECK-NEXT: switch i64 [[I]], label [[BB2:%.*]] [
19131914
; CHECK-NEXT: i64 2, label [[BB3_NONCHR2:%.*]]
19141915
; CHECK-NEXT: i64 86, label [[BB2_NONCHR1:%.*]]
1915-
; CHECK-NEXT: ], !prof !20
1916+
; CHECK-NEXT: ], !prof !19
19161917
; CHECK: bb2:
19171918
; CHECK-NEXT: call void @foo()
19181919
; CHECK-NEXT: call void @foo()
@@ -2489,14 +2490,14 @@ define void @test_chr_24(i32* %i) !prof !14 {
24892490
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[I:%.*]], align 4
24902491
; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[TMP0]], 1
24912492
; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
2492-
; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !21
2493+
; CHECK-NEXT: br i1 [[TMP2]], label [[BB1:%.*]], label [[BB0:%.*]], !prof !20
24932494
; CHECK: bb0:
24942495
; CHECK-NEXT: call void @foo()
24952496
; CHECK-NEXT: br label [[BB1]]
24962497
; CHECK: bb1:
24972498
; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP0]], 2
24982499
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 0
2499-
; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !21
2500+
; CHECK-NEXT: br i1 [[TMP4]], label [[BB3:%.*]], label [[BB2:%.*]], !prof !20
25002501
; CHECK: bb2:
25012502
; CHECK-NEXT: call void @foo()
25022503
; CHECK-NEXT: br label [[BB3]]
@@ -2550,4 +2551,3 @@ bb3:
25502551
; CHECK: !16 = !{!"branch_weights", i32 0, i32 1}
25512552
; CHECK: !17 = !{!"branch_weights", i32 1, i32 1}
25522553
; CHECK: !18 = !{!"branch_weights", i32 1, i32 0}
2553-
; CHECK: !19 = !{!"branch_weights", i32 0, i32 1000}

llvm/test/Transforms/SimplifyCFG/preserve-branchweights.ll

+41-19
Original file line numberDiff line numberDiff line change
@@ -636,16 +636,17 @@ exit:
636636
ret i32 %outval
637637
}
638638

639-
; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
639+
; Merging the icmps with logic-op defeats the purpose of the metadata.
640640
; We can't tell which condition is expensive if they are combined.
641641

642642
define void @or_icmps_harmful(i32 %x, i32 %y, i8* %p) {
643643
; CHECK-LABEL: @or_icmps_harmful(
644644
; CHECK-NEXT: entry:
645645
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
646+
; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19
647+
; CHECK: rare:
646648
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
647-
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]]
648-
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19
649+
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]]
649650
; CHECK: false:
650651
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
651652
; CHECK-NEXT: br label [[EXIT]]
@@ -668,16 +669,17 @@ exit:
668669
ret void
669670
}
670671

671-
; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
672+
; Merging the icmps with logic-op defeats the purpose of the metadata.
672673
; We can't tell which condition is expensive if they are combined.
673674

674675
define void @or_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) {
675676
; CHECK-LABEL: @or_icmps_harmful_inverted(
676677
; CHECK-NEXT: entry:
677-
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sle i32 [[X:%.*]], -1
678+
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
679+
; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20
680+
; CHECK: rare:
678681
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
679-
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_FALSE]], [[EXPENSIVE]]
680-
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !19
682+
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[EXIT]], label [[FALSE:%.*]]
681683
; CHECK: false:
682684
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
683685
; CHECK-NEXT: br label [[EXIT]]
@@ -700,15 +702,16 @@ exit:
700702
ret void
701703
}
702704

703-
; The probability threshold is set by a builtin_expect setting.
705+
; The probability threshold is determined by a TTI setting.
706+
; In this example, we are just short of strongly expected, so speculate.
704707

705708
define void @or_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
706709
; CHECK-LABEL: @or_icmps_not_that_harmful(
707710
; CHECK-NEXT: entry:
708711
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
709712
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
710713
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]]
711-
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !20
714+
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21
712715
; CHECK: false:
713716
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
714717
; CHECK-NEXT: br label [[EXIT]]
@@ -731,13 +734,16 @@ exit:
731734
ret void
732735
}
733736

737+
; The probability threshold is determined by a TTI setting.
738+
; In this example, we are just short of strongly expected, so speculate.
739+
734740
define void @or_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) {
735741
; CHECK-LABEL: @or_icmps_not_that_harmful_inverted(
736742
; CHECK-NEXT: entry:
737743
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
738744
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
739745
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]]
740-
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !21
746+
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
741747
; CHECK: false:
742748
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
743749
; CHECK-NEXT: br label [[EXIT]]
@@ -760,13 +766,15 @@ exit:
760766
ret void
761767
}
762768

769+
; The 1st cmp is probably true, so speculating the 2nd is probably a win.
770+
763771
define void @or_icmps_useful(i32 %x, i32 %y, i8* %p) {
764772
; CHECK-LABEL: @or_icmps_useful(
765773
; CHECK-NEXT: entry:
766774
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
767775
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
768776
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_TRUE]], [[EXPENSIVE]]
769-
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
777+
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23
770778
; CHECK: false:
771779
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
772780
; CHECK-NEXT: br label [[EXIT]]
@@ -789,13 +797,15 @@ exit:
789797
ret void
790798
}
791799

800+
; The 1st cmp is probably false, so speculating the 2nd is probably a win.
801+
792802
define void @or_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) {
793803
; CHECK-LABEL: @or_icmps_useful_inverted(
794804
; CHECK-NEXT: entry:
795805
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
796806
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
797807
; CHECK-NEXT: [[OR_COND:%.*]] = or i1 [[EXPECTED_FALSE]], [[EXPENSIVE]]
798-
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !22
808+
; CHECK-NEXT: br i1 [[OR_COND]], label [[EXIT:%.*]], label [[FALSE:%.*]], !prof !23
799809
; CHECK: false:
800810
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
801811
; CHECK-NEXT: br label [[EXIT]]
@@ -849,16 +859,17 @@ exit:
849859
ret void
850860
}
851861

852-
; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
862+
; Merging the icmps with logic-op defeats the purpose of the metadata.
853863
; We can't tell which condition is expensive if they are combined.
854864

855865
define void @and_icmps_harmful(i32 %x, i32 %y, i8* %p) {
856866
; CHECK-LABEL: @and_icmps_harmful(
857867
; CHECK-NEXT: entry:
858868
; CHECK-NEXT: [[EXPECTED_FALSE:%.*]] = icmp sgt i32 [[X:%.*]], -1
869+
; CHECK-NEXT: br i1 [[EXPECTED_FALSE]], label [[RARE:%.*]], label [[EXIT:%.*]], !prof !20
870+
; CHECK: rare:
859871
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
860-
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[EXPECTED_FALSE]], [[EXPENSIVE]]
861-
; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23
872+
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]]
862873
; CHECK: false:
863874
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
864875
; CHECK-NEXT: br label [[EXIT]]
@@ -881,16 +892,17 @@ exit:
881892
ret void
882893
}
883894

884-
; FIXME: Merging the icmps with logic-op defeats the purpose of the metadata.
895+
; Merging the icmps with logic-op defeats the purpose of the metadata.
885896
; We can't tell which condition is expensive if they are combined.
886897

887898
define void @and_icmps_harmful_inverted(i32 %x, i32 %y, i8* %p) {
888899
; CHECK-LABEL: @and_icmps_harmful_inverted(
889900
; CHECK-NEXT: entry:
890-
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sle i32 [[X:%.*]], -1
901+
; CHECK-NEXT: [[EXPECTED_TRUE:%.*]] = icmp sgt i32 [[X:%.*]], -1
902+
; CHECK-NEXT: br i1 [[EXPECTED_TRUE]], label [[EXIT:%.*]], label [[RARE:%.*]], !prof !19
903+
; CHECK: rare:
891904
; CHECK-NEXT: [[EXPENSIVE:%.*]] = icmp eq i32 [[Y:%.*]], 0
892-
; CHECK-NEXT: [[OR_COND:%.*]] = and i1 [[EXPECTED_TRUE]], [[EXPENSIVE]]
893-
; CHECK-NEXT: br i1 [[OR_COND]], label [[FALSE:%.*]], label [[EXIT:%.*]], !prof !23
905+
; CHECK-NEXT: br i1 [[EXPENSIVE]], label [[FALSE:%.*]], label [[EXIT]]
894906
; CHECK: false:
895907
; CHECK-NEXT: store i8 42, i8* [[P:%.*]], align 1
896908
; CHECK-NEXT: br label [[EXIT]]
@@ -913,6 +925,9 @@ exit:
913925
ret void
914926
}
915927

928+
; The probability threshold is determined by a TTI setting.
929+
; In this example, we are just short of strongly expected, so speculate.
930+
916931
define void @and_icmps_not_that_harmful(i32 %x, i32 %y, i8* %p) {
917932
; CHECK-LABEL: @and_icmps_not_that_harmful(
918933
; CHECK-NEXT: entry:
@@ -942,6 +957,9 @@ exit:
942957
ret void
943958
}
944959

960+
; The probability threshold is determined by a TTI setting.
961+
; In this example, we are just short of strongly expected, so speculate.
962+
945963
define void @and_icmps_not_that_harmful_inverted(i32 %x, i32 %y, i8* %p) {
946964
; CHECK-LABEL: @and_icmps_not_that_harmful_inverted(
947965
; CHECK-NEXT: entry:
@@ -971,6 +989,8 @@ exit:
971989
ret void
972990
}
973991

992+
; The 1st cmp is probably true, so speculating the 2nd is probably a win.
993+
974994
define void @and_icmps_useful(i32 %x, i32 %y, i8* %p) {
975995
; CHECK-LABEL: @and_icmps_useful(
976996
; CHECK-NEXT: entry:
@@ -1000,6 +1020,8 @@ exit:
10001020
ret void
10011021
}
10021022

1023+
; The 1st cmp is probably false, so speculating the 2nd is probably a win.
1024+
10031025
define void @and_icmps_useful_inverted(i32 %x, i32 %y, i8* %p) {
10041026
; CHECK-LABEL: @and_icmps_useful_inverted(
10051027
; CHECK-NEXT: entry:

0 commit comments

Comments
 (0)