Skip to content

Commit f0e34f3

Browse files
committed
[VPlan] Don't skip optimizable truncs in planContainsAdditionalSimps.
An optimizable cast can also be removed by VPlan simplifications. Remove the restriction from planContainsAdditionalSimplifications, as it causes the function to miss relevant simplifications, triggering false positives in the cost decision verification. Also add debug output for printing additional cost precomputations. Fixes #106641.
1 parent c8568f0 commit f0e34f3

File tree

2 files changed

+90
-17
lines changed

2 files changed

+90
-17
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 17 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -7147,7 +7147,12 @@ LoopVectorizationPlanner::precomputeCosts(VPlan &Plan, ElementCount VF,
71477147
if (!OrigLoop->contains(CondI) ||
71487148
!CostCtx.SkipCostComputation.insert(CondI).second)
71497149
continue;
7150-
Cost += CostCtx.getLegacyCost(CondI, VF);
7150+
InstructionCost CondICost = CostCtx.getLegacyCost(CondI, VF);
7151+
LLVM_DEBUG({
7152+
dbgs() << "Cost of " << CondICost << " for VF " << VF
7153+
<< ": exit condition instruction " << *CondI << "\n";
7154+
});
7155+
Cost += CondICost;
71517156
for (Value *Op : CondI->operands()) {
71527157
auto *OpI = dyn_cast<Instruction>(Op);
71537158
if (!OpI || any_of(OpI->users(), [&ExitInstrs, this](User *U) {
@@ -7250,10 +7255,9 @@ InstructionCost LoopVectorizationPlanner::cost(VPlan &Plan,
72507255
/// not have corresponding recipes in \p Plan and are not marked to be ignored
72517256
/// in \p CostCtx. This means the VPlan contains simplification that the legacy
72527257
/// cost-model did not account for.
7253-
static bool
7254-
planContainsAdditionalSimplifications(VPlan &Plan, ElementCount VF,
7255-
VPCostContext &CostCtx, Loop *TheLoop,
7256-
LoopVectorizationCostModel &CM) {
7258+
static bool planContainsAdditionalSimplifications(VPlan &Plan,
7259+
VPCostContext &CostCtx,
7260+
Loop *TheLoop) {
72577261
// First collect all instructions for the recipes in Plan.
72587262
auto GetInstructionForCost = [](const VPRecipeBase *R) -> Instruction * {
72597263
if (auto *S = dyn_cast<VPSingleDefRecipe>(R))
@@ -7284,16 +7288,13 @@ planContainsAdditionalSimplifications(VPlan &Plan, ElementCount VF,
72847288
// Return true if the loop contains any instructions that are not also part of
72857289
// the VPlan or are skipped for VPlan-based cost computations. This indicates
72867290
// that the VPlan contains extra simplifications.
7287-
return any_of(
7288-
TheLoop->blocks(), [&SeenInstrs, VF, &CostCtx, &CM](BasicBlock *BB) {
7289-
return any_of(*BB, [&SeenInstrs, VF, &CostCtx, &CM](Instruction &I) {
7290-
if (isa<PHINode>(&I))
7291-
return false;
7292-
return !SeenInstrs.contains(&I) &&
7293-
!CostCtx.skipCostComputation(&I, true) &&
7294-
!CM.canTruncateToMinimalBitwidth(&I, VF);
7295-
});
7296-
});
7291+
return any_of(TheLoop->blocks(), [&SeenInstrs, &CostCtx](BasicBlock *BB) {
7292+
return any_of(*BB, [&SeenInstrs, &CostCtx](Instruction &I) {
7293+
if (isa<PHINode>(&I))
7294+
return false;
7295+
return !SeenInstrs.contains(&I) && !CostCtx.skipCostComputation(&I, true);
7296+
});
7297+
});
72977298
}
72987299
#endif
72997300

@@ -7364,8 +7365,7 @@ VectorizationFactor LoopVectorizationPlanner::computeBestVF() {
73647365
precomputeCosts(BestPlan, BestFactor.Width, CostCtx);
73657366
assert((BestFactor.Width == LegacyVF.Width ||
73667367
planContainsAdditionalSimplifications(getPlanFor(BestFactor.Width),
7367-
BestFactor.Width, CostCtx,
7368-
OrigLoop, CM)) &&
7368+
CostCtx, OrigLoop)) &&
73697369
" VPlan cost model and legacy cost model disagreed");
73707370
assert((BestFactor.Width.isScalar() || BestFactor.ScalarCost > 0) &&
73717371
"when vectorizing, the scalar cost must be computed.");

llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-cost.ll

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,11 +152,84 @@ exit:
152152
ret void
153153
}
154154

155+
; Test case for https://github.com/llvm/llvm-project/issues/106641.
156+
define void @truncate_to_i1_used_by_branch(i8 %x, ptr %dst) #0 {
157+
; CHECK-LABEL: define void @truncate_to_i1_used_by_branch(
158+
; CHECK-SAME: i8 [[X:%.*]], ptr [[DST:%.*]]) #[[ATTR1:[0-9]+]] {
159+
; CHECK-NEXT: [[ENTRY:.*]]:
160+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
161+
; CHECK: [[VECTOR_PH]]:
162+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[X]], i64 0
163+
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i8> [[BROADCAST_SPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
164+
; CHECK-NEXT: [[TMP0:%.*]] = trunc <2 x i8> [[BROADCAST_SPLAT]] to <2 x i1>
165+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <2 x ptr> poison, ptr [[DST]], i64 0
166+
; CHECK-NEXT: [[BROADCAST_SPLAT4:%.*]] = shufflevector <2 x ptr> [[BROADCAST_SPLATINSERT3]], <2 x ptr> poison, <2 x i32> zeroinitializer
167+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
168+
; CHECK: [[VECTOR_BODY]]:
169+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
170+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[INDEX]], i64 0
171+
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
172+
; CHECK-NEXT: [[VEC_IV:%.*]] = add <2 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1>
173+
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x i32> [[VEC_IV]], i32 0
174+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = call <2 x i1> @llvm.get.active.lane.mask.v2i1.i32(i32 [[TMP1]], i32 2)
175+
; CHECK-NEXT: [[TMP2:%.*]] = or <2 x i1> <i1 true, i1 true>, [[TMP0]]
176+
; CHECK-NEXT: [[TMP3:%.*]] = select <2 x i1> [[ACTIVE_LANE_MASK]], <2 x i1> [[TMP2]], <2 x i1> zeroinitializer
177+
; CHECK-NEXT: call void @llvm.masked.scatter.v2i8.v2p0(<2 x i8> zeroinitializer, <2 x ptr> [[BROADCAST_SPLAT4]], i32 1, <2 x i1> [[TMP3]])
178+
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2
179+
; CHECK-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
180+
; CHECK: [[MIDDLE_BLOCK]]:
181+
; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
182+
; CHECK: [[SCALAR_PH]]:
183+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i8 [ 2, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
184+
; CHECK-NEXT: br label %[[LOOP_HEADER:.*]]
185+
; CHECK: [[LOOP_HEADER]]:
186+
; CHECK-NEXT: [[F_039:%.*]] = phi i8 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[ADD:%.*]], %[[LOOP_LATCH:.*]] ]
187+
; CHECK-NEXT: [[TMP4:%.*]] = or i8 23, [[X]]
188+
; CHECK-NEXT: [[EXTRACT_T:%.*]] = trunc i8 [[TMP4]] to i1
189+
; CHECK-NEXT: br i1 [[EXTRACT_T]], label %[[THEN:.*]], label %[[LOOP_LATCH]]
190+
; CHECK: [[THEN]]:
191+
; CHECK-NEXT: store i8 0, ptr [[DST]], align 1
192+
; CHECK-NEXT: br label %[[LOOP_LATCH]]
193+
; CHECK: [[LOOP_LATCH]]:
194+
; CHECK-NEXT: [[ADD]] = add i8 [[F_039]], 1
195+
; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[F_039]] to i32
196+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], 1
197+
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP_HEADER]], label %[[EXIT]], !llvm.loop [[LOOP7:![0-9]+]]
198+
; CHECK: [[EXIT]]:
199+
; CHECK-NEXT: ret void
200+
;
201+
entry:
202+
br label %loop.header
203+
204+
loop.header:
205+
%f.039 = phi i8 [ 0, %entry ], [ %add, %loop.latch ]
206+
%0 = or i8 23, %x
207+
%extract.t = trunc i8 %0 to i1
208+
br i1 %extract.t, label %then, label %loop.latch
209+
210+
then:
211+
store i8 0, ptr %dst, align 1
212+
br label %loop.latch
213+
214+
loop.latch:
215+
%add = add i8 %f.039, 1
216+
%conv = sext i8 %f.039 to i32
217+
%cmp = icmp slt i32 %conv, 1
218+
br i1 %cmp, label %loop.header, label %exit
219+
220+
exit:
221+
ret void
222+
}
223+
224+
attributes #0 = { "target-features"="+64bit,+v,+zvl256b" }
225+
155226
;.
156227
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
157228
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
158229
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
159230
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
160231
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
161232
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
233+
; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
234+
; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
162235
;.

0 commit comments

Comments
 (0)