Skip to content

Commit ba8126b

Browse files
committed
[LV] Mark dead instructions in loop as free.
Update collectValuesToIgnore to also ignore dead instructions in the loop. Such instructions will be removed by VPlan-based DCE and won't be considered by the VPlan-based cost model. This closes a gap between the legacy and VPlan-based cost models. In practice, with the default pipelines, there shouldn't be any dead instructions in loops reaching LoopVectorize, but it is easy to generate such cases by hand or automatically via fuzzers. Fixes llvm#99701.
1 parent 9a25866 commit ba8126b

File tree

2 files changed

+130
-0
lines changed

2 files changed

+130
-0
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -137,6 +137,7 @@
137137
#include "llvm/Support/raw_ostream.h"
138138
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
139139
#include "llvm/Transforms/Utils/InjectTLIMappings.h"
140+
#include "llvm/Transforms/Utils/Local.h"
140141
#include "llvm/Transforms/Utils/LoopSimplify.h"
141142
#include "llvm/Transforms/Utils/LoopUtils.h"
142143
#include "llvm/Transforms/Utils/LoopVersioning.h"
@@ -6681,6 +6682,7 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
66816682
CodeMetrics::collectEphemeralValues(TheLoop, AC, ValuesToIgnore);
66826683

66836684
SmallVector<Value *, 4> DeadInterleavePointerOps;
6685+
SmallVector<Value *, 4> DeadOps;
66846686
for (BasicBlock *BB : TheLoop->blocks())
66856687
for (Instruction &I : *BB) {
66866688
// Find all stores to invariant variables. Since they are going to sink
@@ -6690,6 +6692,17 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
66906692
Legal->isInvariantAddressOfReduction(SI->getPointerOperand()))
66916693
ValuesToIgnore.insert(&I);
66926694

6695+
if (VecValuesToIgnore.contains(&I) || ValuesToIgnore.contains(&I))
6696+
continue;
6697+
6698+
// Add instructions that would be trivially dead and are only used by
6699+
// values already ignored to DeadOps to seed worklist.
6700+
if (wouldInstructionBeTriviallyDead(&I, TLI) &&
6701+
all_of(I.users(), [this](User *U) {
6702+
return VecValuesToIgnore.contains(U) || ValuesToIgnore.contains(U);
6703+
}))
6704+
DeadOps.push_back(&I);
6705+
66936706
// For interleave groups, we only create a pointer for the start of the
66946707
// interleave group. Queue up addresses of group members except the insert
66956708
// position for further processing.
@@ -6717,6 +6730,29 @@ void LoopVectorizationCostModel::collectValuesToIgnore() {
67176730
DeadInterleavePointerOps.append(Op->op_begin(), Op->op_end());
67186731
}
67196732

6733+
// Mark ops that would be trivially dead and are only used by ignored
6734+
// instructions as free.
6735+
for (unsigned I = 0; I != DeadOps.size(); ++I) {
6736+
auto *Op = dyn_cast<Instruction>(DeadOps[I]);
6737+
// Skip any op that shouldn't be considered dead.
6738+
if (!Op || !TheLoop->contains(Op) ||
6739+
!wouldInstructionBeTriviallyDead(Op, TLI) ||
6740+
any_of(Op->users(), [this](User *U) {
6741+
return !VecValuesToIgnore.contains(U) && !ValuesToIgnore.contains(U);
6742+
}))
6743+
continue;
6744+
6745+
// If all of Op's users are in ValuesToIgnore, add it to ValuesToIgnore
6746+
// which applies for both scalar and vector versions. Otherwise it is only
6747+
// dead in vector versions, so only add it to VecValuesToIgnore.
6748+
if (all_of(Op->users(),
6749+
[this](User *U) { return ValuesToIgnore.contains(U); }))
6750+
ValuesToIgnore.insert(Op);
6751+
6752+
VecValuesToIgnore.insert(Op);
6753+
DeadOps.append(Op->op_begin(), Op->op_end());
6754+
}
6755+
67206756
// Ignore type-promoting instructions we identified during reduction
67216757
// detection.
67226758
for (const auto &Reduction : Legal->getReductionVars()) {
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
2+
; RUN: opt -p loop-vectorize -mtriple riscv64-linux-gnu -mattr=+v,+f -S %s | FileCheck %s
3+
4+
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
5+
6+
; Test with a dead load in the loop, from
7+
; https://github.com/llvm/llvm-project/issues/99701
8+
define void @dead_load(ptr %p, i16 %start) {
9+
; CHECK-LABEL: define void @dead_load(
10+
; CHECK-SAME: ptr [[P:%.*]], i16 [[START:%.*]]) #[[ATTR0:[0-9]+]] {
11+
; CHECK-NEXT: [[ENTRY:.*]]:
12+
; CHECK-NEXT: [[START_EXT:%.*]] = sext i16 [[START]] to i64
13+
; CHECK-NEXT: [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[START_EXT]], i64 111)
14+
; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[SMAX]], [[START_EXT]]
15+
; CHECK-NEXT: [[UMIN:%.*]] = call i64 @llvm.umin.i64(i64 [[TMP0]], i64 1)
16+
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[SMAX]], [[UMIN]]
17+
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[TMP1]], [[START_EXT]]
18+
; CHECK-NEXT: [[TMP3:%.*]] = udiv i64 [[TMP2]], 3
19+
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[UMIN]], [[TMP3]]
20+
; CHECK-NEXT: [[TMP5:%.*]] = add i64 [[TMP4]], 1
21+
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
22+
; CHECK-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8
23+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ule i64 [[TMP5]], [[TMP7]]
24+
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
25+
; CHECK: [[VECTOR_PH]]:
26+
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
27+
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[TMP8]], 8
28+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP5]], [[TMP9]]
29+
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
30+
; CHECK-NEXT: [[TMP11:%.*]] = select i1 [[TMP10]], i64 [[TMP9]], i64 [[N_MOD_VF]]
31+
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP5]], [[TMP11]]
32+
; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 3
33+
; CHECK-NEXT: [[IND_END:%.*]] = add i64 [[START_EXT]], [[TMP12]]
34+
; CHECK-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
35+
; CHECK-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 8
36+
; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[START_EXT]], i64 0
37+
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
38+
; CHECK-NEXT: [[TMP15:%.*]] = call <vscale x 8 x i64> @llvm.experimental.stepvector.nxv8i64()
39+
; CHECK-NEXT: [[TMP16:%.*]] = add <vscale x 8 x i64> [[TMP15]], zeroinitializer
40+
; CHECK-NEXT: [[TMP17:%.*]] = mul <vscale x 8 x i64> [[TMP16]], shufflevector (<vscale x 8 x i64> insertelement (<vscale x 8 x i64> poison, i64 3, i64 0), <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer)
41+
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 8 x i64> [[DOTSPLAT]], [[TMP17]]
42+
; CHECK-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64()
43+
; CHECK-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 8
44+
; CHECK-NEXT: [[TMP20:%.*]] = mul i64 3, [[TMP19]]
45+
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 8 x i64> poison, i64 [[TMP20]], i64 0
46+
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT1]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
47+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
48+
; CHECK: [[VECTOR_BODY]]:
49+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
50+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 8 x i64> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
51+
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[P]], <vscale x 8 x i64> [[VEC_IND]]
52+
; CHECK-NEXT: call void @llvm.masked.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> [[TMP21]], i32 2, <vscale x 8 x i1> shufflevector (<vscale x 8 x i1> insertelement (<vscale x 8 x i1> poison, i1 true, i64 0), <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer))
53+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP14]]
54+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT2]]
55+
; CHECK-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
56+
; CHECK-NEXT: br i1 [[TMP22]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
57+
; CHECK: [[MIDDLE_BLOCK]]:
58+
; CHECK-NEXT: br label %[[SCALAR_PH]]
59+
; CHECK: [[SCALAR_PH]]:
60+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], %[[MIDDLE_BLOCK]] ], [ [[START_EXT]], %[[ENTRY]] ]
61+
; CHECK-NEXT: br label %[[LOOP:.*]]
62+
; CHECK: [[LOOP]]:
63+
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
64+
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i16, ptr [[P]], i64 [[IV]]
65+
; CHECK-NEXT: store i16 0, ptr [[GEP]], align 2
66+
; CHECK-NEXT: [[L:%.*]] = load i16, ptr [[GEP]], align 2
67+
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 3
68+
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[IV]], 111
69+
; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]], !llvm.loop [[LOOP3:![0-9]+]]
70+
; CHECK: [[EXIT]]:
71+
; CHECK-NEXT: ret void
72+
;
73+
entry:
74+
%start.ext = sext i16 %start to i64
75+
br label %loop
76+
77+
loop:
78+
%iv = phi i64 [ %start.ext, %entry ], [ %iv.next, %loop ]
79+
%gep = getelementptr i16, ptr %p, i64 %iv
80+
store i16 0, ptr %gep, align 2
81+
%l = load i16, ptr %gep, align 2
82+
%iv.next = add i64 %iv, 3
83+
%cmp = icmp slt i64 %iv, 111
84+
br i1 %cmp, label %loop, label %exit
85+
86+
exit:
87+
ret void
88+
}
89+
;.
90+
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
91+
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
92+
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
93+
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
94+
;.

0 commit comments

Comments
 (0)