Skip to content

Commit 783a846

Browse files
committed
[VPlan] Add VF as operand to VPScalarIVStepsRecipe.
Similarly to other recipes, update VPScalarIVStepsRecipe to also take the runtime VF as argument. This removes some unnecessary runtime VF computations for scalable vectors. It will also allow dropping the UF == 1 restriction for narrowing interleave groups required in 577631f.
1 parent 3026fa0 commit 783a846

32 files changed

+180
-99
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -262,9 +262,10 @@ class VPBuilder {
262262

263263
VPScalarIVStepsRecipe *
264264
createScalarIVSteps(Instruction::BinaryOps InductionOpcode,
265-
FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step) {
265+
FPMathOperator *FPBinOp, VPValue *IV, VPValue *Step,
266+
VPValue *VF) {
266267
return tryInsertInstruction(new VPScalarIVStepsRecipe(
267-
IV, Step, InductionOpcode,
268+
IV, Step, VF, InductionOpcode,
268269
FPBinOp ? FPBinOp->getFastMathFlags() : FastMathFlags()));
269270
}
270271

llvm/lib/Transforms/Vectorize/VPlan.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3069,20 +3069,20 @@ class VPDerivedIVRecipe : public VPSingleDefRecipe {
30693069
/// A recipe for handling phi nodes of integer and floating-point inductions,
30703070
/// producing their scalar values.
30713071
class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
3072-
public VPUnrollPartAccessor<2> {
3072+
public VPUnrollPartAccessor<3> {
30733073
Instruction::BinaryOps InductionOpcode;
30743074

30753075
public:
3076-
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step,
3076+
VPScalarIVStepsRecipe(VPValue *IV, VPValue *Step, VPValue *VF,
30773077
Instruction::BinaryOps Opcode, FastMathFlags FMFs)
30783078
: VPRecipeWithIRFlags(VPDef::VPScalarIVStepsSC,
3079-
ArrayRef<VPValue *>({IV, Step}), FMFs),
3079+
ArrayRef<VPValue *>({IV, Step, VF}), FMFs),
30803080
InductionOpcode(Opcode) {}
30813081

30823082
VPScalarIVStepsRecipe(const InductionDescriptor &IndDesc, VPValue *IV,
3083-
VPValue *Step)
3083+
VPValue *Step, VPValue *VF)
30843084
: VPScalarIVStepsRecipe(
3085-
IV, Step, IndDesc.getInductionOpcode(),
3085+
IV, Step, VF, IndDesc.getInductionOpcode(),
30863086
dyn_cast_or_null<FPMathOperator>(IndDesc.getInductionBinOp())
30873087
? IndDesc.getInductionBinOp()->getFastMathFlags()
30883088
: FastMathFlags()) {}
@@ -3091,7 +3091,7 @@ class VPScalarIVStepsRecipe : public VPRecipeWithIRFlags,
30913091

30923092
VPScalarIVStepsRecipe *clone() override {
30933093
return new VPScalarIVStepsRecipe(
3094-
getOperand(0), getOperand(1), InductionOpcode,
3094+
getOperand(0), getOperand(1), getOperand(2), InductionOpcode,
30953095
hasFastMathFlags() ? getFastMathFlags() : FastMathFlags());
30963096
}
30973097

llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2071,8 +2071,18 @@ void VPScalarIVStepsRecipe::execute(VPTransformState &State) {
20712071
StartLane = State.Lane->getKnownLane();
20722072
EndLane = StartLane + 1;
20732073
}
2074-
Value *StartIdx0 =
2075-
createStepForVF(Builder, IntStepTy, State.VF, getUnrollPart(*this));
2074+
Value *StartIdx0;
2075+
if (getUnrollPart(*this) == 0)
2076+
StartIdx0 = ConstantInt::get(IntStepTy, 0);
2077+
else {
2078+
StartIdx0 = State.get(getOperand(2), true);
2079+
if (getUnrollPart(*this) != 1) {
2080+
StartIdx0 =
2081+
Builder.CreateMul(StartIdx0, ConstantInt::get(StartIdx0->getType(),
2082+
getUnrollPart(*this)));
2083+
}
2084+
StartIdx0 = Builder.CreateTrunc(StartIdx0, IntStepTy);
2085+
}
20762086

20772087
if (!FirstLaneOnly && State.VF.isScalable()) {
20782088
auto *SplatStartIdx = Builder.CreateVectorSplat(State.VF, StartIdx0);

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -575,7 +575,8 @@ createScalarIVSteps(VPlan &Plan, InductionDescriptor::InductionKind Kind,
575575
Builder.setInsertPoint(VecPreheader);
576576
Step = Builder.createScalarCast(Instruction::Trunc, Step, ResultTy, DL);
577577
}
578-
return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step);
578+
return Builder.createScalarIVSteps(InductionOpcode, FPBinOp, BaseIV, Step,
579+
&Plan.getVF());
579580
}
580581

581582
static SmallVector<VPUser *> collectUsersRecursively(VPValue *V) {

llvm/test/Transforms/LoopVectorize/AArch64/sve-gather-scatter.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -299,10 +299,9 @@ define void @gather_nxv4i32_ind64_stride2(ptr noalias nocapture %a, ptr noalias
299299
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
300300
; CHECK: vector.body:
301301
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
302-
; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
303302
; CHECK-NEXT: [[DOTIDX1:%.*]] = shl i64 [[INDEX]], 3
304303
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i8, ptr [[B:%.*]], i64 [[DOTIDX1]]
305-
; CHECK-NEXT: [[DOTIDX3:%.*]] = shl nuw nsw i64 [[TMP9]], 5
304+
; CHECK-NEXT: [[DOTIDX3:%.*]] = shl nuw nsw i64 [[TMP3]], 5
306305
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr i8, ptr [[B]], i64 [[DOTIDX3]]
307306
; CHECK-NEXT: [[DOTIDX4:%.*]] = shl i64 [[INDEX]], 3
308307
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr i8, ptr [[TMP11]], i64 [[DOTIDX4]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-tail-folding-forced.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ target triple = "aarch64-unknown-linux-gnu"
1010

1111
; VPLANS-LABEL: Checking a loop in 'simple_memset'
1212
; VPLANS: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' {
13+
; VPLANS-NEXT: Live-in vp<[[VF:%.+]]> = VF
1314
; VPLANS-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
1415
; VPLANS: vp<[[TC:%[0-9]+]]> = original trip-count
1516
; VPLANS-EMPTY:
@@ -19,15 +20,15 @@ target triple = "aarch64-unknown-linux-gnu"
1920
; VPLANS-EMPTY:
2021
; VPLANS-NEXT: vector.ph:
2122
; VPLANS-NEXT: EMIT vp<[[NEWTC:%[0-9]+]]> = TC > VF ? TC - VF : 0 vp<[[TC]]>
22-
; VPLANS-NEXT: EMIT vp<[[VF:%.+]]> = VF * Part + ir<0>
23-
; VPLANS-NEXT: EMIT vp<[[LANEMASK_ENTRY:%.+]]> = active lane mask vp<[[VF]]>, vp<[[TC]]>
23+
; VPLANS-NEXT: EMIT vp<[[VF_PER_PART:%.+]]> = VF * Part + ir<0>
24+
; VPLANS-NEXT: EMIT vp<[[LANEMASK_ENTRY:%.+]]> = active lane mask vp<[[VF_PER_PART]]>, vp<[[TC]]>
2425
; VPLANS-NEXT: Successor(s): vector loop
2526
; VPLANS-EMPTY:
2627
; VPLANS-NEXT: <x1> vector loop: {
2728
; VPLANS-NEXT: vector.body:
2829
; VPLANS-NEXT: EMIT vp<[[INDV:%[0-9]+]]> = CANONICAL-INDUCTION
2930
; VPLANS-NEXT: ACTIVE-LANE-MASK-PHI vp<[[LANEMASK_PHI:%[0-9]+]]> = phi vp<[[LANEMASK_ENTRY]]>, vp<[[LANEMASK_LOOP:%.+]]>
30-
; VPLANS-NEXT: vp<[[STEP:%[0-9]+]]> = SCALAR-STEPS vp<[[INDV]]>, ir<1>
31+
; VPLANS-NEXT: vp<[[STEP:%[0-9]+]]> = SCALAR-STEPS vp<[[INDV]]>, ir<1>, vp<[[VF]]>
3132
; VPLANS-NEXT: CLONE ir<%gep> = getelementptr ir<%ptr>, vp<[[STEP]]>
3233
; VPLANS-NEXT: vp<[[VEC_PTR:%[0-9]+]]> = vector-pointer ir<%gep>
3334
; VPLANS-NEXT: WIDEN store vp<[[VEC_PTR]]>, ir<%val>, vp<[[LANEMASK_PHI]]>

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-gep.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ target triple = "aarch64-unknown-linux-gnu"
99
; CHECK-NOT: LV: Found {{.*}} scalar instruction: %ptr.iv.2.next = getelementptr inbounds i8, ptr %ptr.iv.2, i64 1
1010
;
1111
; CHECK: VPlan 'Initial VPlan for VF={vscale x 2},UF>=1' {
12+
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
1213
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
1314
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
1415
; CHECK-NEXT: Live-in ir<%N> = original trip-count
@@ -26,7 +27,7 @@ target triple = "aarch64-unknown-linux-gnu"
2627
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
2728
; CHECK-NEXT: EMIT ir<%ptr.iv.2> = WIDEN-POINTER-INDUCTION ir<%start.2>, ir<1>
2829
; CHECK-NEXT: vp<[[PTR_IDX:%.+]]> = DERIVED-IV ir<0> + vp<[[CAN_IV]]> * ir<8>
29-
; CHECK-NEXT: vp<[[PTR_IDX_STEPS:%.+]]> = SCALAR-STEPS vp<[[PTR_IDX]]>, ir<8>
30+
; CHECK-NEXT: vp<[[PTR_IDX_STEPS:%.+]]> = SCALAR-STEPS vp<[[PTR_IDX]]>, ir<8>, vp<[[VF]]>
3031
; CHECK-NEXT: EMIT vp<[[PTR_IV_1:%.+]]> = ptradd ir<%start.1>, vp<[[PTR_IDX_STEPS]]>
3132
; CHECK-NEXT: WIDEN-GEP Var[Inv] ir<%ptr.iv.2.next> = getelementptr inbounds ir<%ptr.iv.2>, ir<1>
3233
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer vp<[[PTR_IV_1]]>

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-phi.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@ define void @widen_ptr_phi_unrolled(ptr noalias nocapture %a, ptr noalias nocapt
3333
; CHECK: vector.body:
3434
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3535
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
36-
; CHECK-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
37-
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP6]], 5
36+
; CHECK-NEXT: [[TMP7:%.*]] = shl nuw nsw i64 [[TMP4]], 5
3837
; CHECK-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[C]], i64 [[OFFSET_IDX]]
3938
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[C]], i64 [[OFFSET_IDX]]
4039
; CHECK-NEXT: [[NEXT_GEP2:%.*]] = getelementptr i8, ptr [[TMP8]], i64 [[TMP7]]

llvm/test/Transforms/LoopVectorize/AArch64/sve2-histcnt-vplan.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ target triple = "aarch64-unknown-linux-gnu"
1212

1313
;; Check that the scalar plan contains the original instructions.
1414
; CHECK: VPlan 'Initial VPlan for VF={1},UF>=1' {
15+
; CHECK-NEXT: Live-in [[VF:.*]] = VF
1516
; CHECK-NEXT: Live-in [[VFxUF:.*]] = VF * UF
1617
; CHECK-NEXT: Live-in [[VTC:.*]] = vector-trip-count
1718
; CHECK-NEXT: Live-in [[OTC:.*]] = original trip-count
@@ -25,7 +26,7 @@ target triple = "aarch64-unknown-linux-gnu"
2526
; CHECK-NEXT: <x1> vector loop: {
2627
; CHECK-NEXT: vector.body:
2728
; CHECK-NEXT: EMIT [[IV:.*]] = CANONICAL-INDUCTION ir<0>, [[IV_NEXT:.*]]
28-
; CHECK-NEXT: [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>
29+
; CHECK-NEXT: [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>, [[VF]]
2930
; CHECK-NEXT: CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]]
3031
; CHECK-NEXT: CLONE [[IDX:.*]] = load [[GEP_IDX]]
3132
; CHECK-NEXT: CLONE [[EXT_IDX:.*]] = zext [[IDX]]
@@ -59,6 +60,7 @@ target triple = "aarch64-unknown-linux-gnu"
5960

6061
;; Check that the vectorized plan contains a histogram recipe instead.
6162
; CHECK: VPlan 'Initial VPlan for VF={vscale x 2,vscale x 4},UF>=1' {
63+
; CHECK-NEXT: Live-in [[VF:.*]] = VF
6264
; CHECK-NEXT: Live-in [[VFxUF:.*]] = VF * UF
6365
; CHECK-NEXT: Live-in [[VTC:.*]] = vector-trip-count
6466
; CHECK-NEXT: Live-in [[OTC:.*]] = original trip-count
@@ -72,7 +74,7 @@ target triple = "aarch64-unknown-linux-gnu"
7274
; CHECK-NEXT: <x1> vector loop: {
7375
; CHECK-NEXT: vector.body:
7476
; CHECK-NEXT: EMIT [[IV:.*]] = CANONICAL-INDUCTION ir<0>, [[IV_NEXT:.*]]
75-
; CHECK-NEXT: [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>
77+
; CHECK-NEXT: [[STEPS:vp.*]] = SCALAR-STEPS [[IV]], ir<1>, [[VF]]
7678
; CHECK-NEXT: CLONE [[GEP_IDX:.*]] = getelementptr inbounds ir<%indices>, [[STEPS]]
7779
; CHECK-NEXT: [[VECP_IDX:vp.*]] = vector-pointer [[GEP_IDX]]
7880
; CHECK-NEXT: WIDEN [[IDX:.*]] = load [[VECP_IDX]]

llvm/test/Transforms/LoopVectorize/AArch64/synthesize-mask-for-call.ll

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ target triple = "aarch64-unknown-linux-gnu"
1010

1111
; CHECK-LABEL: LV: Checking a loop in 'test_v4_v4m'
1212
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
13+
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
1314
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
1415
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
1516
; CHECK-NEXT: Live-in ir<1024> = original trip-count
@@ -23,7 +24,7 @@ target triple = "aarch64-unknown-linux-gnu"
2324
; CHECK-NEXT: <x1> vector loop: {
2425
; CHECK-NEXT: vector.body:
2526
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
26-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
27+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
2728
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
2829
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
2930
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>
@@ -56,6 +57,7 @@ target triple = "aarch64-unknown-linux-gnu"
5657
; CHECK-NEXT: }
5758

5859
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
60+
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
5961
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
6062
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
6163
; CHECK-NEXT: Live-in ir<1024> = original trip-count
@@ -69,7 +71,7 @@ target triple = "aarch64-unknown-linux-gnu"
6971
; CHECK-NEXT: <x1> vector loop: {
7072
; CHECK-NEXT: vector.body:
7173
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
72-
; CHECK-NEXT: vp<[[STEPS]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
74+
; CHECK-NEXT: vp<[[STEPS]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
7375
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
7476
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
7577
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>
@@ -107,6 +109,7 @@ target triple = "aarch64-unknown-linux-gnu"
107109

108110
; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4m'
109111
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
112+
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
110113
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
111114
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
112115
; CHECK-NEXT: Live-in ir<1024> = original trip-count
@@ -120,7 +123,7 @@ target triple = "aarch64-unknown-linux-gnu"
120123
; CHECK-NEXT: <x1> vector loop: {
121124
; CHECK-NEXT: vector.body:
122125
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
123-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
126+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
124127
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
125128
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
126129
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>
@@ -153,6 +156,7 @@ target triple = "aarch64-unknown-linux-gnu"
153156
; CHECK-NEXT: }
154157

155158
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
159+
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
156160
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
157161
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
158162
; CHECK-NEXT: Live-in ir<1024> = original trip-count
@@ -166,7 +170,7 @@ target triple = "aarch64-unknown-linux-gnu"
166170
; CHECK-NEXT: <x1> vector loop: {
167171
; CHECK-NEXT: vector.body:
168172
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
169-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
173+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
170174
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
171175
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
172176
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>
@@ -203,6 +207,7 @@ target triple = "aarch64-unknown-linux-gnu"
203207

204208
; CHECK-LABEL: LV: Checking a loop in 'test_v2_v4'
205209
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
210+
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
206211
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
207212
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
208213
; CHECK-NEXT: Live-in ir<1024> = original trip-count
@@ -216,7 +221,7 @@ target triple = "aarch64-unknown-linux-gnu"
216221
; CHECK-NEXT: <x1> vector loop: {
217222
; CHECK-NEXT: vector.body:
218223
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
219-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
224+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
220225
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
221226
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
222227
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>
@@ -249,6 +254,7 @@ target triple = "aarch64-unknown-linux-gnu"
249254
; CHECK-NEXT: }
250255

251256
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
257+
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
252258
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
253259
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
254260
; CHECK-NEXT: Live-in ir<1024> = original trip-count
@@ -262,7 +268,7 @@ target triple = "aarch64-unknown-linux-gnu"
262268
; CHECK-NEXT: <x1> vector loop: {
263269
; CHECK-NEXT: vector.body:
264270
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
265-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
271+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
266272
; CHECK-NEXT: CLONE ir<%gep> = getelementptr ir<%b>, vp<[[STEPS]]>
267273
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep>
268274
; CHECK-NEXT: WIDEN ir<%load> = load vp<[[VEC_PTR]]>

llvm/test/Transforms/LoopVectorize/AArch64/vplan-printing.ll

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ target triple = "aarch64-none-unknown-elf"
88

99
define i32 @print_partial_reduction(ptr %a, ptr %b) {
1010
; CHECK: VPlan 'Initial VPlan for VF={8,16},UF>=1' {
11+
; CHECK-NEXT: Live-in vp<[[VF:%.]]> = VF
1112
; CHECK-NEXT: Live-in vp<[[VFxUF:%.]]> = VF * UF
1213
; CHECK-NEXT: Live-in vp<[[VEC_TC:%.+]]> = vector-trip-count
1314
; CHECK-NEXT: Live-in ir<1024> = original trip-count
@@ -22,7 +23,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
2223
; CHECK-NEXT: vector.body:
2324
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_IV_NEXT:%.+]]>
2425
; CHECK-NEXT: WIDEN-REDUCTION-PHI ir<[[ACC:%.+]]> = phi ir<0>, ir<[[REDUCE:%.+]]> (VF scaled by 1/4)
25-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
26+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
2627
; CHECK-NEXT: CLONE ir<%gep.a> = getelementptr ir<%a>, vp<[[STEPS]]>
2728
; CHECK-NEXT: vp<[[PTR_A:%.+]]> = vector-pointer ir<%gep.a>
2829
; CHECK-NEXT: WIDEN ir<%load.a> = load vp<[[PTR_A]]>
@@ -42,7 +43,7 @@ define i32 @print_partial_reduction(ptr %a, ptr %b) {
4243
; CHECK-NEXT: middle.block:
4344
; CHECK-NEXT: EMIT vp<[[RED_RESULT:%.+]]> = compute-reduction-result ir<[[ACC]]>, ir<[[REDUCE]]>
4445
; CHECK-NEXT: EMIT vp<[[EXTRACT:%.+]]> = extract-from-end vp<[[RED_RESULT]]>, ir<1>
45-
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<%1>
46+
; CHECK-NEXT: EMIT vp<[[CMP:%.+]]> = icmp eq ir<1024>, vp<[[VEC_TC]]>
4647
; CHECK-NEXT: EMIT branch-on-cond vp<[[CMP]]>
4748
; CHECK-NEXT: Successor(s): ir-bb<exit>, scalar.ph
4849
; CHECK-EMPTY:

llvm/test/Transforms/LoopVectorize/AArch64/widen-call-with-intrinsic-or-libfunc.ll

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ target triple = "arm64-apple-ios"
77

88
; CHECK-LABEL: LV: Checking a loop in 'test'
99
; CHECK: VPlan 'Initial VPlan for VF={2},UF>=1' {
10+
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
1011
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
1112
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
1213

@@ -21,7 +22,7 @@ target triple = "arm64-apple-ios"
2122
; CHECK-NEXT: <x1> vector loop: {
2223
; CHECK-NEXT: vector.body:
2324
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
24-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
25+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
2526
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
2627
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src>
2728
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>
@@ -54,6 +55,7 @@ target triple = "arm64-apple-ios"
5455
; CHECK-NEXT: }
5556

5657
; CHECK: VPlan 'Initial VPlan for VF={4},UF>=1' {
58+
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
5759
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
5860
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
5961
; CHECK-NEXT: Live-in ir<1024> = original trip-count
@@ -67,7 +69,7 @@ target triple = "arm64-apple-ios"
6769
; CHECK-NEXT: <x1> vector loop: {
6870
; CHECK-NEXT: vector.body:
6971
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION
70-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
72+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
7173
; CHECK-NEXT: CLONE ir<%gep.src> = getelementptr inbounds ir<%src>, vp<[[STEPS]]>
7274
; CHECK-NEXT: vp<[[VEC_PTR:%.+]]> = vector-pointer ir<%gep.src>
7375
; CHECK-NEXT: WIDEN ir<%l> = load vp<[[VEC_PTR]]>

llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,6 +76,7 @@ for.cond.cleanup:
7676

7777
define void @safe_dep(ptr %p) {
7878
; CHECK-LABEL: VPlan 'Initial VPlan for VF={2},UF>=1' {
79+
; CHECK-NEXT: Live-in vp<[[VF:%.+]]> = VF
7980
; CHECK-NEXT: Live-in vp<[[VFxUF:%.+]]> = VF * UF
8081
; CHECK-NEXT: Live-in vp<[[VTC:%.+]]> = vector-trip-count
8182
; CHECK-NEXT: Live-in ir<512> = original trip-count
@@ -89,7 +90,7 @@ define void @safe_dep(ptr %p) {
8990
; CHECK-NEXT: <x1> vector loop: {
9091
; CHECK-NEXT: vector.body:
9192
; CHECK-NEXT: EMIT vp<[[CAN_IV:%.+]]> = CANONICAL-INDUCTION ir<0>, vp<[[CAN_INC:%.+]]>
92-
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>
93+
; CHECK-NEXT: vp<[[STEPS:%.+]]> = SCALAR-STEPS vp<[[CAN_IV]]>, ir<1>, vp<[[VF]]>
9394
; CHECK-NEXT: CLONE ir<%a1> = getelementptr ir<%p>, vp<[[STEPS]]>
9495
; CHECK-NEXT: vp<[[VPTR1:%.+]]> = vector-pointer ir<%a1>
9596
; CHECK-NEXT: WIDEN ir<%v> = load vp<[[VPTR1]]>

llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -96,12 +96,11 @@ define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) {
9696
; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
9797
; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
9898
; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
99-
; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8
99+
; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP4]], 4
100+
; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP8]], 2
100101
; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]]
101102
; NO-VP: vector.body:
102103
; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
103-
; NO-VP-NEXT: [[TMP7:%.*]] = call i64 @llvm.vscale.i64()
104-
; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP7]], 4
105104
; NO-VP-NEXT: [[TMP9:%.*]] = add i64 [[TMP8]], 0
106105
; NO-VP-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 1
107106
; NO-VP-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], [[TMP10]]

0 commit comments

Comments
 (0)