Skip to content

Commit 14b2ec9

Browse files
committed
[ARM] Enable UpperBound unrolling for all loops
UpperBound unrolling was already enabled whenever a series of conditions in ARMTTIImpl::getUnrollingPreferences passed. This change enables it unconditionally, as it can help fully unroll loops that would not otherwise pass those tests. Differential Revision: https://reviews.llvm.org/D99174
1 parent a870870 commit 14b2ec9

File tree

2 files changed

+12
-52
lines changed

2 files changed

+12
-52
lines changed

llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2125,6 +2125,10 @@ bool ARMTTIImpl::emitGetActiveLaneMask() const {
21252125
}
21262126
void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
21272127
TTI::UnrollingPreferences &UP) {
2128+
// Enable Upper bound unrolling universally, not dependent upon the conditions
2129+
// below.
2130+
UP.UpperBound = true;
2131+
21282132
// Only currently enable these preferences for M-Class cores.
21292133
if (!ST->isMClass())
21302134
return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP);
@@ -2187,7 +2191,6 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
21872191

21882192
UP.Partial = true;
21892193
UP.Runtime = true;
2190-
UP.UpperBound = true;
21912194
UP.UnrollRemainder = true;
21922195
UP.DefaultUnrollRuntimeCount = 4;
21932196
UP.UnrollAndJam = true;

llvm/test/Transforms/LoopUnroll/ARM/upperbound.ll

Lines changed: 8 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -73,58 +73,15 @@ while.end: ; preds = %if.end, %entry
7373
define i32 @test2(i32 %l86) {
7474
; CHECK-LABEL: @test2(
7575
; CHECK-NEXT: entry:
76-
; CHECK-NEXT: br label [[FOR_BODY_I_I:%.*]]
77-
; CHECK: for.body.i.i:
78-
; CHECK-NEXT: [[I_0137_I_I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_I_3_I:%.*]], [[FOR_BODY_I_3_I:%.*]] ]
79-
; CHECK-NEXT: [[ADD_I_I:%.*]] = or i32 [[I_0137_I_I]], 1
80-
; CHECK-NEXT: [[TMP0:%.*]] = zext i32 [[ADD_I_I]] to i64
81-
; CHECK-NEXT: [[ARRAYIDX_I_I:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* @data, i64 0, i64 [[TMP0]]
82-
; CHECK-NEXT: [[L93:%.*]] = load i32, i32* [[ARRAYIDX_I_I]], align 4
83-
; CHECK-NEXT: [[CMP1_I_I:%.*]] = icmp sgt i32 [[L93]], [[L86:%.*]]
84-
; CHECK-NEXT: br i1 [[CMP1_I_I]], label [[LAND_LHS_TRUE_I_I:%.*]], label [[FOR_INC_I_I:%.*]]
85-
; CHECK: land.lhs.true.i.i:
86-
; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[I_0137_I_I]] to i64
87-
; CHECK-NEXT: [[ARRAYIDX2_I_I:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* @data, i64 0, i64 [[TMP1]]
88-
; CHECK-NEXT: [[L94:%.*]] = load i32, i32* [[ARRAYIDX2_I_I]], align 4
89-
; CHECK-NEXT: [[CMP3_NOT_I_I:%.*]] = icmp sgt i32 [[L94]], [[L86]]
90-
; CHECK-NEXT: br i1 [[CMP3_NOT_I_I]], label [[FOR_INC_I_I]], label [[FOR_END_I_IF_END8_I_CRIT_EDGE_I:%.*]]
91-
; CHECK: for.inc.i.i:
92-
; CHECK-NEXT: [[EXITCOND_NOT_I_I:%.*]] = icmp eq i32 [[ADD_I_I]], 25
93-
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I_I]], label [[FOR_END_I_IF_END8_I_CRIT_EDGE_I]], label [[FOR_BODY_I_1_I:%.*]]
94-
; CHECK: for.body.i.1.i:
95-
; CHECK-NEXT: [[ADD_I_1_I:%.*]] = or i32 [[I_0137_I_I]], 2
96-
; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[ADD_I_1_I]] to i64
97-
; CHECK-NEXT: [[ARRAYIDX_I_1_I:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* @data, i64 0, i64 [[TMP2]]
98-
; CHECK-NEXT: [[L345:%.*]] = load i32, i32* [[ARRAYIDX_I_1_I]], align 4
99-
; CHECK-NEXT: [[CMP1_I_1_I:%.*]] = icmp sgt i32 [[L345]], [[L86]]
100-
; CHECK-NEXT: [[CMP1_I_1_I_NOT:%.*]] = xor i1 [[CMP1_I_1_I]], true
101-
; CHECK-NEXT: [[BRMERGE:%.*]] = or i1 [[CMP1_I_I]], [[CMP1_I_1_I_NOT]]
102-
; CHECK-NEXT: br i1 [[BRMERGE]], label [[FOR_INC_I_1_I:%.*]], label [[FOR_END_I_I:%.*]]
103-
; CHECK: for.inc.i.1.i:
104-
; CHECK-NEXT: [[ADD_I_2_I:%.*]] = or i32 [[I_0137_I_I]], 3
105-
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[ADD_I_2_I]] to i64
106-
; CHECK-NEXT: [[ARRAYIDX_I_2_I:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* @data, i64 0, i64 [[TMP3]]
107-
; CHECK-NEXT: [[L346:%.*]] = load i32, i32* [[ARRAYIDX_I_2_I]], align 4
108-
; CHECK-NEXT: [[CMP1_I_2_I:%.*]] = icmp sgt i32 [[L346]], [[L86]]
109-
; CHECK-NEXT: [[CMP1_I_2_I_NOT:%.*]] = xor i1 [[CMP1_I_2_I]], true
110-
; CHECK-NEXT: [[BRMERGE1:%.*]] = or i1 [[CMP1_I_1_I]], [[CMP1_I_2_I_NOT]]
111-
; CHECK-NEXT: br i1 [[BRMERGE1]], label [[FOR_BODY_I_3_I]], label [[FOR_END_I_IF_END8_I_CRIT_EDGE_I]]
112-
; CHECK: for.body.i.3.i:
113-
; CHECK-NEXT: [[ADD_I_3_I]] = add nuw nsw i32 [[I_0137_I_I]], 4
114-
; CHECK-NEXT: [[TMP4:%.*]] = zext i32 [[ADD_I_3_I]] to i64
115-
; CHECK-NEXT: [[ARRAYIDX_I_3_I:%.*]] = getelementptr inbounds [50 x i32], [50 x i32]* @data, i64 0, i64 [[TMP4]]
116-
; CHECK-NEXT: [[L347:%.*]] = load i32, i32* [[ARRAYIDX_I_3_I]], align 4
117-
; CHECK-NEXT: [[CMP1_I_3_I:%.*]] = icmp sle i32 [[L347]], [[L86]]
118-
; CHECK-NEXT: [[BRMERGE2:%.*]] = or i1 [[CMP1_I_3_I]], [[CMP1_I_2_I]]
119-
; CHECK-NEXT: br i1 [[BRMERGE2]], label [[FOR_BODY_I_I]], label [[FOR_END_I_I]]
120-
; CHECK: for.end.i.i:
121-
; CHECK-NEXT: [[I_0_LCSSA_I_I:%.*]] = phi i32 [ [[ADD_I_I]], [[FOR_BODY_I_1_I]] ], [ [[ADD_I_2_I]], [[FOR_BODY_I_3_I]] ]
122-
; CHECK-NEXT: [[CMP5_I_I:%.*]] = icmp eq i32 [[I_0_LCSSA_I_I]], 25
123-
; CHECK-NEXT: [[SPEC_SELECT:%.*]] = select i1 [[CMP5_I_I]], i32 2, i32 0
124-
; CHECK-NEXT: br label [[FOR_END_I_IF_END8_I_CRIT_EDGE_I]]
76+
; CHECK-NEXT: [[L86_OFF:%.*]] = add i32 [[L86:%.*]], -1
77+
; CHECK-NEXT: [[SWITCH:%.*]] = icmp ult i32 [[L86_OFF]], 24
78+
; CHECK-NEXT: br i1 [[SWITCH]], label [[FOR_END_I_IF_END8_I_CRIT_EDGE_I:%.*]], label [[FOR_INC_I_3_I_5:%.*]]
12579
; CHECK: for.end.i.if.end8.i_crit_edge.i:
126-
; CHECK-NEXT: [[MERGE:%.*]] = phi i32 [ 0, [[FOR_INC_I_1_I]] ], [ 0, [[LAND_LHS_TRUE_I_I]] ], [ 1, [[FOR_INC_I_I]] ], [ [[SPEC_SELECT]], [[FOR_END_I_I]] ]
127-
; CHECK-NEXT: ret i32 [[MERGE]]
80+
; CHECK-NEXT: ret i32 0
81+
; CHECK: for.inc.i.3.i.5:
82+
; CHECK-NEXT: [[DOTNOT30:%.*]] = icmp ne i32 [[L86]], 25
83+
; CHECK-NEXT: [[SPEC_SELECT24:%.*]] = zext i1 [[DOTNOT30]] to i32
84+
; CHECK-NEXT: ret i32 [[SPEC_SELECT24]]
12885
;
12986
entry:
13087
br label %for.body.i.i

0 commit comments

Comments
 (0)