Skip to content

Commit a9633d5

Browse files
committed
[LFTR] Use recomputed BE count
This was discussed as part of D62880. The basic thought is that computing the BE taken count after widening should produce (on average) a backedge taken count that is as good as the one computed before widening. Since there's only one test in the suite which is impacted by this change, and it's essentially equivalent codegen, that seems to be a reasonable assertion. This change was separated from r362971 so that if this turns out to be problematic, the triggering piece is obvious and easily revertible. For the nestedIV example from elim-extend.ll, we end up with the following BE counts: BEFORE: (-2 + (-1 * %innercount) + %limit) AFTER: (-1 + (sext i32 (-1 + %limit) to i64) + (-1 * (sext i32 %innercount to i64))<nsw>) Note that the BEFORE count is an i32, and the AFTER count is an i64. Truncating the i64 produces the i32. llvm-svn: 362975
1 parent 9c7f93e commit a9633d5

File tree

2 files changed

+11
-19
lines changed

2 files changed

+11
-19
lines changed

llvm/lib/Transforms/Scalar/IndVarSimplify.cpp

+1-9
Original file line numberDiff line numberDiff line change
@@ -2635,15 +2635,7 @@ bool IndVarSimplify::run(Loop *L) {
26352635
if (!needsLFTR(L, ExitingBB))
26362636
continue;
26372637

2638-
// Note: This block of code is here strictly to separate a change into
2639-
// two parts: one NFC, one not. What's happening here is that SCEV is
2640-
// returning a more expensive expression for the BackedgeTakenCount for
2641-
// the loop after widening in rare circumstances. In review, we decided
2642-
// to accept that small difference - since it has minimal test suite
2643-
// impact - but for ease of attribution, the functional diff will be its
2644-
// own change.
2645-
const SCEV *BETakenCount = L->getExitingBlock() ?
2646-
BackedgeTakenCount : SE->getExitCount(L, ExitingBB);
2638+
const SCEV *BETakenCount = SE->getExitCount(L, ExitingBB);
26472639
if (isa<SCEVCouldNotCompute>(BETakenCount))
26482640
continue;
26492641

llvm/test/Transforms/IndVarSimplify/elim-extend.ll

+10-10
Original file line numberDiff line numberDiff line change
@@ -112,44 +112,44 @@ define void @nestedIV(i8* %address, i32 %limit) nounwind {
112112
; CHECK-LABEL: @nestedIV(
113113
; CHECK-NEXT: entry:
114114
; CHECK-NEXT: [[LIMITDEC:%.*]] = add i32 [[LIMIT:%.*]], -1
115-
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[LIMIT]] to i64
115+
; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[LIMITDEC]] to i64
116+
; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[LIMIT]] to i64
116117
; CHECK-NEXT: br label [[OUTERLOOP:%.*]]
117118
; CHECK: outerloop:
118119
; CHECK-NEXT: [[INDVARS_IV1:%.*]] = phi i64 [ [[INDVARS_IV_NEXT2:%.*]], [[OUTERMERGE:%.*]] ], [ 0, [[ENTRY:%.*]] ]
119120
; CHECK-NEXT: [[INNERCOUNT:%.*]] = phi i32 [ [[INNERCOUNT_MERGE:%.*]], [[OUTERMERGE]] ], [ 0, [[ENTRY]] ]
120-
; CHECK-NEXT: [[TMP1:%.*]] = add nsw i64 [[INDVARS_IV1]], -1
121-
; CHECK-NEXT: [[ADR1:%.*]] = getelementptr i8, i8* [[ADDRESS:%.*]], i64 [[TMP1]]
121+
; CHECK-NEXT: [[TMP2:%.*]] = add nsw i64 [[INDVARS_IV1]], -1
122+
; CHECK-NEXT: [[ADR1:%.*]] = getelementptr i8, i8* [[ADDRESS:%.*]], i64 [[TMP2]]
122123
; CHECK-NEXT: store i8 0, i8* [[ADR1]]
123124
; CHECK-NEXT: br label [[INNERPREHEADER:%.*]]
124125
; CHECK: innerpreheader:
125126
; CHECK-NEXT: [[INNERPRECMP:%.*]] = icmp sgt i32 [[LIMITDEC]], [[INNERCOUNT]]
126127
; CHECK-NEXT: br i1 [[INNERPRECMP]], label [[INNERLOOP_PREHEADER:%.*]], label [[OUTERMERGE]]
127128
; CHECK: innerloop.preheader:
128-
; CHECK-NEXT: [[TMP2:%.*]] = sext i32 [[INNERCOUNT]] to i64
129+
; CHECK-NEXT: [[TMP3:%.*]] = sext i32 [[INNERCOUNT]] to i64
129130
; CHECK-NEXT: br label [[INNERLOOP:%.*]]
130131
; CHECK: innerloop:
131-
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP2]], [[INNERLOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[INNERLOOP]] ]
132+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[TMP3]], [[INNERLOOP_PREHEADER]] ], [ [[INDVARS_IV_NEXT:%.*]], [[INNERLOOP]] ]
132133
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nsw i64 [[INDVARS_IV]], 1
133134
; CHECK-NEXT: [[ADR2:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV]]
134135
; CHECK-NEXT: store i8 0, i8* [[ADR2]]
135136
; CHECK-NEXT: [[ADR3:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV_NEXT]]
136137
; CHECK-NEXT: store i8 0, i8* [[ADR3]]
137-
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = sext i32 [[LIMITDEC]] to i64
138-
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[WIDE_TRIP_COUNT]]
138+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV_NEXT]], [[TMP0]]
139139
; CHECK-NEXT: br i1 [[EXITCOND]], label [[INNERLOOP]], label [[INNEREXIT:%.*]]
140140
; CHECK: innerexit:
141141
; CHECK-NEXT: [[INNERCOUNT_LCSSA_WIDE:%.*]] = phi i64 [ [[INDVARS_IV_NEXT]], [[INNERLOOP]] ]
142-
; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32
142+
; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[INNERCOUNT_LCSSA_WIDE]] to i32
143143
; CHECK-NEXT: br label [[OUTERMERGE]]
144144
; CHECK: outermerge:
145-
; CHECK-NEXT: [[INNERCOUNT_MERGE]] = phi i32 [ [[TMP3]], [[INNEREXIT]] ], [ [[INNERCOUNT]], [[INNERPREHEADER]] ]
145+
; CHECK-NEXT: [[INNERCOUNT_MERGE]] = phi i32 [ [[TMP4]], [[INNEREXIT]] ], [ [[INNERCOUNT]], [[INNERPREHEADER]] ]
146146
; CHECK-NEXT: [[ADR4:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[INDVARS_IV1]]
147147
; CHECK-NEXT: store i8 0, i8* [[ADR4]]
148148
; CHECK-NEXT: [[OFS5:%.*]] = sext i32 [[INNERCOUNT_MERGE]] to i64
149149
; CHECK-NEXT: [[ADR5:%.*]] = getelementptr i8, i8* [[ADDRESS]], i64 [[OFS5]]
150150
; CHECK-NEXT: store i8 0, i8* [[ADR5]]
151151
; CHECK-NEXT: [[INDVARS_IV_NEXT2]] = add nuw nsw i64 [[INDVARS_IV1]], 1
152-
; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT2]], [[TMP0]]
152+
; CHECK-NEXT: [[TMP47:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT2]], [[TMP1]]
153153
; CHECK-NEXT: br i1 [[TMP47]], label [[OUTERLOOP]], label [[RETURN:%.*]]
154154
; CHECK: return:
155155
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)