Skip to content

Commit dc6d077

Browse files
authored
[CVP] Infer nneg on existing zext (#72052)
This patch infers `nneg` flags for existing zext instructions in CVP. After #71534 and this patch, we can drop the `zext -> zext nneg` transform in `RISCVCodeGenPrepare` (see https://github.com/llvm/llvm-project/blob/40671bbdefb6ff83e2685576a3cb041b62f25bbe/llvm/lib/Target/RISCV/RISCVCodeGenPrepare.cpp#L74-L83). This is an alternative to #72049.
1 parent f054947 commit dc6d077

File tree

6 files changed

+204
-9
lines changed

6 files changed

+204
-9
lines changed

llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ STATISTIC(NumNonNull, "Number of function pointer arguments marked non-null");
9393
STATISTIC(NumMinMax, "Number of llvm.[us]{min,max} intrinsics removed");
9494
STATISTIC(NumUDivURemsNarrowedExpanded,
9595
"Number of bound udiv's/urem's expanded");
96+
STATISTIC(NumZExt, "Number of non-negative deductions");
9697

9798
static bool processSelect(SelectInst *S, LazyValueInfo *LVI) {
9899
if (S->getType()->isVectorTy() || isa<Constant>(S->getCondition()))
@@ -1032,6 +1033,24 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) {
10321033
return true;
10331034
}
10341035

1036+
/// Try to add the `nneg` flag to an existing scalar zext.
///
/// \param ZExt the zext instruction to inspect.
/// \param LVI  lazy value info used to query the range of the zext operand.
/// \returns true if the flag was newly set on \p ZExt (the instruction was
///          modified), false otherwise.
static bool processZExt(ZExtInst *ZExt, LazyValueInfo *LVI) {
1037+
// Vector-typed zexts are not handled here.
if (ZExt->getType()->isVectorTy())
1038+
return false;
1039+
1040+
// Nothing to do when the flag is already present; report "no change".
if (ZExt->hasNonNeg())
1041+
return false;
1042+
1043+
// Query the range at the use of operand 0 rather than of the bare value.
const Use &Base = ZExt->getOperandUse(0);
1044+
// UndefAllowed is false -- presumably so that a possibly-undef operand cannot
// justify the flag (TODO confirm against the LazyValueInfo documentation).
if (!LVI->getConstantRangeAtUse(Base, /*UndefAllowed*/ false)
1045+
.isAllNonNegative())
1046+
return false;
1047+
1048+
// Count the deduction in the NumZExt statistic, then mark the instruction.
++NumZExt;
1049+
ZExt->setNonNeg();
1050+
1051+
return true;
1052+
}
1053+
10351054
static bool processBinOp(BinaryOperator *BinOp, LazyValueInfo *LVI) {
10361055
using OBO = OverflowingBinaryOperator;
10371056

@@ -1162,6 +1181,9 @@ static bool runImpl(Function &F, LazyValueInfo *LVI, DominatorTree *DT,
11621181
case Instruction::SExt:
11631182
BBChanged |= processSExt(cast<SExtInst>(&II), LVI);
11641183
break;
1184+
case Instruction::ZExt:
1185+
BBChanged |= processZExt(cast<ZExtInst>(&II), LVI);
1186+
break;
11651187
case Instruction::Add:
11661188
case Instruction::Sub:
11671189
case Instruction::Mul:
Lines changed: 173 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,173 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2+
; RUN: opt < %s -passes=correlated-propagation -S | FileCheck %s
3+
4+
declare void @use64(i64)
5+
6+
;; Looping positive test: for.body is only entered when %a > -1, so the
;; CHECK lines expect the zext of %a to gain the nneg flag.
define void @test1(i32 %n) {
7+
; CHECK-LABEL: @test1(
8+
; CHECK-NEXT: entry:
9+
; CHECK-NEXT: br label [[FOR_COND:%.*]]
10+
; CHECK: for.cond:
11+
; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[EXT:%.*]], [[FOR_BODY:%.*]] ]
12+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], -1
13+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
14+
; CHECK: for.body:
15+
; CHECK-NEXT: [[EXT_WIDE:%.*]] = zext nneg i32 [[A]] to i64
16+
; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE]])
17+
; CHECK-NEXT: [[EXT]] = trunc i64 [[EXT_WIDE]] to i32
18+
; CHECK-NEXT: br label [[FOR_COND]]
19+
; CHECK: for.end:
20+
; CHECK-NEXT: ret void
21+
;
22+
entry:
23+
br label %for.cond
24+
25+
for.cond: ; preds = %for.body, %entry
26+
%a = phi i32 [ %n, %entry ], [ %ext, %for.body ]
27+
%cmp = icmp sgt i32 %a, -1
28+
br i1 %cmp, label %for.body, label %for.end
29+
30+
for.body: ; preds = %for.cond
31+
%ext.wide = zext i32 %a to i64
32+
call void @use64(i64 %ext.wide)
33+
%ext = trunc i64 %ext.wide to i32
34+
br label %for.cond
35+
36+
for.end: ; preds = %for.cond
37+
ret void
38+
}
39+
40+
;; Looping negative test: the guard is %a > -2, which still admits %a == -1,
;; so the CHECK lines expect the zext to stay without the nneg flag.
41+
define void @test2(i32 %n) {
42+
; CHECK-LABEL: @test2(
43+
; CHECK-NEXT: entry:
44+
; CHECK-NEXT: br label [[FOR_COND:%.*]]
45+
; CHECK: for.cond:
46+
; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[EXT:%.*]], [[FOR_BODY:%.*]] ]
47+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[A]], -2
48+
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]]
49+
; CHECK: for.body:
50+
; CHECK-NEXT: [[EXT_WIDE:%.*]] = zext i32 [[A]] to i64
51+
; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE]])
52+
; CHECK-NEXT: [[EXT]] = trunc i64 [[EXT_WIDE]] to i32
53+
; CHECK-NEXT: br label [[FOR_COND]]
54+
; CHECK: for.end:
55+
; CHECK-NEXT: ret void
56+
;
57+
entry:
58+
br label %for.cond
59+
60+
for.cond: ; preds = %for.body, %entry
61+
%a = phi i32 [ %n, %entry ], [ %ext, %for.body ]
62+
%cmp = icmp sgt i32 %a, -2
63+
br i1 %cmp, label %for.body, label %for.end
64+
65+
for.body: ; preds = %for.cond
66+
%ext.wide = zext i32 %a to i64
67+
call void @use64(i64 %ext.wide)
68+
%ext = trunc i64 %ext.wide to i32
69+
br label %for.cond
70+
71+
for.end: ; preds = %for.cond
72+
ret void
73+
}
74+
75+
;; Non-looping positive test: block bb is only reached when %n > -1, so the
;; CHECK lines expect the zext of %n to gain the nneg flag.
76+
define void @test3(i32 %n) {
77+
; CHECK-LABEL: @test3(
78+
; CHECK-NEXT: entry:
79+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1
80+
; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]]
81+
; CHECK: bb:
82+
; CHECK-NEXT: [[EXT_WIDE:%.*]] = zext nneg i32 [[N]] to i64
83+
; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE]])
84+
; CHECK-NEXT: [[EXT:%.*]] = trunc i64 [[EXT_WIDE]] to i32
85+
; CHECK-NEXT: br label [[EXIT]]
86+
; CHECK: exit:
87+
; CHECK-NEXT: ret void
88+
;
89+
entry:
90+
%cmp = icmp sgt i32 %n, -1
91+
br i1 %cmp, label %bb, label %exit
92+
93+
bb:
94+
%ext.wide = zext i32 %n to i64
95+
call void @use64(i64 %ext.wide)
96+
%ext = trunc i64 %ext.wide to i32
97+
br label %exit
98+
99+
exit:
100+
ret void
101+
}
102+
103+
;; Non-looping negative test: the guard is %n > -2, which still admits
;; %n == -1, so the CHECK lines expect the zext to stay without nneg.
104+
define void @test4(i32 %n) {
105+
; CHECK-LABEL: @test4(
106+
; CHECK-NEXT: entry:
107+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -2
108+
; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]]
109+
; CHECK: bb:
110+
; CHECK-NEXT: [[EXT_WIDE:%.*]] = zext i32 [[N]] to i64
111+
; CHECK-NEXT: call void @use64(i64 [[EXT_WIDE]])
112+
; CHECK-NEXT: [[EXT:%.*]] = trunc i64 [[EXT_WIDE]] to i32
113+
; CHECK-NEXT: br label [[EXIT]]
114+
; CHECK: exit:
115+
; CHECK-NEXT: ret void
116+
;
117+
entry:
118+
%cmp = icmp sgt i32 %n, -2
119+
br i1 %cmp, label %bb, label %exit
120+
121+
bb:
122+
%ext.wide = zext i32 %n to i64
123+
call void @use64(i64 %ext.wide)
124+
%ext = trunc i64 %ext.wide to i32
125+
br label %exit
126+
127+
exit:
128+
ret void
129+
}
130+
131+
;; Negative test: the phi feeding the zext merges 0, 1 and undef. The CHECK
;; lines expect the zext to stay without nneg, i.e. a possibly-undef operand
;; must not be treated as known non-negative.
define i64 @may_including_undef(i1 %c.1, i1 %c.2) {
132+
; CHECK-LABEL: @may_including_undef(
133+
; CHECK-NEXT: br i1 [[C_1:%.*]], label [[TRUE_1:%.*]], label [[FALSE:%.*]]
134+
; CHECK: true.1:
135+
; CHECK-NEXT: br i1 [[C_2:%.*]], label [[TRUE_2:%.*]], label [[EXIT:%.*]]
136+
; CHECK: true.2:
137+
; CHECK-NEXT: br label [[EXIT]]
138+
; CHECK: false:
139+
; CHECK-NEXT: br label [[EXIT]]
140+
; CHECK: exit:
141+
; CHECK-NEXT: [[P:%.*]] = phi i32 [ 0, [[TRUE_1]] ], [ 1, [[TRUE_2]] ], [ undef, [[FALSE]] ]
142+
; CHECK-NEXT: [[EXT:%.*]] = zext i32 [[P]] to i64
143+
; CHECK-NEXT: ret i64 [[EXT]]
144+
;
145+
br i1 %c.1, label %true.1, label %false
146+
147+
true.1:
148+
br i1 %c.2, label %true.2, label %exit
149+
150+
true.2:
151+
br label %exit
152+
153+
false:
154+
br label %exit
155+
156+
exit:
157+
%p = phi i32 [ 0, %true.1 ], [ 1, %true.2], [ undef, %false ]
158+
%ext = zext i32 %p to i64
159+
ret i64 %ext
160+
}
161+
162+
;; Positive test without any branch: %n is noundef and %ext is only used as
;; the select arm chosen when %n > -1. The CHECK lines expect nneg on the zext
;; -- presumably inferred from the select condition at the use (TODO confirm).
define i64 @test_infer_at_use(i32 noundef %n) {
163+
; CHECK-LABEL: @test_infer_at_use(
164+
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], -1
165+
; CHECK-NEXT: [[EXT:%.*]] = zext nneg i32 [[N]] to i64
166+
; CHECK-NEXT: [[SELECT:%.*]] = select i1 [[CMP]], i64 [[EXT]], i64 0
167+
; CHECK-NEXT: ret i64 [[SELECT]]
168+
;
169+
%cmp = icmp sgt i32 %n, -1
170+
%ext = zext i32 %n to i64
171+
%select = select i1 %cmp, i64 %ext, i64 0
172+
ret i64 %select
173+
}

llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,11 +20,11 @@ define void @fp_iv_loop1(ptr noalias nocapture %A, i32 %N) #0 {
2020
; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
2121
; AUTO_VEC-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
2222
; AUTO_VEC: for.body.preheader:
23-
; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
23+
; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[N]] to i64
2424
; AUTO_VEC-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 32
2525
; AUTO_VEC-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
2626
; AUTO_VEC: vector.ph:
27-
; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[ZEXT]], 4294967264
27+
; AUTO_VEC-NEXT: [[N_VEC:%.*]] = and i64 [[ZEXT]], 2147483616
2828
; AUTO_VEC-NEXT: [[DOTCAST:%.*]] = sitofp i64 [[N_VEC]] to float
2929
; AUTO_VEC-NEXT: [[TMP0:%.*]] = fmul fast float [[DOTCAST]], 5.000000e-01
3030
; AUTO_VEC-NEXT: [[IND_END:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
@@ -103,12 +103,12 @@ define void @fp_iv_loop2(ptr noalias nocapture %A, i32 %N) {
103103
; AUTO_VEC-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
104104
; AUTO_VEC-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
105105
; AUTO_VEC: for.body.preheader:
106-
; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext i32 [[N]] to i64
106+
; AUTO_VEC-NEXT: [[ZEXT:%.*]] = zext nneg i32 [[N]] to i64
107107
; AUTO_VEC-NEXT: [[XTRAITER:%.*]] = and i64 [[ZEXT]], 7
108108
; AUTO_VEC-NEXT: [[TMP0:%.*]] = icmp ult i32 [[N]], 8
109109
; AUTO_VEC-NEXT: br i1 [[TMP0]], label [[FOR_END_LOOPEXIT_UNR_LCSSA:%.*]], label [[FOR_BODY_PREHEADER_NEW:%.*]]
110110
; AUTO_VEC: for.body.preheader.new:
111-
; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 4294967288
111+
; AUTO_VEC-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[ZEXT]], 2147483640
112112
; AUTO_VEC-NEXT: br label [[FOR_BODY:%.*]]
113113
; AUTO_VEC: for.body:
114114
; AUTO_VEC-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[FOR_BODY_PREHEADER_NEW]] ], [ [[INDVARS_IV_NEXT_7:%.*]], [[FOR_BODY]] ]

llvm/test/Transforms/PhaseOrdering/X86/excessive-unrolling.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -171,11 +171,11 @@ define void @test_runtime_trip_count(i32 %N) {
171171
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
172172
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[EXIT:%.*]]
173173
; CHECK: for.body.preheader:
174-
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[N]] to i64
174+
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[N]] to i64
175175
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4
176176
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER7:%.*]], label [[VECTOR_PH:%.*]]
177177
; CHECK: vector.ph:
178-
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967292
178+
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483644
179179
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
180180
; CHECK: vector.body:
181181
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

llvm/test/Transforms/PhaseOrdering/X86/pixel-splat.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,11 +24,11 @@ define void @loop_or(ptr noalias %pIn, ptr noalias %pOut, i32 %s) {
2424
; CHECK-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[S:%.*]], 0
2525
; CHECK-NEXT: br i1 [[CMP1]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
2626
; CHECK: for.body.preheader:
27-
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext i32 [[S]] to i64
27+
; CHECK-NEXT: [[WIDE_TRIP_COUNT:%.*]] = zext nneg i32 [[S]] to i64
2828
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[S]], 8
2929
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY_PREHEADER5:%.*]], label [[VECTOR_PH:%.*]]
3030
; CHECK: vector.ph:
31-
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967288
31+
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483640
3232
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
3333
; CHECK: vector.body:
3434
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]

llvm/test/Transforms/PhaseOrdering/X86/vdiv.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ define void @vdiv(ptr %x, ptr %y, double %a, i32 %N) #0 {
2626
; CHECK-NEXT: [[OR_COND:%.*]] = select i1 [[MIN_ITERS_CHECK]], i1 true, i1 [[DIFF_CHECK]]
2727
; CHECK-NEXT: br i1 [[OR_COND]], label [[FOR_BODY_PREHEADER9:%.*]], label [[VECTOR_PH:%.*]]
2828
; CHECK: vector.ph:
29-
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 4294967280
29+
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[WIDE_TRIP_COUNT]], 2147483632
3030
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[A:%.*]], i64 0
3131
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
3232
; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[BROADCAST_SPLAT]]

0 commit comments

Comments
 (0)