Skip to content

Commit 87cba43

Browse files
committed
BPF: add a SimplifyCFG IR pass during generic Scalar/IPO optimization
The following bpf linux kernel selftest failed with latest llvm: $ ./test_progs -n 7/10 ... The sequence of 8193 jumps is too complex. verification time 126272 usec stack depth 320 processed 114799 insns (limit 1000000) ... libbpf: failed to load object 'pyperf600_nounroll.o' test_bpf_verif_scale:FAIL:110 rust-lang#7/10 pyperf600_nounroll.o:FAIL rust-lang#7 bpf_verif_scale:FAIL After some investigation, I found the following llvm patch https://reviews.llvm.org/D84108 is responsible. The patch disabled hoisting common instructions in SimplifyCFG by default. Later on, the code changes and a SimplifyCFG phase with hoisting on cannot do the work any more. A test is provided to demonstrate the problem. The IR before simplifyCFG looks like: for.cond: %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ] %cmp = icmp ult i32 %i.0, 6 br i1 %cmp, label %for.body, label %for.cond.cleanup for.cond.cleanup: %2 = load i8*, i8** %frame_ptr, align 8, !tbaa !2 %cmp2 = icmp eq i8* %2, null %conv = zext i1 %cmp2 to i32 call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %1) rust-lang#3 call void @llvm.lifetime.end.p0i8(i64 8, i8* nonnull %0) rust-lang#3 ret i32 %conv for.body: %3 = load i8*, i8** %frame_ptr, align 8, !tbaa !2 %tobool.not = icmp eq i8* %3, null br i1 %tobool.not, label %for.inc, label %land.lhs.true The first two insns of `for.cond.cleanup` and `for.body`, load and icmp, can be hoisted to `for.cond` block. With Patch D84108, the optimization is delayed. But unfortunately, later on loop rotation added addition phi nodes to `for.body` and hoisting cannot be done any more. Note such a hoisting is beneficial to bpf programs as bpf verifier does path sensitive analysis and verification. The hoisting preverts reloading from stack which will assume conservative value and increase exploited insns. In this case, it caused verifier failure. To fix this problem, I added an IR pass from bpf target to performance additional simplifycfg with hoisting common inst enabled. Differential Revision: https://reviews.llvm.org/D85434
1 parent 8d943a9 commit 87cba43

File tree

4 files changed

+154
-1
lines changed

4 files changed

+154
-1
lines changed

llvm/lib/Target/BPF/BPFTargetMachine.cpp

+12-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@
2121
#include "llvm/Support/FormattedStream.h"
2222
#include "llvm/Support/TargetRegistry.h"
2323
#include "llvm/Target/TargetOptions.h"
24+
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
25+
#include "llvm/Transforms/Scalar.h"
26+
#include "llvm/Transforms/Utils/SimplifyCFGOptions.h"
2427
using namespace llvm;
2528

2629
static cl::
@@ -94,8 +97,16 @@ TargetPassConfig *BPFTargetMachine::createPassConfig(PassManagerBase &PM) {
9497
return new BPFPassConfig(*this, PM);
9598
}
9699

97-
void BPFPassConfig::addIRPasses() {
100+
void BPFTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
101+
Builder.addExtension(
102+
PassManagerBuilder::EP_Peephole,
103+
[&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
104+
PM.add(createCFGSimplificationPass(
105+
SimplifyCFGOptions().hoistCommonInsts(true)));
106+
});
107+
}
98108

109+
void BPFPassConfig::addIRPasses() {
99110
addPass(createBPFAbstractMemberAccess(&getBPFTargetMachine()));
100111
addPass(createBPFPreserveDIType());
101112

llvm/lib/Target/BPF/BPFTargetMachine.h

+2
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ class BPFTargetMachine : public LLVMTargetMachine {
3737
TargetLoweringObjectFile *getObjFileLowering() const override {
3838
return TLOF.get();
3939
}
40+
41+
void adjustPassManager(PassManagerBuilder &) override;
4042
};
4143
}
4244

llvm/lib/Target/BPF/LLVMBuild.txt

+1
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ required_libraries =
3535
MC
3636
BPFDesc
3737
BPFInfo
38+
IPO
3839
SelectionDAG
3940
Support
4041
Target

llvm/test/CodeGen/BPF/simplifycfg.ll

+139
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
; RUN: opt -O2 -S < %s | FileCheck %s
2+
;
3+
; This test tries to ensure that simplifycfg hoisting common instructions
4+
; of then/else branch indeed happens. BPF target has added an IR pass
5+
; before loop optimizations as Commit 1d51dc38d89b
6+
; ([SimplifyCFG][LoopRotate] SimplifyCFG: disable common instruction
7+
; hoisting by default, enable late in pipeline)
8+
; disabled common instruction hoisting. Due to optimization triggered
9+
; code changes, later SimplifyCFG may not be able to perform optimization
10+
; even common inst hoisting is enabled.
11+
;
12+
; Source:
13+
; typedef struct {
14+
; void *f_back;
15+
; } FrameData;
16+
; extern int get_data(void *, void *);
17+
; extern void get_frame_ptr(void *);
18+
; int test() {
19+
; void *frame_ptr;
20+
; FrameData frame;
21+
;
22+
; get_frame_ptr(&frame_ptr);
23+
;
24+
; #pragma nounroll
25+
; for (int i = 0; i < 6; i++) {
26+
; if (frame_ptr && get_data(frame_ptr, &frame)) {
27+
; frame_ptr = frame.f_back;
28+
; }
29+
; }
30+
; return frame_ptr == 0;
31+
; }
32+
; Compilation flag:
33+
; clang -target bpf -O2 -Xclang -disable-llvm-passes -S -emit-llvm t.c -o t.ll
34+
35+
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
36+
target triple = "bpf"
37+
38+
%struct.FrameData = type { i8* }
39+
40+
; Function Attrs: nounwind
41+
define dso_local i32 @test() #0 {
42+
entry:
43+
%frame_ptr = alloca i8*, align 8
44+
%frame = alloca %struct.FrameData, align 8
45+
%i = alloca i32, align 4
46+
%0 = bitcast i8** %frame_ptr to i8*
47+
call void @llvm.lifetime.start.p0i8(i64 8, i8* %0) #3
48+
%1 = bitcast %struct.FrameData* %frame to i8*
49+
call void @llvm.lifetime.start.p0i8(i64 8, i8* %1) #3
50+
%2 = bitcast i8** %frame_ptr to i8*
51+
call void @get_frame_ptr(i8* %2)
52+
%3 = bitcast i32* %i to i8*
53+
call void @llvm.lifetime.start.p0i8(i64 4, i8* %3) #3
54+
store i32 0, i32* %i, align 4, !tbaa !2
55+
br label %for.cond
56+
57+
; CHECK-LABEL: entry
58+
; CHECK: %{{[0-9]+}} = load i8*, i8** %frame_ptr, align 8
59+
; CHECK: %{{[0-9a-z.]+}} = icmp eq i8* %2, null
60+
; CHECK: br label
61+
62+
for.cond: ; preds = %for.inc, %entry
63+
%4 = load i32, i32* %i, align 4, !tbaa !2
64+
%cmp = icmp slt i32 %4, 6
65+
br i1 %cmp, label %for.body, label %for.cond.cleanup
66+
67+
for.cond.cleanup: ; preds = %for.cond
68+
%5 = bitcast i32* %i to i8*
69+
call void @llvm.lifetime.end.p0i8(i64 4, i8* %5) #3
70+
br label %for.end
71+
72+
for.body: ; preds = %for.cond
73+
%6 = load i8*, i8** %frame_ptr, align 8, !tbaa !6
74+
%tobool = icmp ne i8* %6, null
75+
br i1 %tobool, label %land.lhs.true, label %if.end
76+
77+
land.lhs.true: ; preds = %for.body
78+
%7 = load i8*, i8** %frame_ptr, align 8, !tbaa !6
79+
%8 = bitcast %struct.FrameData* %frame to i8*
80+
%call = call i32 @get_data(i8* %7, i8* %8)
81+
%tobool1 = icmp ne i32 %call, 0
82+
br i1 %tobool1, label %if.then, label %if.end
83+
84+
if.then: ; preds = %land.lhs.true
85+
%f_back = getelementptr inbounds %struct.FrameData, %struct.FrameData* %frame, i32 0, i32 0
86+
%9 = load i8*, i8** %f_back, align 8, !tbaa !8
87+
store i8* %9, i8** %frame_ptr, align 8, !tbaa !6
88+
br label %if.end
89+
90+
if.end: ; preds = %if.then, %land.lhs.true, %for.body
91+
br label %for.inc
92+
93+
for.inc: ; preds = %if.end
94+
%10 = load i32, i32* %i, align 4, !tbaa !2
95+
%inc = add nsw i32 %10, 1
96+
store i32 %inc, i32* %i, align 4, !tbaa !2
97+
br label %for.cond, !llvm.loop !10
98+
99+
for.end: ; preds = %for.cond.cleanup
100+
%11 = load i8*, i8** %frame_ptr, align 8, !tbaa !6
101+
%cmp2 = icmp eq i8* %11, null
102+
%conv = zext i1 %cmp2 to i32
103+
%12 = bitcast %struct.FrameData* %frame to i8*
104+
call void @llvm.lifetime.end.p0i8(i64 8, i8* %12) #3
105+
%13 = bitcast i8** %frame_ptr to i8*
106+
call void @llvm.lifetime.end.p0i8(i64 8, i8* %13) #3
107+
ret i32 %conv
108+
}
109+
110+
; Function Attrs: argmemonly nounwind willreturn
111+
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #1
112+
113+
declare dso_local void @get_frame_ptr(i8*) #2
114+
115+
declare dso_local i32 @get_data(i8*, i8*) #2
116+
117+
; Function Attrs: argmemonly nounwind willreturn
118+
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #1
119+
120+
attributes #0 = { nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
121+
attributes #1 = { argmemonly nounwind willreturn }
122+
attributes #2 = { "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
123+
attributes #3 = { nounwind }
124+
125+
!llvm.module.flags = !{!0}
126+
!llvm.ident = !{!1}
127+
128+
!0 = !{i32 1, !"wchar_size", i32 4}
129+
!1 = !{!"clang version 12.0.0 (https://github.com/llvm/llvm-project.git 1b3c1c543269da36ae41ab84f646cf98d2e5b1e5)"}
130+
!2 = !{!3, !3, i64 0}
131+
!3 = !{!"int", !4, i64 0}
132+
!4 = !{!"omnipotent char", !5, i64 0}
133+
!5 = !{!"Simple C/C++ TBAA"}
134+
!6 = !{!7, !7, i64 0}
135+
!7 = !{!"any pointer", !4, i64 0}
136+
!8 = !{!9, !7, i64 0}
137+
!9 = !{!"", !7, i64 0}
138+
!10 = distinct !{!10, !11}
139+
!11 = !{!"llvm.loop.unroll.disable"}

0 commit comments

Comments
 (0)