Skip to content

Commit 9be5033

Browse files
authored
Add pass to load definitions of common functions from bc files (rust-lang#220)
Starting with cblas_ddot
1 parent a75e86a commit 9be5033

File tree

11 files changed

+332
-1
lines changed

11 files changed

+332
-1
lines changed

.github/workflows/bcload.yml

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
name: Bitcode loading CI
2+
3+
on: [push]
4+
5+
jobs:
6+
build:
7+
name: Bitcode loading CI LLVM ${{ matrix.llvm }} ${{ matrix.build }} ${{ matrix.os }}
8+
runs-on: ${{ matrix.os }}
9+
10+
strategy:
11+
fail-fast: false
12+
matrix:
13+
llvm: ["7", "8", "9", "10", "11", "12"]
14+
build: ["Release"] # "RelWithDebInfo"
15+
os: [ubuntu-20.04, ubuntu-18.04]
16+
17+
exclude:
18+
# How to install FileCheck on ubuntu-18.04?
19+
- os: ubuntu-18.04
20+
llvm: 8
21+
22+
timeout-minutes: 30
23+
steps:
24+
- name: add llvm
25+
run: |
26+
wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
27+
sudo apt-add-repository "deb http://apt.llvm.org/`lsb_release -c | cut -f2`/ llvm-toolchain-`lsb_release -c | cut -f2`-${{ matrix.llvm }} main" || true
28+
sudo apt-get install -y autoconf cmake gcc g++ libtool gfortran llvm-${{ matrix.llvm }}-dev libomp-${{ matrix.llvm }}-dev clang-${{ matrix.llvm }} libeigen3-dev libboost-dev
29+
sudo python3 -m pip install --upgrade pip setuptools
30+
sudo python3 -m pip install lit
31+
sudo touch /usr/lib/llvm-${{ matrix.llvm }}/bin/yaml-bench
32+
if [[ '${{ matrix.llvm }}' == '7' || '${{ matrix.llvm }}' == '8' || '${{ matrix.llvm }}' == '9' ]]; then
33+
sudo apt-get install -y llvm-${{ matrix.llvm }}-tools
34+
fi
35+
- uses: actions/checkout@v1
36+
with:
37+
fetch-depth: 1
38+
- name: mkdir
39+
run: cd enzyme && rm -rf build && mkdir build
40+
- name: cmake
41+
run: |
42+
cd enzyme/build
43+
cmake .. -DLLVM_EXTERNAL_LIT=`which lit` -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm }}/lib/cmake/llvm
44+
- name: make
45+
run: cd enzyme/build && make -j`nproc`
46+
- name: make check-bcpass
47+
run: cd enzyme/build && make check-bcpass -j`nproc`

enzyme/BCLoad/BCLoader.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
#include "llvm/IR/Module.h"
2+
#include "llvm/IRReader/IRReader.h"
3+
#include "llvm/Linker/Linker.h"
4+
#include "llvm/Pass.h"
5+
#include "llvm/Support/CommandLine.h"
6+
#include "llvm/Support/SourceMgr.h"
7+
#include "llvm/Support/raw_ostream.h"
8+
9+
#include <set>
10+
#include <string>
11+
12+
using namespace llvm;
13+
14+
// Hidden command-line option `bcpath`: the directory that holds the bitcode
// definitions this plugin links in. It defaults to the empty string.
cl::opt<std::string> BCPath(
    "bcpath",
    cl::init(""),
    cl::Hidden,
    cl::desc("Path to BC definitions"));
16+
17+
namespace {
18+
class BCLoader : public ModulePass {
19+
public:
20+
static char ID;
21+
BCLoader() : ModulePass(ID) {}
22+
23+
bool runOnModule(Module &M) override {
24+
std::set<std::string> bcfuncs = {"cblas_ddot"};
25+
for (std::string name : bcfuncs) {
26+
if (name == "cblas_ddot") {
27+
SMDiagnostic Err;
28+
#if LLVM_VERSION_MAJOR <= 10
29+
auto BC = llvm::parseIRFile(
30+
BCPath + "/cblas_ddot_double.bc", Err, M.getContext(), true,
31+
M.getDataLayout().getStringRepresentation());
32+
#else
33+
auto BC = llvm::parseIRFile(
34+
BCPath + "/cblas_ddot_double.bc", Err, M.getContext(),
35+
[&](StringRef) {
36+
return Optional<std::string>(
37+
M.getDataLayout().getStringRepresentation());
38+
});
39+
#endif
40+
if (!BC)
41+
Err.print("bcloader", llvm::errs());
42+
assert(BC);
43+
Linker L(M);
44+
L.linkInModule(std::move(BC));
45+
}
46+
}
47+
return true;
48+
}
49+
};
50+
} // namespace
51+
52+
char BCLoader::ID = 0;
53+
54+
static RegisterPass<BCLoader> X("bcloader",
55+
"Link bitcode files for known functions");
56+
57+
/// Factory for the BCLoader pass: returns a freshly heap-allocated instance.
ModulePass *createBCLoaderPass() {
  ModulePass *Loader = new BCLoader();
  return Loader;
}

enzyme/BCLoad/BCLoader.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#include "llvm/Pass.h"
2+
3+
/// Creates a new, heap-allocated instance of the BCLoader module pass, which
/// links bitcode definitions of known functions into a module.
/// NOTE(review): the caller presumably hands the returned pointer to a pass
/// manager that takes ownership — confirm at each call site.
llvm::ModulePass *createBCLoaderPass();

enzyme/BCLoad/CMakeLists.txt

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
set(CMAKE_CXX_STANDARD 17)
2+
set(CMAKE_CXX_STANDARD_REQUIRED ON)
3+
4+
if (${LLVM_VERSION_MAJOR} LESS 8)
5+
add_llvm_loadable_module( BCPass-${LLVM_VERSION_MAJOR}
6+
BCLoader.cpp ClangBCLoader.cpp
7+
DEPENDS
8+
intrinsics_gen
9+
PLUGIN_TOOL
10+
opt
11+
)
12+
else()
13+
# On Windows, `PLUGIN_TOOL` doesn't link against LLVM.dll.
14+
if ((WIN32 OR CYGWIN) AND LLVM_LINK_LLVM_DYLIB)
15+
add_llvm_library( BCPass-${LLVM_VERSION_MAJOR}
16+
BCLoader.cpp ClangBCLoader.cpp
17+
MODULE
18+
DEPENDS
19+
intrinsics_gen
20+
LINK_COMPONENTS
21+
LLVM
22+
)
23+
else()
24+
add_llvm_library( BCPass-${LLVM_VERSION_MAJOR}
25+
BCLoader.cpp ClangBCLoader.cpp
26+
MODULE
27+
DEPENDS
28+
intrinsics_gen
29+
PLUGIN_TOOL
30+
opt
31+
)
32+
endif()
33+
endif()
34+
35+
if (APPLE)
36+
# Darwin-specific linker flags for loadable modules.
37+
set_target_properties(BCPass-${LLVM_VERSION_MAJOR} PROPERTIES
38+
LINK_FLAGS "-Wl,-flat_namespace -Wl,-undefined -Wl,suppress")
39+
endif()

enzyme/BCLoad/ClangBCLoader.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#include "llvm/IR/LegacyPassManager.h"
2+
#include "llvm/Transforms/IPO/PassManagerBuilder.h"
3+
4+
#include "BCLoader.h"
5+
6+
#include "llvm/LinkAllPasses.h"
7+
8+
using namespace llvm;
9+
10+
// This function is of type PassManagerBuilder::ExtensionFn
11+
static void loadPass(const PassManagerBuilder &Builder,
12+
legacy::PassManagerBase &PM) {
13+
PM.add(createBCLoaderPass());
14+
}
15+
16+
// These constructors add our pass to a list of global extensions.
17+
// Hook loadPass into the EP_ModuleOptimizerEarly extension point.
static RegisterStandardPasses clangtoolLoader_Ox(
    PassManagerBuilder::EP_ModuleOptimizerEarly, loadPass);

// Hook loadPass into the EP_EnabledOnOptLevel0 extension point as well.
static RegisterStandardPasses clangtoolLoader_O0(
    PassManagerBuilder::EP_EnabledOnOptLevel0, loadPass);

enzyme/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/include/SCEV/ScalarEvolutionExpander.h"
8989
include_directories("${CMAKE_CURRENT_BINARY_DIR}/include")
9090

9191
add_subdirectory(Enzyme)
92+
add_subdirectory(BCLoad)
9293
add_subdirectory(test)
9394

9495
# The benchmarks data are not in git-exported source archives to minimize size.

enzyme/bclib/cblas_ddot_double.bc

2.31 KB
Binary file not shown.

enzyme/test/BCLoader/CMakeLists.txt

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Run regression and unit tests
2+
add_lit_testsuite(check-bcpass "Running BCPass regression tests"
3+
${CMAKE_CURRENT_BINARY_DIR}
4+
DEPENDS ${ENZYME_TEST_DEPS}
5+
ARGS -v
6+
)
7+
8+
set_target_properties(check-bcpass PROPERTIES FOLDER "Tests")
9+

enzyme/test/BCLoader/bcloader-ddot.ll

Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
;RUN: if [ %llvmver -ge 10 ]; then %clang %s -Xclang -load -Xclang %loadBC -mllvm -bcpath=%BClibdir -S -emit-llvm -o - | %FileCheck %s; fi
2+
3+
;#include <cblas.h>
4+
;#include <stdio.h>
5+
;
6+
;extern double __enzyme_autodiff(void *, double *, double *, double *,
7+
; double *);
8+
;
9+
;double g(double *m, double *n) {
10+
; double x = cblas_ddot(3, m, 1, n, 1);
11+
; m[0] = 11.0;
12+
; m[1] = 12.0;
13+
; m[2] = 13.0;
14+
; double y = x * x;
15+
; return y;
16+
;}
17+
;
18+
;int main() {
19+
; double m[3] = {1, 2, 3};
20+
; double m1[3] = {0, 0, 0};
21+
; double n[3] = {4, 5, 6};
22+
; double n1[3] = {0, 0, 0};
23+
; double val = __enzyme_autodiff((void*)g, m, m1, n, n1);
24+
; return 1;
25+
;}
26+
27+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
28+
target triple = "x86_64-unknown-linux-gnu"
29+
30+
@__const.main.m = private unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 16
31+
@__const.main.n = private unnamed_addr constant [3 x double] [double 4.000000e+00, double 5.000000e+00, double 6.000000e+00], align 16
32+
33+
; Function Attrs: noinline nounwind optnone uwtable
34+
define dso_local double @g(double* %m, double* %n) {
35+
entry:
36+
%m.addr = alloca double*, align 8
37+
%n.addr = alloca double*, align 8
38+
%x = alloca double, align 8
39+
%y = alloca double, align 8
40+
store double* %m, double** %m.addr, align 8
41+
store double* %n, double** %n.addr, align 8
42+
%0 = load double*, double** %m.addr, align 8
43+
%1 = load double*, double** %n.addr, align 8
44+
%call = call double @cblas_ddot(i32 3, double* %0, i32 1, double* %1, i32 1)
45+
store double %call, double* %x, align 8
46+
%2 = load double*, double** %m.addr, align 8
47+
%arrayidx = getelementptr inbounds double, double* %2, i64 0
48+
store double 1.100000e+01, double* %arrayidx, align 8
49+
%3 = load double*, double** %m.addr, align 8
50+
%arrayidx1 = getelementptr inbounds double, double* %3, i64 1
51+
store double 1.200000e+01, double* %arrayidx1, align 8
52+
%4 = load double*, double** %m.addr, align 8
53+
%arrayidx2 = getelementptr inbounds double, double* %4, i64 2
54+
store double 1.300000e+01, double* %arrayidx2, align 8
55+
%5 = load double, double* %x, align 8
56+
%6 = load double, double* %x, align 8
57+
%mul = fmul double %5, %6
58+
store double %mul, double* %y, align 8
59+
%7 = load double, double* %y, align 8
60+
ret double %7
61+
}
62+
63+
declare dso_local double @cblas_ddot(i32, double*, i32, double*, i32)
64+
65+
; Function Attrs: noinline nounwind optnone uwtable
66+
define dso_local i32 @main() {
67+
entry:
68+
%retval = alloca i32, align 4
69+
%m = alloca [3 x double], align 16
70+
%m1 = alloca [3 x double], align 16
71+
%n = alloca [3 x double], align 16
72+
%n1 = alloca [3 x double], align 16
73+
%val = alloca double, align 8
74+
store i32 0, i32* %retval, align 4
75+
%0 = bitcast [3 x double]* %m to i8*
76+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast ([3 x double]* @__const.main.m to i8*), i64 24, i1 false)
77+
%1 = bitcast [3 x double]* %m1 to i8*
78+
call void @llvm.memset.p0i8.i64(i8* align 16 %1, i8 0, i64 24, i1 false)
79+
%2 = bitcast [3 x double]* %n to i8*
80+
call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %2, i8* align 16 bitcast ([3 x double]* @__const.main.n to i8*), i64 24, i1 false)
81+
%3 = bitcast [3 x double]* %n1 to i8*
82+
call void @llvm.memset.p0i8.i64(i8* align 16 %3, i8 0, i64 24, i1 false)
83+
%arraydecay = getelementptr inbounds [3 x double], [3 x double]* %m, i32 0, i32 0
84+
%arraydecay1 = getelementptr inbounds [3 x double], [3 x double]* %m1, i32 0, i32 0
85+
%arraydecay2 = getelementptr inbounds [3 x double], [3 x double]* %n, i32 0, i32 0
86+
%arraydecay3 = getelementptr inbounds [3 x double], [3 x double]* %n1, i32 0, i32 0
87+
%call = call double @__enzyme_autodiff(i8* bitcast (double (double*, double*)* @g to i8*), double* %arraydecay, double* %arraydecay1, double* %arraydecay2, double* %arraydecay3)
88+
store double %call, double* %val, align 8
89+
ret i32 1
90+
}
91+
92+
; Function Attrs: argmemonly nounwind
93+
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
94+
95+
; Function Attrs: argmemonly nounwind
96+
declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)
97+
98+
declare dso_local double @__enzyme_autodiff(i8*, double*, double*, double*, double*)
99+
100+
;CHECK: define dso_local double @cblas_ddot(i32 %__N, double* %__X, i32 %__incX, double* %__Y, i32 %__incY)
101+
;CHECK-NEXT: entry:
102+
;CHECK-NEXT: %__N.addr = alloca i32, align 4
103+
;CHECK-NEXT: %__X.addr = alloca double*, align 8
104+
;CHECK-NEXT: %__incX.addr = alloca i32, align 4
105+
;CHECK-NEXT: %__Y.addr = alloca double*, align 8
106+
;CHECK-NEXT: %__incY.addr = alloca i32, align 4
107+
;CHECK-NEXT: %sum = alloca double, align 8
108+
;CHECK-NEXT: %i = alloca i32, align 4
109+
;CHECK-NEXT: store i32 %__N, i32* %__N.addr, align 4
110+
;CHECK-NEXT: store double* %__X, double** %__X.addr, align 8
111+
;CHECK-NEXT: store i32 %__incX, i32* %__incX.addr, align 4
112+
;CHECK-NEXT: store double* %__Y, double** %__Y.addr, align 8
113+
;CHECK-NEXT: store i32 %__incY, i32* %__incY.addr, align 4
114+
;CHECK-NEXT: store double 0.000000e+00, double* %sum, align 8
115+
;CHECK-NEXT: store i32 0, i32* %i, align 4
116+
;CHECK-NEXT: br label %for.cond
117+
118+
;CHECK: for.cond: ; preds = %for.inc, %entry
119+
;CHECK-NEXT: %0 = load i32, i32* %i, align 4
120+
;CHECK-NEXT: %1 = load i32, i32* %__N.addr, align 4
121+
;CHECK-NEXT: %cmp = icmp slt i32 %0, %1
122+
;CHECK-NEXT: br i1 %cmp, label %for.body, label %for.end
123+
124+
;CHECK: for.body: ; preds = %for.cond
125+
;CHECK-NEXT: %2 = load double, double* %sum, align 8
126+
;CHECK-NEXT: %3 = load double*, double** %__X.addr, align 8
127+
;CHECK-NEXT: %4 = load i32, i32* %i, align 4
128+
;CHECK-NEXT: %idxprom = sext i32 %4 to i64
129+
;CHECK-NEXT: %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom
130+
;CHECK-NEXT: %5 = load double, double* %arrayidx, align 8
131+
;CHECK-NEXT: %6 = load double*, double** %__Y.addr, align 8
132+
;CHECK-NEXT: %7 = load i32, i32* %i, align 4
133+
;CHECK-NEXT: %idxprom1 = sext i32 %7 to i64
134+
;CHECK-NEXT: %arrayidx2 = getelementptr inbounds double, double* %6, i64 %idxprom1
135+
;CHECK-NEXT: %8 = load double, double* %arrayidx2, align 8
136+
;CHECK-NEXT: %mul = fmul double %5, %8
137+
;CHECK-NEXT: %add = fadd double %2, %mul
138+
;CHECK-NEXT: store double %add, double* %sum, align 8
139+
;CHECK-NEXT: br label %for.inc
140+
141+
;CHECK: for.inc: ; preds = %for.body
142+
;CHECK-NEXT: %9 = load i32, i32* %i, align 4
143+
;CHECK-NEXT: %inc = add nsw i32 %9, 1
144+
;CHECK-NEXT: store i32 %inc, i32* %i, align 4
145+
;CHECK-NEXT: br label %for.cond
146+
147+
;CHECK: for.end: ; preds = %for.cond
148+
;CHECK-NEXT: %10 = load double, double* %sum, align 8
149+
;CHECK-NEXT: ret double %10
150+
;CHECK-NEXT: }

enzyme/test/CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,15 @@ configure_lit_site_cfg(
55
${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
66
)
77

8-
set(ENZYME_TEST_DEPS LLVMEnzyme-${LLVM_VERSION_MAJOR})
8+
set(ENZYME_TEST_DEPS LLVMEnzyme-${LLVM_VERSION_MAJOR} BCPass-${LLVM_VERSION_MAJOR})
99

1010
add_subdirectory(ActivityAnalysis)
1111
add_subdirectory(TypeAnalysis)
1212
add_subdirectory(Enzyme/ReverseMode)
1313
add_subdirectory(Enzyme/ForwardMode)
1414
add_subdirectory(Integration/ReverseMode)
1515
add_subdirectory(Integration/ForwardMode)
16+
add_subdirectory(BCLoader)
1617

1718
add_custom_target(check-enzyme DEPENDS check-enzyme-reverse check-enzyme-forward)
1819
add_custom_target(check-enzyme-integration DEPENDS check-enzyme-integration-reverse check-enzyme-integration-forward)

enzyme/test/lit.site.cfg.py.in

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@ config.substitutions.append(('%clang', config.llvm_tools_dir + "/clang"))
4545
config.substitutions.append(('%loadEnzyme', ''
4646
+ ' -load=@ENZYME_BINARY_DIR@/Enzyme/LLVMEnzyme-' + config.llvm_ver + config.llvm_shlib_ext
4747
))
48+
config.substitutions.append(('%loadBC', ''
49+
+ ' @ENZYME_BINARY_DIR@/BCLoad/BCPass-' + config.llvm_ver + config.llvm_shlib_ext
50+
))
51+
config.substitutions.append(('%BClibdir', '@ENZYME_SOURCE_DIR@/bclib/'))
4852

4953
# Let the main config do the real work.
5054
lit_config.load_config(config, "@ENZYME_SOURCE_DIR@/test/lit.cfg.py")

0 commit comments

Comments
 (0)