Add pass to load definitions of common functions from bc files (rust-lang#220)

reikdas · web-flow · commit 9be50337dbf9 · 2021-07-10T12:05:40.000-04:00
Starting with cblas_ddot
diff --git a/.github/workflows/bcload.yml b/.github/workflows/bcload.yml
@@ -0,0 +1,47 @@
+name: Bitcode loading CI
+
+on: [push]
+
+jobs:
+  build:
+    name: Bitcode loading CI LLVM ${{ matrix.llvm }} ${{ matrix.build }} ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      fail-fast: false
+      matrix:
+        llvm: ["7", "8", "9", "10", "11", "12"]
+        build: ["Release"] # "RelWithDebInfo"
+        os: [ubuntu-20.04, ubuntu-18.04]
+
+        exclude:
+          # How to install FileCheck on ubuntu-18.04?
+          - os: ubuntu-18.04
+            llvm: 8
+
+    timeout-minutes: 30
+    steps:
+    - name: add llvm
+      run: |
+          wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
+          sudo apt-add-repository "deb http://apt.llvm.org/`lsb_release -c | cut -f2`/ llvm-toolchain-`lsb_release -c | cut -f2`-${{ matrix.llvm }} main" || true
+          sudo apt-get install -y autoconf cmake gcc g++ libtool gfortran llvm-${{ matrix.llvm }}-dev libomp-${{ matrix.llvm }}-dev clang-${{ matrix.llvm }} libeigen3-dev libboost-dev
+          sudo python3 -m pip install --upgrade pip setuptools
+          sudo python3 -m pip install lit
+          sudo touch /usr/lib/llvm-${{ matrix.llvm }}/bin/yaml-bench
+          if [[ '${{ matrix.llvm }}' == '7' || '${{ matrix.llvm }}' == '8' || '${{ matrix.llvm }}' == '9' ]]; then
+            sudo apt-get install -y llvm-${{ matrix.llvm }}-tools
+          fi
+    - uses: actions/checkout@v1
+      with:
+          fetch-depth: 1
+    - name: mkdir
+      run: cd enzyme && rm -rf build && mkdir build
+    - name: cmake
+      run: |
+          cd enzyme/build
+          cmake .. -DLLVM_EXTERNAL_LIT=`which lit` -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DLLVM_DIR=/usr/lib/llvm-${{ matrix.llvm }}/lib/cmake/llvm
+    - name: make
+      run: cd enzyme/build && make -j`nproc`
+    - name: make check-bcpass
+      run: cd enzyme/build && make check-bcpass -j`nproc`
diff --git a/enzyme/BCLoad/BCLoader.cpp b/enzyme/BCLoad/BCLoader.cpp
@@ -0,0 +1,57 @@
+#include "llvm/IR/Module.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <set>
+#include <string>
+
+using namespace llvm;
+
+cl::opt<std::string> BCPath("bcpath", cl::init(""), cl::Hidden,
+                            cl::desc("Path to BC definitions"));
+
+namespace {
+/// Legacy-pass-manager module pass that links pre-built bitcode definitions
+/// of known library functions into the module being compiled.
+///
+/// Bitcode files are looked up under the directory given by the `-bcpath`
+/// option. Only `cblas_ddot` (file `cblas_ddot_double.bc`) is handled so far.
+class BCLoader : public ModulePass {
+public:
+  static char ID;
+  BCLoader() : ModulePass(ID) {}
+
+  /// Parse each known bitcode file in M's LLVMContext, using M's data layout
+  /// string, and link it into M.
+  ///
+  /// A file that cannot be parsed is reported on stderr and skipped; a link
+  /// failure is reported on stderr as well.
+  ///
+  /// \returns true if a link into M was attempted, false if M was left
+  /// untouched.
+  bool runOnModule(Module &M) override {
+    const std::set<std::string> bcfuncs = {"cblas_ddot"};
+    bool Changed = false;
+    for (const std::string &name : bcfuncs) {
+      if (name != "cblas_ddot")
+        continue;
+
+      const std::string Path = BCPath + "/cblas_ddot_double.bc";
+      SMDiagnostic Err;
+#if LLVM_VERSION_MAJOR <= 10
+      auto BC = llvm::parseIRFile(Path, Err, M.getContext(), true,
+                                  M.getDataLayout().getStringRepresentation());
+#else
+      auto BC = llvm::parseIRFile(Path, Err, M.getContext(), [&](StringRef) {
+        return Optional<std::string>(
+            M.getDataLayout().getStringRepresentation());
+      });
+#endif
+      // An assert alone is not a sufficient guard: it is compiled out under
+      // NDEBUG, and handing a null module to the linker would crash. A missing
+      // or unreadable file is an environment problem, so report it and skip.
+      if (!BC) {
+        Err.print("bcloader", llvm::errs());
+        continue;
+      }
+
+      // Mark the module as modified before linking: the linker may already
+      // have altered M by the time it reports a failure.
+      Changed = true;
+      Linker L(M);
+      // linkInModule returns true on error.
+      if (L.linkInModule(std::move(BC)))
+        llvm::errs() << "bcloader: failed to link " << Path << "\n";
+    }
+    return Changed;
+  }
+};
+} // namespace
+
+char BCLoader::ID = 0;
+
+static RegisterPass<BCLoader> X("bcloader",
+                                "Link bitcode files for known functions");
+
+ModulePass *createBCLoaderPass() { return new BCLoader(); }
diff --git a/enzyme/BCLoad/BCLoader.h b/enzyme/BCLoad/BCLoader.h
@@ -0,0 +1,3 @@
+#ifndef ENZYME_BCLOAD_BCLOADER_H
+#define ENZYME_BCLOAD_BCLOADER_H
+
+#include "llvm/Pass.h"
+
+/// Create a new instance of the bitcode loader module pass.
+///
+/// The returned pass is allocated with `new`; it is intended to be handed to
+/// a legacy pass manager via `add()`.
+llvm::ModulePass *createBCLoaderPass();
+
+#endif // ENZYME_BCLOAD_BCLOADER_H
diff --git a/enzyme/BCLoad/CMakeLists.txt b/enzyme/BCLoad/CMakeLists.txt
@@ -0,0 +1,39 @@
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+if (${LLVM_VERSION_MAJOR} LESS 8)
+    add_llvm_loadable_module( BCPass-${LLVM_VERSION_MAJOR}
+        BCLoader.cpp ClangBCLoader.cpp
+        DEPENDS
+        intrinsics_gen
+        PLUGIN_TOOL
+        opt
+    )
+else()
+# on windows `PLUGIN_TOOL` doesn't link against LLVM.dll
+if ((WIN32 OR CYGWIN) AND LLVM_LINK_LLVM_DYLIB)
+    add_llvm_library( BCPass-${LLVM_VERSION_MAJOR}
+        BCLoader.cpp ClangBCLoader.cpp
+        MODULE
+        DEPENDS
+        intrinsics_gen
+	LINK_COMPONENTS
+	LLVM
+    )
+else()
+    add_llvm_library( BCPass-${LLVM_VERSION_MAJOR}
+        BCLoader.cpp ClangBCLoader.cpp
+        MODULE
+        DEPENDS
+        intrinsics_gen
+        PLUGIN_TOOL
+        opt
+    )
+endif()
+endif()
+
+if (APPLE)
+# Darwin-specific linker flags for loadable modules.
+set_target_properties(BCPass-${LLVM_VERSION_MAJOR} PROPERTIES
+    LINK_FLAGS "-Wl,-flat_namespace -Wl,-undefined -Wl,suppress")
+endif()
diff --git a/enzyme/BCLoad/ClangBCLoader.cpp b/enzyme/BCLoad/ClangBCLoader.cpp
@@ -0,0 +1,20 @@
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/Transforms/IPO/PassManagerBuilder.h"
+
+#include "BCLoader.h"
+
+#include "llvm/LinkAllPasses.h"
+
+using namespace llvm;
+
+// Extension callback (it has the signature of
+// PassManagerBuilder::ExtensionFn) that schedules the bitcode loader pass on
+// the given pass manager. The builder argument is not needed.
+static void loadPass(const PassManagerBuilder & /*Builder*/,
+                     legacy::PassManagerBase &PM) {
+  ModulePass *const Loader = createBCLoaderPass();
+  PM.add(Loader);
+}
+
+// These constructors add our pass to a list of global extensions.
+static RegisterStandardPasses
+    clangtoolLoader_Ox(PassManagerBuilder::EP_ModuleOptimizerEarly, loadPass);
+static RegisterStandardPasses
+    clangtoolLoader_O0(PassManagerBuilder::EP_EnabledOnOptLevel0, loadPass);
diff --git a/enzyme/CMakeLists.txt b/enzyme/CMakeLists.txt
@@ -89,6 +89,7 @@ file(WRITE "${CMAKE_CURRENT_BINARY_DIR}/include/SCEV/ScalarEvolutionExpander.h"
 include_directories("${CMAKE_CURRENT_BINARY_DIR}/include")
 
 add_subdirectory(Enzyme)
+add_subdirectory(BCLoad)
 add_subdirectory(test)
 
 # The benchmarks data are not in git-exported source archives to minimize size.
diff --git a/enzyme/bclib/cblas_ddot_double.bc b/enzyme/bclib/cblas_ddot_double.bc
diff --git a/enzyme/test/BCLoader/CMakeLists.txt b/enzyme/test/BCLoader/CMakeLists.txt
@@ -0,0 +1,9 @@
+# Run regression and unit tests
+add_lit_testsuite(check-bcpass "Running BCPass regression tests"
+    ${CMAKE_CURRENT_BINARY_DIR}
+    DEPENDS ${ENZYME_TEST_DEPS}
+    ARGS -v
+)
+
+set_target_properties(check-bcpass PROPERTIES FOLDER "Tests")
+
diff --git a/enzyme/test/BCLoader/bcloader-ddot.ll b/enzyme/test/BCLoader/bcloader-ddot.ll
@@ -0,0 +1,150 @@
+;RUN: if [ %llvmver -ge 10 ]; then %clang %s -Xclang -load -Xclang %loadBC -mllvm -bcpath=%BClibdir -S -emit-llvm -o - | %FileCheck %s; fi
+
+;#include <cblas.h>
+;#include <stdio.h>
+;
+;extern double __enzyme_autodiff(void *, double *, double *, double *,
+;                                 double *);
+;
+;double g(double *m, double *n) {
+;  double x = cblas_ddot(3, m, 1, n, 1);
+;  m[0] = 11.0;
+;  m[1] = 12.0;
+;  m[2] = 13.0;
+;  double y = x * x;
+;  return y;
+;}
+;
+;int main() {
+;  double m[3] = {1, 2, 3};
+;  double m1[3] = {0, 0, 0};
+;  double n[3] = {4, 5, 6};
+;  double n1[3] = {0, 0, 0};
+;  double val = __enzyme_autodiff((void*)g, m, m1, n, n1);
+;  return 1;
+;}
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@__const.main.m = private unnamed_addr constant [3 x double] [double 1.000000e+00, double 2.000000e+00, double 3.000000e+00], align 16
+@__const.main.n = private unnamed_addr constant [3 x double] [double 4.000000e+00, double 5.000000e+00, double 6.000000e+00], align 16
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local double @g(double* %m, double* %n) {
+entry:
+  %m.addr = alloca double*, align 8
+  %n.addr = alloca double*, align 8
+  %x = alloca double, align 8
+  %y = alloca double, align 8
+  store double* %m, double** %m.addr, align 8
+  store double* %n, double** %n.addr, align 8
+  %0 = load double*, double** %m.addr, align 8
+  %1 = load double*, double** %n.addr, align 8
+  %call = call double @cblas_ddot(i32 3, double* %0, i32 1, double* %1, i32 1)
+  store double %call, double* %x, align 8
+  %2 = load double*, double** %m.addr, align 8
+  %arrayidx = getelementptr inbounds double, double* %2, i64 0
+  store double 1.100000e+01, double* %arrayidx, align 8
+  %3 = load double*, double** %m.addr, align 8
+  %arrayidx1 = getelementptr inbounds double, double* %3, i64 1
+  store double 1.200000e+01, double* %arrayidx1, align 8
+  %4 = load double*, double** %m.addr, align 8
+  %arrayidx2 = getelementptr inbounds double, double* %4, i64 2
+  store double 1.300000e+01, double* %arrayidx2, align 8
+  %5 = load double, double* %x, align 8
+  %6 = load double, double* %x, align 8
+  %mul = fmul double %5, %6
+  store double %mul, double* %y, align 8
+  %7 = load double, double* %y, align 8
+  ret double %7
+}
+
+declare dso_local double @cblas_ddot(i32, double*, i32, double*, i32)
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local i32 @main() {
+entry:
+  %retval = alloca i32, align 4
+  %m = alloca [3 x double], align 16
+  %m1 = alloca [3 x double], align 16
+  %n = alloca [3 x double], align 16
+  %n1 = alloca [3 x double], align 16
+  %val = alloca double, align 8
+  store i32 0, i32* %retval, align 4
+  %0 = bitcast [3 x double]* %m to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %0, i8* align 16 bitcast ([3 x double]* @__const.main.m to i8*), i64 24, i1 false)
+  %1 = bitcast [3 x double]* %m1 to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 16 %1, i8 0, i64 24, i1 false)
+  %2 = bitcast [3 x double]* %n to i8*
+  call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 %2, i8* align 16 bitcast ([3 x double]* @__const.main.n to i8*), i64 24, i1 false)
+  %3 = bitcast [3 x double]* %n1 to i8*
+  call void @llvm.memset.p0i8.i64(i8* align 16 %3, i8 0, i64 24, i1 false)
+  %arraydecay = getelementptr inbounds [3 x double], [3 x double]* %m, i32 0, i32 0
+  %arraydecay1 = getelementptr inbounds [3 x double], [3 x double]* %m1, i32 0, i32 0
+  %arraydecay2 = getelementptr inbounds [3 x double], [3 x double]* %n, i32 0, i32 0
+  %arraydecay3 = getelementptr inbounds [3 x double], [3 x double]* %n1, i32 0, i32 0
+  %call = call double @__enzyme_autodiff(i8* bitcast (double (double*, double*)* @g to i8*), double* %arraydecay, double* %arraydecay1, double* %arraydecay2, double* %arraydecay3)
+  store double %call, double* %val, align 8
+  ret i32 1
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i1)
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i1)
+
+declare dso_local double @__enzyme_autodiff(i8*, double*, double*, double*, double*)
+
+;CHECK: define dso_local double @cblas_ddot(i32 %__N, double* %__X, i32 %__incX, double* %__Y, i32 %__incY)
+;CHECK-NEXT: entry:
+;CHECK-NEXT:  %__N.addr = alloca i32, align 4
+;CHECK-NEXT:  %__X.addr = alloca double*, align 8
+;CHECK-NEXT:  %__incX.addr = alloca i32, align 4
+;CHECK-NEXT:  %__Y.addr = alloca double*, align 8
+;CHECK-NEXT:  %__incY.addr = alloca i32, align 4
+;CHECK-NEXT:  %sum = alloca double, align 8
+;CHECK-NEXT:  %i = alloca i32, align 4
+;CHECK-NEXT:  store i32 %__N, i32* %__N.addr, align 4
+;CHECK-NEXT:  store double* %__X, double** %__X.addr, align 8
+;CHECK-NEXT:  store i32 %__incX, i32* %__incX.addr, align 4
+;CHECK-NEXT:  store double* %__Y, double** %__Y.addr, align 8
+;CHECK-NEXT:  store i32 %__incY, i32* %__incY.addr, align 4
+;CHECK-NEXT:  store double 0.000000e+00, double* %sum, align 8
+;CHECK-NEXT:  store i32 0, i32* %i, align 4
+;CHECK-NEXT:  br label %for.cond
+
+;CHECK: for.cond:                                         ; preds = %for.inc, %entry
+;CHECK-NEXT:   %0 = load i32, i32* %i, align 4
+;CHECK-NEXT:   %1 = load i32, i32* %__N.addr, align 4
+;CHECK-NEXT:   %cmp = icmp slt i32 %0, %1
+;CHECK-NEXT:   br i1 %cmp, label %for.body, label %for.end
+
+;CHECK: for.body:                                         ; preds = %for.cond
+;CHECK-NEXT:   %2 = load double, double* %sum, align 8
+;CHECK-NEXT:   %3 = load double*, double** %__X.addr, align 8
+;CHECK-NEXT:   %4 = load i32, i32* %i, align 4
+;CHECK-NEXT:   %idxprom = sext i32 %4 to i64
+;CHECK-NEXT:   %arrayidx = getelementptr inbounds double, double* %3, i64 %idxprom
+;CHECK-NEXT:   %5 = load double, double* %arrayidx, align 8
+;CHECK-NEXT:   %6 = load double*, double** %__Y.addr, align 8
+;CHECK-NEXT:   %7 = load i32, i32* %i, align 4
+;CHECK-NEXT:   %idxprom1 = sext i32 %7 to i64
+;CHECK-NEXT:   %arrayidx2 = getelementptr inbounds double, double* %6, i64 %idxprom1
+;CHECK-NEXT:   %8 = load double, double* %arrayidx2, align 8
+;CHECK-NEXT:   %mul = fmul double %5, %8
+;CHECK-NEXT:   %add = fadd double %2, %mul
+;CHECK-NEXT:   store double %add, double* %sum, align 8
+;CHECK-NEXT:   br label %for.inc
+
+;CHECK: for.inc:                                          ; preds = %for.body
+;CHECK-NEXT:   %9 = load i32, i32* %i, align 4
+;CHECK-NEXT:   %inc = add nsw i32 %9, 1
+;CHECK-NEXT:   store i32 %inc, i32* %i, align 4
+;CHECK-NEXT:   br label %for.cond
+
+;CHECK: for.end:                                          ; preds = %for.cond
+;CHECK-NEXT:   %10 = load double, double* %sum, align 8
+;CHECK-NEXT:   ret double %10
+;CHECK-NEXT: }
diff --git a/enzyme/test/CMakeLists.txt b/enzyme/test/CMakeLists.txt
@@ -5,14 +5,15 @@ configure_lit_site_cfg(
   ${CMAKE_CURRENT_SOURCE_DIR}/lit.cfg.py
 )
 
-set(ENZYME_TEST_DEPS LLVMEnzyme-${LLVM_VERSION_MAJOR})
+set(ENZYME_TEST_DEPS LLVMEnzyme-${LLVM_VERSION_MAJOR} BCPass-${LLVM_VERSION_MAJOR})
 
 add_subdirectory(ActivityAnalysis)
 add_subdirectory(TypeAnalysis)
 add_subdirectory(Enzyme/ReverseMode)
 add_subdirectory(Enzyme/ForwardMode)
 add_subdirectory(Integration/ReverseMode)
 add_subdirectory(Integration/ForwardMode)
+add_subdirectory(BCLoader)
 
 add_custom_target(check-enzyme DEPENDS check-enzyme-reverse check-enzyme-forward)
 add_custom_target(check-enzyme-integration DEPENDS check-enzyme-integration-reverse check-enzyme-integration-forward)
diff --git a/enzyme/test/lit.site.cfg.py.in b/enzyme/test/lit.site.cfg.py.in
@@ -45,6 +45,10 @@ config.substitutions.append(('%clang', config.llvm_tools_dir + "/clang"))
 config.substitutions.append(('%loadEnzyme', ''
                                  + ' -load=@ENZYME_BINARY_DIR@/Enzyme/LLVMEnzyme-' + config.llvm_ver + config.llvm_shlib_ext
                                  ))
+config.substitutions.append(('%loadBC', ''
+                                 + ' @ENZYME_BINARY_DIR@/BCLoad/BCPass-' + config.llvm_ver + config.llvm_shlib_ext
+                                 ))
+config.substitutions.append(('%BClibdir', '@ENZYME_SOURCE_DIR@/bclib/'))
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@ENZYME_SOURCE_DIR@/test/lit.cfg.py")

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+#include "llvm/Pass.h"`
	`2`	`+`
	`3`	`+llvm::ModulePass *createBCLoaderPass();`