Skip to content

Commit 12ebbf4

Browse files
wsmoses and vchuravy authored
Compile-Time CBLAS + Fortran BLAS Bitcode Injection (rust-lang#628)
* Compile time cblas loading
* Now with fortran abi
* Add shared lib
* Add to julia tarballs
* Add FBlas test
* Fix CI
* Add missing file
* prepare for cross-compile support
* update build_tarballs.jl
* add to TARGETS
* allow BC nonsense without clang if blasheaders is pregenerated
* Revert "add to TARGETS"

  This reverts commit 59fd884acb7b48e7bc0df2ca7edbac51dadf8071.
* fix
* Simplify cmake build

Co-authored-by: Valentin Churavy <[email protected]>
1 parent 93ca0b7 commit 12ebbf4

File tree

10 files changed

+279
-147
lines changed

10 files changed

+279
-147
lines changed

.packaging/build_tarballs.jl

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,9 +23,28 @@ platforms = expand_cxxstring_abis(supported_platforms(; experimental=true))
2323
# Bash recipe for building across all platforms
2424
script = raw"""
2525
cd Enzyme
26-
# install_license LICENSE.TXT
26+
27+
# 1. Build HOST
28+
NATIVE_CMAKE_FLAGS=()
29+
NATIVE_CMAKE_FLAGS+=(-DENZYME_CLANG=ON)
30+
NATIVE_CMAKE_FLAGS+=(-DCMAKE_BUILD_TYPE=RelWithDebInfo)
31+
NATIVE_CMAKE_FLAGS+=(-DCMAKE_CROSSCOMPILING:BOOL=OFF)
32+
# Install things into $host_prefix
33+
NATIVE_CMAKE_FLAGS+=(-DCMAKE_TOOLCHAIN_FILE=${CMAKE_HOST_TOOLCHAIN})
34+
NATIVE_CMAKE_FLAGS+=(-DCMAKE_INSTALL_PREFIX=${host_prefix})
35+
# Tell CMake where LLVM is
36+
NATIVE_CMAKE_FLAGS+=(-DLLVM_DIR="${host_prefix}/lib/cmake/llvm")
37+
NATIVE_CMAKE_FLAGS+=(-DBC_LOAD_FLAGS="-target ${target} --sysroot=/opt/${target}/${target}/sys-root --gcc-toolchain=/opt/${target}")
38+
39+
cmake -B build-native -S enzyme -GNinja "${NATIVE_CMAKE_FLAGS[@]}"
40+
41+
# Only build blasheaders (and eventually tblgen)
42+
ninja -C build-native -j ${nproc} blasheaders
43+
44+
# 2. Cross-compile
2745
CMAKE_FLAGS=()
2846
CMAKE_FLAGS+=(-DENZYME_EXTERNAL_SHARED_LIB=ON)
47+
CMAKE_FLAGS+=(-DBC_LOAD_HEADER=`pwd`/build-native/BCLoad/gsl/blas_headers.h)
2948
CMAKE_FLAGS+=(-DENZYME_CLANG=OFF)
3049
# RelWithDebInfo for decent performance, with debugability
3150
CMAKE_FLAGS+=(-DCMAKE_BUILD_TYPE=RelWithDebInfo)
@@ -43,7 +62,9 @@ CMAKE_FLAGS+=(-DBUILD_SHARED_LIBS=ON)
4362
if [[ "${target}" == x86_64-apple* ]]; then
4463
CMAKE_FLAGS+=(-DCMAKE_OSX_DEPLOYMENT_TARGET:STRING=10.12)
4564
fi
65+
4666
cmake -B build -S enzyme -GNinja ${CMAKE_FLAGS[@]}
67+
4768
ninja -C build -j ${nproc} install
4869
"""
4970

@@ -60,12 +81,14 @@ for llvm_version in llvm_versions, llvm_assertions in (false, true)
6081
# Dependencies that must be installed before this package can be built
6182
llvm_name = llvm_assertions ? "LLVM_full_assert_jll" : "LLVM_full_jll"
6283
dependencies = [
84+
HostBuildDependency(PackageSpec(name=llvm_name, version=llvm_version)),
6385
BuildDependency(PackageSpec(name=llvm_name, version=llvm_version))
6486
]
6587

6688
# The products that we will ensure are always built
6789
products = Product[
6890
LibraryProduct(["libEnzyme-$(llvm_version.major)", "libEnzyme"], :libEnzyme, dont_dlopen=true),
91+
LibraryProduct(["libEnzymeBCLoad-$(llvm_version.major)", "libEnzymeBCLoad"], :libEnzymeBCLoad, dont_dlopen=true),
6992
]
7093

7194
for platform in platforms

enzyme/BCLoad/BCLoader.cpp

Lines changed: 81 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,46 +6,99 @@
66
#include "llvm/Support/SourceMgr.h"
77
#include "llvm/Support/raw_ostream.h"
88

9+
#include <map>
910
#include <set>
1011
#include <string>
1112

1213
using namespace llvm;
1314

14-
cl::opt<std::string> BCPath("bcpath", cl::init(""), cl::Hidden,
15-
cl::desc("Path to BC definitions"));
15+
// blas_headers.h is produced at build time by makeblas.cmake. It defines the
// __data_* byte arrays and a std::map<std::string, const char*> whose variable
// name is the DATA macro; defining DATA here makes that map EnzymeBlasBC.
// The generated header contains no #include lines of its own, so <map> and
// <string> have to be included before this point.
#define DATA EnzymeBlasBC
16+
#include "blas_headers.h"
17+
#undef DATA
1618

17-
namespace {
18-
class BCLoader : public ModulePass {
19-
public:
20-
static char ID;
21-
BCLoader() : ModulePass(ID) {}
19+
// Links embedded BLAS IR into M for every function that M declares but does
// not define.
//
// For each body-less function F in M, three candidate keys are tried in order
// against the EnzymeBlasBC map: F's own name, then "cblas_" + name with a
// trailing "_" removed, then "cblas_" + name with a trailing "_64_" removed.
// The first hit queues that entry's IR buffer. A hit through the "_" form also
// requests the __data_fblas32 wrapper module, and a hit through the "_64_"
// form requests __data_fblas64. Each queued buffer is parsed in M's context
// using M's data layout string, linked into M, and every function that the
// buffer defined is then given internal linkage in M.
//
// Returns true if at least one buffer was processed, false otherwise.
//
// Reading note: this listing is a diff view. Lines that follow an "NN-" marker
// belong to the removed implementation, which parsed cblas_ddot_double.bc
// from the directory given by the -bcpath option.
bool provideDefinitions(Module &M) {
20+
std::vector<const char *> todo;
21+
bool seen32 = false;
22+
bool seen64 = false;
23+
for (auto &F : M) {
24+
// Functions that already have a body are left alone.
if (!F.empty())
25+
continue;
26+
// index tracks which postfix is being tried: 0 = "", 1 = "_", 2 = "_64_".
int index = 0;
27+
for (auto postfix : {"", "_", "_64_"}) {
28+
std::string str;
29+
if (strlen(postfix) == 0)
30+
str = F.getName().str();
31+
else if (F.getName().endswith(postfix)) {
32+
str = "cblas_" +
33+
F.getName().substr(0, F.getName().size() - strlen(postfix)).str();
34+
}
35+
36+
// If the postfix did not match, str is still empty and the lookup is done
// with the empty key.
auto found = EnzymeBlasBC.find(str);
37+
if (found != EnzymeBlasBC.end()) {
38+
todo.push_back(found->second);
39+
if (index == 1)
40+
seen32 = true;
41+
if (index == 2)
42+
seen64 = true;
43+
break;
44+
}
45+
index++;
46+
}
47+
}
48+
49+
// Push fortran wrapper libs before all the other blas
50+
// to ensure the fortran injections have their code
51+
// replaced
52+
if (seen32)
53+
todo.insert(todo.begin(), __data_fblas32);
54+
if (seen64)
55+
todo.insert(todo.begin(), __data_fblas64);
56+
bool changed = false;
57+
for (auto mod : todo) {
58+
SMDiagnostic Err;
59+
// NOTE(review): StringRef(const char *) determines its length with strlen,
// so each embedded array needs a terminating NUL byte. makeblas.cmake emits
// the arrays with its "00" append commented out -- confirm the buffers are
// terminated.
MemoryBufferRef buf(StringRef(mod), StringRef("bcloader"));
2260

23-
bool runOnModule(Module &M) override {
24-
std::set<std::string> bcfuncs = {"cblas_ddot"};
25-
for (std::string name : bcfuncs) {
26-
if (name == "cblas_ddot") {
27-
SMDiagnostic Err;
2861
#if LLVM_VERSION_MAJOR <= 10
29-
auto BC = llvm::parseIRFile(
30-
BCPath + "/cblas_ddot_double.bc", Err, M.getContext(), true,
31-
M.getDataLayout().getStringRepresentation());
62+
auto BC = llvm::parseIR(buf, Err, M.getContext(), true,
63+
M.getDataLayout().getStringRepresentation());
3264
#else
33-
auto BC = llvm::parseIRFile(
34-
BCPath + "/cblas_ddot_double.bc", Err, M.getContext(),
35-
[&](StringRef) {
36-
return Optional<std::string>(
37-
M.getDataLayout().getStringRepresentation());
38-
});
65+
auto BC = llvm::parseIR(buf, Err, M.getContext(), [&](StringRef) {
66+
return Optional<std::string>(M.getDataLayout().getStringRepresentation());
67+
});
3968
#endif
40-
if (!BC)
41-
Err.print("bcloader", llvm::errs());
42-
assert(BC);
43-
Linker L(M);
44-
L.linkInModule(std::move(BC));
45-
}
69+
if (!BC)
70+
Err.print("bcloader", llvm::errs());
71+
// NOTE(review): assert is compiled out under NDEBUG; after a parse failure
// the `*BC` dereference below would then act on a null module.
assert(BC);
72+
// Record the names this buffer defines before BC is moved into the linker,
// so they can be looked up in M afterwards.
SmallVector<std::string, 1> toReplace;
73+
for (auto &F : *BC) {
74+
if (F.empty())
75+
continue;
76+
toReplace.push_back(F.getName().str());
77+
}
78+
Linker L(M);
79+
L.linkInModule(std::move(BC));
80+
for (auto name : toReplace) {
81+
if (auto F = M.getFunction(name))
82+
F->setLinkage(Function::LinkageTypes::InternalLinkage);
4683
}
47-
return true;
84+
changed = true;
4885
}
86+
return changed;
87+
}
88+
89+
// C-linkage entry point: unwraps the LLVMModuleRef and runs provideDefinitions
// on the underlying Module. The bool result is returned as uint8_t, i.e. 1
// when provideDefinitions reported a change and 0 otherwise.
extern "C" {
90+
uint8_t EnzymeBitcodeReplacement(LLVMModuleRef M) {
91+
return provideDefinitions(*unwrap(M));
92+
}
93+
}
94+
95+
// ModulePass wrapper around provideDefinitions: runOnModule forwards the
// module and returns whatever provideDefinitions returns.
namespace {
96+
class BCLoader : public ModulePass {
97+
public:
98+
// Pass identifier handed to the ModulePass constructor; only the declaration
// is visible in this view.
static char ID;
99+
BCLoader() : ModulePass(ID) {}
100+
101+
bool runOnModule(Module &M) override { return provideDefinitions(M); }
49102
};
50103
} // namespace
51104

enzyme/BCLoad/CMakeLists.txt

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,62 @@
11
set(CMAKE_CXX_STANDARD 17)
22
set(CMAKE_CXX_STANDARD_REQUIRED ON)
33

4+
set(BC_LOAD_FLAGS "" CACHE STRING "")
5+
set(BC_LOAD_HEADER "" CACHE STRING "")
6+
7+
# Defines the `blasheaders` target, which places blas_headers.h in
# ${CMAKE_CURRENT_BINARY_DIR}/gsl:
#  - BC_LOAD_HEADER set: copy that pre-generated header into place.
#  - otherwise, if Clang was found: fetch the projects below, compile their
#    C sources to textual LLVM IR (-S -emit-llvm) and run makeblas.cmake over
#    the results.
# When neither condition holds, no blasheaders target is created.
if (NOT ("${BC_LOAD_HEADER}" STREQUAL ""))
8+
# NOTE(review): plain `cp` does not create the gsl/ directory; confirm it
# exists when this branch is taken.
add_custom_target(blasheaders cp ${BC_LOAD_HEADER} "${CMAKE_CURRENT_BINARY_DIR}/gsl/blas_headers.h")
9+
set_target_properties(blasheaders PROPERTIES EXCLUDE_FROM_ALL TRUE)
10+
elseif (${Clang_FOUND})
11+
include(ExternalProject)
12+
# gsl: after `make -C gsl`, every cblas/*.c file is compiled with the clang
# target at -O1, passing BC_LOAD_FLAGS through to the compiler.
ExternalProject_Add(gsl
13+
GIT_REPOSITORY https://github.com/ampl/gsl
14+
GIT_TAG 6e40fb13a501393f3a9deb7c4fcbee85241a0339
15+
PREFIX gsl
16+
BUILD_IN_SOURCE 1
17+
INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/gsl/install
18+
CONFIGURE_COMMAND autoreconf -i && ./configure --prefix=${CMAKE_CURRENT_BINARY_DIR}/gsl/install
19+
BUILD_COMMAND sh -c "make -C gsl && $<TARGET_FILE:clang> cblas/*.c ${BC_LOAD_FLAGS} -I . -S -emit-llvm -O1"
20+
INSTALL_COMMAND ""
21+
UPDATE_COMMAND ""
22+
TEST_COMMAND ""
23+
)
24+
set_target_properties(gsl PROPERTIES EXCLUDE_FROM_ALL TRUE)
25+
26+
27+
# openblas: patched by fixopenblas.sh, then built through `make -C interface`
# with CC set to clang emitting LLVM IR.
# NOTE(review): blasheaders below depends on gsl and fblas only, and this
# target is EXCLUDE_FROM_ALL, so nothing visible here triggers it -- confirm
# whether it is still needed. BC_LOAD_FLAGS is also not passed to this build.
ExternalProject_Add(openblas
28+
GIT_REPOSITORY https://github.com/xianyi/OpenBLAS
29+
GIT_TAG 18b19d135b0e80219531219a90c1b78dd9249ad0
30+
PREFIX openblas
31+
BUILD_IN_SOURCE 1
32+
INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/openblas/install
33+
CONFIGURE_COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/fixopenblas.sh $<TARGET_FILE:clang>
34+
BUILD_COMMAND sh -c "NO_CBLAS=1 GEMM_MULTITHREAD_THRESHOLD=4 COMMON_OPT=\"\" CC=\"$<TARGET_FILE:clang> -DDTB_DEFAULT_ENTRIES=64 -DGEMM_DEFAULT_ALIGN=\"0UL\" -DGEMM_DEFAULT_OFFSET_A=0 -DGEMM_DEFAULT_OFFSET_B=0 -DGEMM_P=0 -DGEMM_Q=0 -DGEMM_MULTITHREAD_THRESHOLD=4 -Dnthreads=1 -S -emit-llvm -O1\" AR=\"echo\" make -C interface VERBOSE=1 NUM_THREADS=1"
35+
INSTALL_COMMAND ""
36+
UPDATE_COMMAND ""
37+
TEST_COMMAND ""
38+
)
39+
set_target_properties(openblas PROPERTIES EXCLUDE_FROM_ALL TRUE)
40+
41+
# fblas: fixwrapblas.sh generates BLAS/WRAP/bclib32.c and bclib64.c, which are
# then compiled to LLVM IR with the clang target.
# NOTE(review): INSTALL_DIR names the openblas directory; with an empty
# INSTALL_COMMAND it appears unused, but it looks like a copy/paste leftover.
ExternalProject_Add(fblas
42+
GIT_REPOSITORY https://github.com/UCSantaCruzComputationalGenomicsLab/clapack
43+
GIT_TAG 8bac8d5cd7aa8506b11cdb2cfa2ce8a2e03048f3
44+
PREFIX fblas
45+
BUILD_IN_SOURCE 1
46+
INSTALL_DIR ${CMAKE_CURRENT_BINARY_DIR}/openblas/install
47+
CONFIGURE_COMMAND sh ${CMAKE_CURRENT_SOURCE_DIR}/fixwrapblas.sh
48+
BUILD_COMMAND sh -c "$<TARGET_FILE:clang> ${BC_LOAD_FLAGS} -S -emit-llvm -O1 BLAS/WRAP/bclib32.c BLAS/WRAP/bclib64.c -I ../../INCLUDE"
49+
INSTALL_COMMAND ""
50+
UPDATE_COMMAND ""
51+
TEST_COMMAND ""
52+
)
53+
set_target_properties(fblas PROPERTIES EXCLUDE_FROM_ALL TRUE)
54+
55+
# Copy makeblas.cmake in as gsl/CMakeLists.txt and configure it in place; that
# configure run is what writes blas_headers.h.
add_custom_target(blasheaders cp "${CMAKE_CURRENT_SOURCE_DIR}/makeblas.cmake" "${CMAKE_CURRENT_BINARY_DIR}/gsl/CMakeLists.txt" && cd "${CMAKE_CURRENT_BINARY_DIR}/gsl" && ${CMAKE_COMMAND} . DEPENDS gsl fblas ${CMAKE_CURRENT_SOURCE_DIR}/makeblas.cmake)
56+
set_target_properties(blasheaders PROPERTIES EXCLUDE_FROM_ALL TRUE)
57+
endif()
58+
59+
if ((NOT ("${BC_LOAD_HEADER}" STREQUAL "")) OR ${Clang_FOUND})
460
if (${LLVM_VERSION_MAJOR} LESS 8)
561
add_llvm_loadable_module( BCPass-${LLVM_VERSION_MAJOR}
662
BCLoader.cpp ClangBCLoader.cpp
@@ -19,6 +75,7 @@ if ((WIN32 OR CYGWIN) AND LLVM_LINK_LLVM_DYLIB)
1975
intrinsics_gen
2076
LINK_COMPONENTS
2177
LLVM
78+
BUILDTREE_ONLY
2279
)
2380
else()
2481
add_llvm_library( BCPass-${LLVM_VERSION_MAJOR}
@@ -28,12 +85,35 @@ else()
2885
intrinsics_gen
2986
PLUGIN_TOOL
3087
opt
88+
BUILDTREE_ONLY
3189
)
3290
endif()
3391
endif()
3492

93+
add_dependencies(BCPass-${LLVM_VERSION_MAJOR} blasheaders)
94+
target_include_directories(BCPass-${LLVM_VERSION_MAJOR} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gsl)
95+
3596
if (APPLE)
3697
# Darwin-specific linker flags for loadable modules.
3798
set_target_properties(BCPass-${LLVM_VERSION_MAJOR} PROPERTIES
3899
LINK_FLAGS "-Wl,-flat_namespace -Wl,-undefined -Wl,suppress")
39100
endif()
101+
102+
set_target_properties(BCPass-${LLVM_VERSION_MAJOR} PROPERTIES EXCLUDE_FROM_ALL TRUE)
103+
104+
if (${ENZYME_EXTERNAL_SHARED_LIB})
105+
add_library( EnzymeBCLoad-${LLVM_VERSION_MAJOR}
106+
SHARED
107+
BCLoader.cpp
108+
)
109+
add_dependencies(EnzymeBCLoad-${LLVM_VERSION_MAJOR} blasheaders)
110+
target_include_directories(EnzymeBCLoad-${LLVM_VERSION_MAJOR} PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gsl)
111+
112+
target_link_libraries(EnzymeBCLoad-${LLVM_VERSION_MAJOR} LLVM)
113+
install(TARGETS EnzymeBCLoad-${LLVM_VERSION_MAJOR}
114+
EXPORT EnzymeTargets
115+
LIBRARY DESTINATION lib COMPONENT shlib
116+
PUBLIC_HEADER DESTINATION "${INSTALL_INCLUDE_DIR}/Enzyme"
117+
COMPONENT dev)
118+
endif()
119+
endif()

enzyme/BCLoad/fixopenblas.sh

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Patches an OpenBLAS source tree in place so that it builds with the compiler
# given as $1 (the BCLoad CMakeLists.txt passes the clang executable path).
# Expected to run from the root of the OpenBLAS checkout, since all file paths
# below are relative.
set -x
2+
3+
# Escape every '/' in $1 so the path can be used inside the sed replacement
# on the next command.
compiler=`echo $1|sed -e 's/\//\\\\\//g'`
4+
5+
# c_check: force compiler_name to the given compiler and blank out flags.
sed "s/compiler_name = .*/compiler_name = \"$compiler\";/g" -i c_check
6+
sed "s/flags = .*/flags = \"\";/g" -i c_check
7+
# Makefile.prebuild: retarget the `all:` rule, then delete every line that
# mentions getarch or avx512.
sed "s/all: getarch_2nd/all: \$(TARGET_CONF) dummy/g" -i Makefile.prebuild
8+
sed "s/f_check getarch/f_check/g" -i Makefile.prebuild
9+
sed "/getarch/d" -i Makefile.prebuild
10+
sed "/avx512/d" -i Makefile.prebuild
11+
# Makefile.rule: strip the leading "# " from GEMM_MULTI... lines.
sed "s/# GEMM_MULTI/GEMM_MULTI/g" -i Makefile.rule
12+
# Makefile.system: drop -O2 from the COMMON_OPT default.
sed "s/COMMON_OPT = -O2/COMMON_OPT =/g" -i Makefile.system
13+
# common_param.h: remove the GEMM_P / GEMM_Q defines (presumably so the
# -DGEMM_P=0 -DGEMM_Q=0 given on the build command line apply -- confirm).
sed "/#define GEMM_P/d" -i common_param.h
14+
sed "/#define GEMM_Q/d" -i common_param.h
15+
# Overwrite exports/gensymbol with a single empty line.
echo > exports/gensymbol

enzyme/BCLoad/fixwrapblas.sh

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Generates BLAS/WRAP/bclib32.c and BLAS/WRAP/bclib64.c from the clapack
# wrapper source. Expected to run from the root of the clapack checkout, since
# all paths below are relative.
#
# Step 1: rename each `f2c_NAME(` in cblaswr.c to `NAME_EXTRAWIDTH(`.
sed "s/f2c_\(.*\)(/\1_EXTRAWIDTH(/g" -i BLAS/WRAP/cblaswr.c
2+
# Step 2: build stage.c from <stdint.h>, the contents of f2c.h and the
# rewritten cblaswr.c.
# NOTE(review): these use `>>`, so running the script a second time appends
# the same content to stage.c again.
echo "#include <stdint.h>" >> BLAS/WRAP/stage.c
3+
cat INCLUDE/f2c.h >> BLAS/WRAP/stage.c
4+
cat BLAS/WRAP/cblaswr.c >> BLAS/WRAP/stage.c
5+
6+
# f2c.h was inlined above, so blank out the remaining include directives.
sed "s/#include \"f2c.h\"//g" -i BLAS/WRAP/stage.c
7+
8+
# Step 3: emit two variants. bclib32.c uses an int32_t `integer` and names of
# the form NAME_; bclib64.c uses an int64_t `integer` and names of the form
# NAME_64_.
sed "s/typedef long int integer;/typedef int32_t integer;/g" BLAS/WRAP/stage.c | sed "s/EXTRAWIDTH//g" > BLAS/WRAP/bclib32.c
9+
sed "s/typedef long int integer;/typedef int64_t integer;/g" BLAS/WRAP/stage.c | sed "s/EXTRAWIDTH/64_/g" > BLAS/WRAP/bclib64.c

enzyme/BCLoad/makeblas.cmake

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
# Generates blas_headers.h. The blasheaders target copies this file in as the
# CMakeLists.txt of the gsl build directory and configures it there.
# The generated header contains:
#   - one `const char __data_<name>[]` byte array per src/gsl/*.ll file whose
#     path does not match "test",
#   - a std::map<std::string, const char*> named DATA that maps
#     "cblas_<name>" to the matching array,
#   - __data_fblas32 / __data_fblas64 arrays built from the fblas
#     bclib32.ll / bclib64.ll files.
cmake_minimum_required(VERSION 3.9)
2+
project(BLASHeader)
3+
4+
file(GLOB BLAS_LL "${CMAKE_CURRENT_SOURCE_DIR}/src/gsl/*.ll")
5+
set (NEED_COMMA FALSE)
6+
list(FILTER BLAS_LL EXCLUDE REGEX ".*test.*")
7+
8+
# Start from an empty header; everything below appends to it.
file(WRITE ${CMAKE_CURRENT_SOURCE_DIR}/blas_headers.h "")
9+
foreach(file ${BLAS_LL})
10+
get_filename_component(variableName ${file} NAME_WE)
11+
12+
# Read the file as a hex string, then turn each byte pair into "0xNN,".
file(READ ${file} hexString HEX)
13+
14+
# NOTE(review): with the line below disabled, the arrays carry no terminating
# 0x00 byte, yet BCLoader.cpp wraps them with StringRef(const char *) --
# confirm whether a terminator is required.
# set(hexString "${hexString}00")
15+
16+
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," arrayValues ${hexString})
17+
string(REGEX REPLACE ",$" "" arrayValues ${arrayValues})
18+
19+
file(APPEND ${CMAKE_CURRENT_SOURCE_DIR}/blas_headers.h "const char __data_${variableName}[] = {${arrayValues}};\n")
20+
endforeach()
21+
22+
file(APPEND ${CMAKE_CURRENT_SOURCE_DIR}/blas_headers.h "std::map<std::string, const char*> DATA = {\n")
23+
foreach(file ${BLAS_LL})
24+
get_filename_component(variableName ${file} NAME_WE)
25+
# emits one { "cblas_<name>", __data_<name> } map entry; entries after the
# first are preceded by a comma
26+
if (${NEED_COMMA})
27+
file(APPEND ${CMAKE_CURRENT_SOURCE_DIR}/blas_headers.h ",\n")
28+
endif()
29+
set(arrayDefinition "{ \"cblas_${variableName}\", __data_${variableName} }")
30+
file(APPEND ${CMAKE_CURRENT_SOURCE_DIR}/blas_headers.h "${arrayDefinition}")
31+
set (NEED_COMMA TRUE)
32+
endforeach()
33+
# NOTE(review): this append targets CMAKE_CURRENT_BINARY_DIR while every other
# write uses CMAKE_CURRENT_SOURCE_DIR; the two coincide only for an in-source
# configure (which is how the blasheaders target invokes this file).
file(APPEND ${CMAKE_CURRENT_BINARY_DIR}/blas_headers.h "\n};\n")
34+
35+
# Fortran-ABI wrapper modules, embedded the same way as the arrays above.
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/../fblas/src/fblas/bclib32.ll" hexString HEX)
36+
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," arrayValues ${hexString})
37+
string(REGEX REPLACE ",$" "" arrayValues ${arrayValues})
38+
file(APPEND ${CMAKE_CURRENT_SOURCE_DIR}/blas_headers.h "const char __data_fblas32[] = {${arrayValues}};\n")
39+
40+
file(READ "${CMAKE_CURRENT_SOURCE_DIR}/../fblas/src/fblas/bclib64.ll" hexString HEX)
41+
string(REGEX REPLACE "([0-9a-f][0-9a-f])" "0x\\1," arrayValues ${hexString})
42+
string(REGEX REPLACE ",$" "" arrayValues ${arrayValues})
43+
file(APPEND ${CMAKE_CURRENT_SOURCE_DIR}/blas_headers.h "const char __data_fblas64[] = {${arrayValues}};\n")
44+

enzyme/test/BCLoader/CMakeLists.txt

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
# Run regression and unit tests
2+
# The check-bcpass lit suite is registered only when Clang was found. In the
# added lines it depends on the BCPass-<LLVM major> target; the removed line
# used ${ENZYME_TEST_DEPS} instead.
if (${Clang_FOUND})
23
add_lit_testsuite(check-bcpass "Running BCPass regression tests"
34
${CMAKE_CURRENT_BINARY_DIR}
4-
DEPENDS ${ENZYME_TEST_DEPS}
5+
DEPENDS BCPass-${LLVM_VERSION_MAJOR}
56
ARGS -v
67
)
78

89
set_target_properties(check-bcpass PROPERTIES FOLDER "Tests")
9-
10+
endif()

0 commit comments

Comments
 (0)