Skip to content

Commit b998dca

Browse files
authored
Merge pull request #154 from kon72/emscripten_patch
Get the arguments emcc would pass to clang
2 parents 388cc00 + 87d0cf6 commit b998dca

File tree

4 files changed

+77
-25
lines changed

4 files changed

+77
-25
lines changed

BUILD

+6
Original file line numberDiff line numberDiff line change
@@ -26,3 +26,9 @@ filegroup(
2626
# If you are looking into the implementation, start with the overview in ImplementationReadme.md.
2727

2828
exports_files(["refresh.template.py", "check_python_version.template.py"]) # For implicit use by the refresh_compile_commands macro, not direct use.
29+
30+
# Helper binary that prints the command-line arguments it is invoked with (see print_args.cpp).
# Public so it can be referenced from other packages as @hedron_compile_commands//:print_args.
cc_binary(
31+
name = "print_args",
32+
srcs = ["print_args.cpp"],
33+
visibility = ["//visibility:public"],
34+
)

print_args.cpp

+13
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
// Prints the arguments passed to the script
2+
3+
#include <iostream>
4+
5+
int main(int argc, char *argv[]) {
  // Opening sentinel, so a reader of stdout can find where the argument list starts.
  std::cout << "===HEDRON_COMPILE_COMMANDS_BEGIN_ARGS===\n";

  // Echo every argument after the program name (argv[0]), one per line.
  int index = 1;
  while (index < argc) {
    std::cout << argv[index] << "\n";
    ++index;
  }

  // Closing sentinel, marking the end of the argument list.
  std::cout << "===HEDRON_COMPILE_COMMANDS_END_ARGS===\n";

  // The non-zero exit code is deliberate: it makes the emcc process exit after running this fake clang wrapper.
  return 1;
}

refresh.template.py

+55-25
Original file line numberDiff line numberDiff line change
@@ -283,17 +283,6 @@ def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_ke
283283
return headers, should_cache
284284

285285

286-
@functools.lru_cache(maxsize=None)
def _get_clang_or_gcc():
    """Returns 'clang' if it is on your path, otherwise 'gcc' if that is, otherwise None."""
    # Candidates are listed in order of preference; the first one found wins.
    candidates = ('clang', 'gcc')
    return next((name for name in candidates if shutil.which(name)), None)
295-
296-
297286
def windows_list2cmdline(seq):
298287
"""
299288
Copied from list2cmdline in https://github.com/python/cpython/blob/main/Lib/subprocess.py because we need it but it's not exported as part of the public API.
@@ -567,18 +556,7 @@ def _get_headers(compile_action, source_path: str):
567556
if compile_action.arguments[0].endswith('cl.exe'): # cl.exe and also clang-cl.exe
568557
headers, should_cache = _get_headers_msvc(compile_action.arguments, source_path)
569558
else:
570-
# Emscripten is tricky. There isn't an easy way to make emcc run without lots of environment variables.
571-
# So...rather than doing our usual script unwrapping, we just swap in clang/gcc and use that to get headers, knowing that they'll accept the same argument format.
572-
# You can unwrap emcc.sh to emcc.py via next(pathlib.Path('external').glob('emscripten_bin_*/emscripten/emcc.py')).as_posix()
573-
# But then the underlying emcc needs a configuration file that itself depends on lots of environment variables.
574-
# If we ever pick this back up, note that you can supply that config via compile_args += ["--em-config", "external/emsdk/emscripten_toolchain/emscripten_config"]
575-
args = compile_action.arguments
576-
if args[0].endswith('emcc.sh') or args[0].endswith('emcc.bat'):
577-
alternate_compiler = _get_clang_or_gcc()
578-
if not alternate_compiler: return set() # Skip getting headers.
579-
args = args.copy()
580-
args[0] = alternate_compiler
581-
headers, should_cache = _get_headers_gcc(args, source_path, compile_action.actionKey)
559+
headers, should_cache = _get_headers_gcc(compile_action.arguments, source_path, compile_action.actionKey)
582560

583561
# Cache for future use
584562
if output_file and should_cache:
@@ -767,6 +745,54 @@ def _apple_platform_patch(compile_args: typing.List[str]):
767745
return compile_args
768746

769747

748+
def _emscripten_platform_patch(compile_action):
    """De-Bazel the command into something clangd can parse.

    This function has fixes specific to Emscripten platforms, but you should call it on all platforms. It'll determine whether the fixes should be applied or not.

    compile_action: The action to patch. Reads `arguments` (the compile command, driver first) and `environmentVariables` (a dict of the action's environment).

    Returns `compile_action.arguments` untouched if the driver isn't emcc. Otherwise, returns the arguments emcc would pass to clang.
    Raises AssertionError if those arguments can't be recovered from emcc's output.
    """
    emcc_driver = pathlib.Path(compile_action.arguments[0])
    if not emcc_driver.name.startswith('emcc'):
        return compile_action.arguments

    workspace_absolute = pathlib.PurePath(os.environ["BUILD_WORKSPACE_DIRECTORY"])

    environment = compile_action.environmentVariables.copy()
    environment['EXT_BUILD_ROOT'] = str(workspace_absolute)
    environment['EMCC_SKIP_SANITY_CHECK'] = '1'
    environment['EM_COMPILER_WRAPPER'] = str(pathlib.PurePath({print_args_executable}))
    # Fall back to our own PATH if the action doesn't specify one--but don't crash if we don't have one either.
    if 'PATH' not in environment and 'PATH' in os.environ:
        environment['PATH'] = os.environ['PATH']

    # We run the emcc process with the environment variable EM_COMPILER_WRAPPER to intercept the command line arguments passed to `clang`.
    emcc_process = subprocess.run(
        # On Windows, it fails to spawn the subprocess when the path uses forward slashes as a separator.
        # Here, we convert emcc driver path to use the native path separator.
        [str(emcc_driver)] + compile_action.arguments[1:],
        # MIN_PY=3.7: Replace PIPEs with capture_output.
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        env=environment,
        encoding=locale.getpreferredencoding(),
        check=False, # We explicitly ignore errors and carry on.
    )

    lines = emcc_process.stdout.splitlines()

    # Parse the arguments from the output of the emcc process.
    # A missing begin marker, a missing end marker, and an empty argument list all mean the same thing: emcc never (fully) ran our wrapper.
    try:
        begin_args_idx = lines.index(BEGIN_ARGS_MARKER)
        end_args_idx = lines.index(END_ARGS_MARKER, begin_args_idx + 1)
    except ValueError:
        args = []
    else:
        args = lines[begin_args_idx + 1:end_args_idx]

    if not args:
        # Raised explicitly, rather than via `assert False`, so the failure isn't stripped out when running with python -O.
        raise AssertionError(f'Failed to parse emcc output: {emcc_process.stderr}')

    # The first argument is the clang driver. If it lives inside the workspace, make it workspace-relative.
    clang_driver = pathlib.PurePath(args[0])
    if _is_relative_to(clang_driver, workspace_absolute):
        args[0] = clang_driver.relative_to(workspace_absolute).as_posix()
    return args


# Markers delimiting the argument list in the output of the print_args wrapper. Must match the strings printed by print_args.cpp.
BEGIN_ARGS_MARKER = '===HEDRON_COMPILE_COMMANDS_BEGIN_ARGS==='
END_ARGS_MARKER = '===HEDRON_COMPILE_COMMANDS_END_ARGS==='
794+
795+
770796
def _all_platform_patch(compile_args: typing.List[str]):
771797
"""Apply de-Bazeling fixes to the compile command that are shared across target platforms."""
772798
# clangd writes module cache files to the wrong place
@@ -1023,10 +1049,14 @@ def _get_cpp_command_for_files(compile_action):
10231049
10241050
Undoes Bazel-isms and figures out which files clangd should apply the command to.
10251051
"""
1026-
# Patch command by platform
1027-
compile_action.arguments = _all_platform_patch(compile_action.arguments)
1052+
# Condense aquery's environment variables into a dictionary, the format you might expect.
1053+
compile_action.environmentVariables = {pair.key: pair.value for pair in getattr(compile_action, 'environmentVariables', [])}
1054+
1055+
# Patch command by platform, revealing any hidden arguments.
10281056
compile_action.arguments = _apple_platform_patch(compile_action.arguments)
1057+
compile_action.arguments = _emscripten_platform_patch(compile_action)
10291058
# Android and Linux and grailbio LLVM toolchains: Fine as is; no special patching needed.
1059+
compile_action.arguments = _all_platform_patch(compile_action.arguments)
10301060

10311061
source_files, header_files = _get_files(compile_action)
10321062

refresh_compile_commands.bzl

+3
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def refresh_compile_commands(
9898
version_checker_script_name,
9999
script_name,
100100
],
101+
data = ["@hedron_compile_commands//:print_args"],
101102
imports = [''], # Allows binary to import templated script, even if this macro is being called inside a sub package. See https://github.com/hedronvision/bazel-compile-commands-extractor/issues/137
102103
**kwargs
103104
)
@@ -115,6 +116,7 @@ def _expand_template_impl(ctx):
115116
" {windows_default_include_paths}": "\n".join([" %r," % path for path in find_cpp_toolchain(ctx).built_in_include_directories]), # find_cpp_toolchain is from https://docs.bazel.build/versions/main/integrating-with-rules-cc.html
116117
"{exclude_headers}": repr(ctx.attr.exclude_headers),
117118
"{exclude_external_sources}": repr(ctx.attr.exclude_external_sources),
119+
"{print_args_executable}": repr(ctx.executable._print_args_executable.path),
118120
},
119121
)
120122
return DefaultInfo(files = depset([script]))
@@ -125,6 +127,7 @@ _expand_template = rule(
125127
"exclude_external_sources": attr.bool(default = False),
126128
"exclude_headers": attr.string(values = ["all", "external", ""]), # "" needed only for compatibility with Bazel < 3.6.0
127129
"_script_template": attr.label(allow_single_file = True, default = "refresh.template.py"),
130+
"_print_args_executable": attr.label(executable = True, cfg = "target", default = "//:print_args"),
128131
# For Windows INCLUDE. If this were eliminated, for example by the resolution of https://github.com/clangd/clangd/issues/123, we'd be able to just use a macro and skylib's expand_template rule: https://github.com/bazelbuild/bazel-skylib/pull/330
129132
# Once https://github.com/bazelbuild/bazel/pull/17108 is widely released, we should be able to eliminate this and get INCLUDE directly. Perhaps for 7.0? Should be released in the successor to 6.0
130133
"_cc_toolchain": attr.label(default = "@bazel_tools//tools/cpp:current_cc_toolchain"),

0 commit comments

Comments
 (0)