Skip to content

Commit 48f0b9d

Browse files
committed
Get the arguments emcc would pass to clang
1 parent 46ffd1f commit 48f0b9d

6 files changed

+109
-24
lines changed

BUILD

+10-1
Original file line numberDiff line numberDiff line change
@@ -25,4 +25,13 @@ filegroup(
2525
# Implementation:
2626
# If you are looking into the implementation, start with the overview in ImplementationReadme.md.
2727

28-
exports_files(["refresh.template.py", "check_python_version.template.py"]) # For implicit use by the refresh_compile_commands macro, not direct use.
28+
exports_files(["refresh.template.py", "check_python_version.template.py", "print_args.py"]) # For implicit use by the refresh_compile_commands macro, not direct use.
29+
30+
filegroup(
31+
name = "print_args",
32+
srcs = select({
33+
"@bazel_tools//src/conditions:host_windows": [":print_args.bat"],
34+
"//conditions:default": [":print_args.sh"],
35+
}),
36+
visibility = ["//visibility:public"],
37+
)

print_args.bat

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
@ECHO OFF

REM Forwards every argument this wrapper receives to print_args.py, whose location is
REM supplied through the HEDRON_COMPILE_COMMANDS_PRINT_ARGS_PY environment variable.
REM The path is quoted so that a script location containing spaces stays a single argument.
py -3 "%HEDRON_COMPILE_COMMANDS_PRINT_ARGS_PY%" %*

print_args.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
"""
2+
Prints the arguments passed to the script
3+
"""
4+
5+
import sys
6+
7+
8+
def main():
    """Print the script's arguments, one per line, framed by begin/end marker lines, then exit with status 1."""
    framed_lines = ['===HEDRON_COMPILE_COMMANDS_BEGIN_ARGS===']
    framed_lines.extend(sys.argv[1:])
    framed_lines.append('===HEDRON_COMPILE_COMMANDS_END_ARGS===')
    for line in framed_lines:
        print(line)

    # The non-zero exit code is deliberate: it makes the emcc process exit after running this fake clang wrapper.
    sys.exit(1)
16+
17+
18+
if __name__ == '__main__':
19+
main()

print_args.sh

+3
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/bin/bash

# Forwards every argument this wrapper receives to print_args.py, whose location is
# supplied through the HEDRON_COMPILE_COMMANDS_PRINT_ARGS_PY environment variable.
# ${VAR:?message} aborts with a readable error if the variable is unset or empty,
# rather than letting python3 fail on an empty script path.
exec python3 "${HEDRON_COMPILE_COMMANDS_PRINT_ARGS_PY:?must be set to the path of print_args.py}" "$@"

refresh.template.py

+69-23
Original file line numberDiff line numberDiff line change
@@ -283,17 +283,6 @@ def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_ke
283283
return headers, should_cache
284284

285285

286-
@functools.lru_cache(maxsize=None)
287-
def _get_clang_or_gcc():
288-
"""Returns clang or gcc, if you have one of them on your path."""
289-
if shutil.which('clang'):
290-
return 'clang'
291-
elif shutil.which('gcc'):
292-
return 'gcc'
293-
else:
294-
return None
295-
296-
297286
def windows_list2cmdline(seq):
298287
"""
299288
Copied from list2cmdline in https://github.com/python/cpython/blob/main/Lib/subprocess.py because we need it but it's not exported as part of the public API.
@@ -567,18 +556,7 @@ def _get_headers(compile_action, source_path: str):
567556
if compile_action.arguments[0].endswith('cl.exe'): # cl.exe and also clang-cl.exe
568557
headers, should_cache = _get_headers_msvc(compile_action.arguments, source_path)
569558
else:
570-
# Emscripten is tricky. There isn't an easy way to make it emcc run without lots of environment variables.
571-
# So...rather than doing our usual script unwrapping, we just swap in clang/gcc and use that to get headers, knowing that they'll accept the same argument format.
572-
# You can unwrap emcc.sh to emcc.py via next(pathlib.Path('external').glob('emscripten_bin_*/emscripten/emcc.py')).as_posix()
573-
# But then the underlying emcc needs a configuration file that itself depends on lots of environment variables.
574-
# If we ever pick this back up, note that you can supply that config via compile_args += ["--em-config", "external/emsdk/emscripten_toolchain/emscripten_config"]
575-
args = compile_action.arguments
576-
if args[0].endswith('emcc.sh') or args[0].endswith('emcc.bat'):
577-
alternate_compiler = _get_clang_or_gcc()
578-
if not alternate_compiler: return set() # Skip getting headers.
579-
args = args.copy()
580-
args[0] = alternate_compiler
581-
headers, should_cache = _get_headers_gcc(args, source_path, compile_action.actionKey)
559+
headers, should_cache = _get_headers_gcc(compile_action.arguments, source_path, compile_action.actionKey)
582560

583561
# Cache for future use
584562
if output_file and should_cache:
@@ -767,6 +745,73 @@ def _apple_platform_patch(compile_args: typing.List[str]):
767745
return compile_args
768746

769747

748+
def _get_sysroot(args: typing.List[str]):
749+
for idx, arg in enumerate(args):
750+
if arg == '--sysroot' or arg == '-isysroot':
751+
if idx + 1 < len(args):
752+
return pathlib.PurePath(args[idx + 1])
753+
elif arg.startswith('--sysroot='):
754+
return pathlib.PurePath(arg[len('--sysroot='):])
755+
elif arg.startswith('-isysroot'):
756+
return pathlib.PurePath(arg[len('-isysroot'):])
757+
return None
758+
759+
760+
def _emscripten_platform_patch(compile_args: typing.List[str]):
761+
"""De-Bazel the command into something clangd can parse.
762+
763+
This function has fixes specific to Emscripten platforms, but you should call it on all platforms. It'll determine whether the fixes should be applied or not
764+
"""
765+
emcc_driver = pathlib.Path(compile_args[0])
766+
if emcc_driver.name != 'emcc.sh' and emcc_driver.name != 'emcc.bat':
767+
return compile_args
768+
769+
workspace_absolute = pathlib.PurePath(os.environ["BUILD_WORKSPACE_DIRECTORY"])
770+
sysroot = _get_sysroot(compile_args)
771+
assert sysroot, f'Emscripten sysroot not detected in CMD: {compile_args}'
772+
773+
def get_workspace_root(path_from_execroot: pathlib.PurePath):
774+
assert path_from_execroot.parts[0] == 'external'
775+
return pathlib.PurePath('external') / path_from_execroot.parts[1]
776+
777+
environment = {
778+
'EXT_BUILD_ROOT': str(workspace_absolute),
779+
'EM_BIN_PATH': str(get_workspace_root(sysroot)),
780+
'EM_CONFIG_PATH': str(get_workspace_root(emcc_driver) / 'emscripten_toolchain' / 'emscripten_config'),
781+
'EMCC_SKIP_SANITY_CHECK': '1',
782+
'EM_COMPILER_WRAPPER': str(pathlib.PurePath({print_args_executable})),
783+
'HEDRON_COMPILE_COMMANDS_PRINT_ARGS_PY': str(pathlib.PurePath({print_args_py})),
784+
'PATH': os.environ['PATH'],
785+
}
786+
787+
# We run the emcc process with the environment variable EM_COMPILER_WRAPPER to intercept the command line arguments passed to `clang`.
788+
emcc_process = subprocess.run(
789+
[emcc_driver] + compile_args[1:],
790+
# MIN_PY=3.7: Replace PIPEs with capture_output.
791+
stdout=subprocess.PIPE,
792+
stderr=subprocess.PIPE,
793+
env=environment,
794+
encoding=locale.getpreferredencoding(),
795+
check=False, # We explicitly ignore errors and carry on.
796+
)
797+
798+
lines = emcc_process.stdout.splitlines()
799+
800+
# Parse the arguments from the output of the emcc process.
801+
if BEGIN_ARGS_MARKER in lines:
802+
begin_args_idx = lines.index(BEGIN_ARGS_MARKER)
803+
end_args_idx = lines.index(END_ARGS_MARKER, begin_args_idx + 1)
804+
args = lines[begin_args_idx + 1:end_args_idx]
805+
clang_driver = pathlib.PurePath(args[0])
806+
if _is_relative_to(clang_driver, workspace_absolute):
807+
args[0] = clang_driver.relative_to(workspace_absolute).as_posix()
808+
return args
809+
810+
assert False, f'Failed to parse emcc output: {emcc_process.stderr}'
811+
BEGIN_ARGS_MARKER = '===HEDRON_COMPILE_COMMANDS_BEGIN_ARGS==='
812+
END_ARGS_MARKER = '===HEDRON_COMPILE_COMMANDS_END_ARGS==='
813+
814+
770815
def _all_platform_patch(compile_args: typing.List[str]):
771816
"""Apply de-Bazeling fixes to the compile command that are shared across target platforms."""
772817
# clangd writes module cache files to the wrong place
@@ -1020,6 +1065,7 @@ def _get_cpp_command_for_files(compile_action):
10201065
# Patch command by platform
10211066
compile_action.arguments = _all_platform_patch(compile_action.arguments)
10221067
compile_action.arguments = _apple_platform_patch(compile_action.arguments)
1068+
compile_action.arguments = _emscripten_platform_patch(compile_action.arguments)
10231069
# Android and Linux and grailbio LLVM toolchains: Fine as is; no special patching needed.
10241070

10251071
source_files, header_files = _get_files(compile_action)

refresh_compile_commands.bzl

+5
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ def refresh_compile_commands(
9898
version_checker_script_name,
9999
script_name,
100100
],
101+
data = ["@hedron_compile_commands//:print_args"],
101102
imports = [''], # Allows binary to import templated script, even if this macro is being called inside a sub package. See https://github.com/hedronvision/bazel-compile-commands-extractor/issues/137
102103
**kwargs
103104
)
@@ -115,6 +116,8 @@ def _expand_template_impl(ctx):
115116
" {windows_default_include_paths}": "\n".join([" %r," % path for path in find_cpp_toolchain(ctx).built_in_include_directories]), # find_cpp_toolchain is from https://docs.bazel.build/versions/main/integrating-with-rules-cc.html
116117
"{exclude_headers}": repr(ctx.attr.exclude_headers),
117118
"{exclude_external_sources}": repr(ctx.attr.exclude_external_sources),
119+
"{print_args_executable}": repr(ctx.executable._print_args_executable.path),
120+
"{print_args_py}": repr(ctx.file._print_args_py.path),
118121
},
119122
)
120123
return DefaultInfo(files = depset([script]))
@@ -125,6 +128,8 @@ _expand_template = rule(
125128
"exclude_external_sources": attr.bool(default = False),
126129
"exclude_headers": attr.string(values = ["all", "external", ""]), # "" needed only for compatibility with Bazel < 3.6.0
127130
"_script_template": attr.label(allow_single_file = True, default = "refresh.template.py"),
131+
"_print_args_executable": attr.label(executable = True, allow_single_file = True, cfg = "target", default = "print_args"),
132+
"_print_args_py": attr.label(allow_single_file = True, default = "print_args.py"),
128133
# For Windows INCLUDE. If this were eliminated, for example by the resolution of https://github.com/clangd/clangd/issues/123, we'd be able to just use a macro and skylib's expand_template rule: https://github.com/bazelbuild/bazel-skylib/pull/330
129134
# Once https://github.com/bazelbuild/bazel/pull/17108 is widely released, we should be able to eliminate this and get INCLUDE directly. Perhaps for 7.0? Should be released in the successor to 6.0
130135
"_cc_toolchain": attr.label(default = "@bazel_tools//tools/cpp:current_cc_toolchain"),

0 commit comments

Comments
 (0)