Skip to content

Commit 0e5b1aa

Browse files
committed
Switch to hermetic python
Allows retiring almost all of the MIN_PYs (orjson fails to import on 3.12). Removes the check python version wrapper. 3dddf20
1 parent 6d58fa6 commit 0e5b1aa

14 files changed

+255
-118
lines changed

.pre-commit-config.yaml

-2
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,7 @@ repos:
77
- id: trailing-whitespace
88
- id: end-of-file-fixer
99
- id: check-ast
10-
exclude: ^check_python_version\.template\.py$ # Template for bazel creates syntax error
1110
- id: debug-statements
12-
exclude: ^check_python_version\.template\.py$ # Template for bazel creates syntax error
1311
- id: mixed-line-ending
1412
- id: check-case-conflict
1513
- id: fix-byte-order-marker

BUILD

+5-2
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,10 @@ filegroup(
1515
name = "bzl_srcs_for_stardoc",
1616
visibility = ["//visibility:public"],
1717
srcs = glob(["**/*.bzl"]) + [
18-
"@bazel_tools//tools:bzl_srcs"
18+
"@bazel_tools//tools:bzl_srcs",
19+
"@hedron_compile_commands_pip//:requirements.bzl",
20+
"@python_3_11//:defs.bzl",
21+
"@rules_python//:bzl",
1922
],
2023
)
2124

@@ -25,7 +28,7 @@ filegroup(
2528
# Implementation:
2629
# If you are looking into the implementation, start with the overview in ImplementationReadme.md.
2730

28-
exports_files(["refresh.template.py", "check_python_version.template.py"]) # For implicit use by the refresh_compile_commands macro, not direct use.
31+
exports_files(["refresh.template.py"]) # For implicit use by the refresh_compile_commands macro, not direct use.
2932

3033
cc_binary(
3134
name = "print_args",

MODULE.bazel

+20
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,23 @@
11
module(name = "hedron_compile_commands")
22

33
use_extension("//:workspace_setup.bzl", "hedron_compile_commands_extension")
4+
use_extension("//:workspace_setup_transitive.bzl", "hedron_compile_commands_extension")
5+
use_extension("//:workspace_setup_transitive_transitive.bzl", "hedron_compile_commands_extension")
6+
use_extension("//:workspace_setup_transitive_transitive_transitive.bzl", "hedron_compile_commands_extension")
7+
8+
# While we're supporting the WORKSPACE, we need to load rules_python through its WORKSPACE mechanism because the (currently unstable) bzlmod APIs differ just enough that loads would fail if you tried to support both at the same time.
9+
# But this is how you'd load rules_python from bzlmod:
10+
# bazel_dep(name = "rules_python", version = "0.27.1")
11+
# python = use_extension("@rules_python//python/extensions:python.bzl", "python")
12+
# python.toolchain(
13+
# python_version = "3.11",
14+
# )
15+
# use_repo(python, "python_versions")
16+
# pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
17+
# pip.parse(
18+
# hub_name = "hedron_compile_commands_pip",
19+
# # Available versions are listed in @rules_python//python:versions.bzl.
20+
# python_version = "3.11",
21+
# requirements_lock = "//:requirements.txt",
22+
# )
23+
# use_repo(pip, "hedron_compile_commands_pip")

README.md

+6
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,12 @@ http_archive(
8181
)
8282
load("@hedron_compile_commands//:workspace_setup.bzl", "hedron_compile_commands_setup")
8383
hedron_compile_commands_setup()
84+
load("@hedron_compile_commands//:workspace_setup_transitive.bzl", "hedron_compile_commands_setup_transitive")
85+
hedron_compile_commands_setup_transitive()
86+
load("@hedron_compile_commands//:workspace_setup_transitive_transitive.bzl", "hedron_compile_commands_setup_transitive_transitive")
87+
hedron_compile_commands_setup_transitive_transitive()
88+
load("@hedron_compile_commands//:workspace_setup_transitive_transitive_transitive.bzl", "hedron_compile_commands_setup_transitive_transitive_transitive")
89+
hedron_compile_commands_setup_transitive_transitive_transitive()
8490
```
8591

8692
#### Either way: Get Updates via Renovate

WORKSPACE

+6
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,9 @@ workspace(name = "hedron_compile_commands")
66

77
load("@hedron_compile_commands//:workspace_setup.bzl", "hedron_compile_commands_setup")
88
hedron_compile_commands_setup()
9+
load("@hedron_compile_commands//:workspace_setup_transitive.bzl", "hedron_compile_commands_setup_transitive")
10+
hedron_compile_commands_setup_transitive()
11+
load("@hedron_compile_commands//:workspace_setup_transitive_transitive.bzl", "hedron_compile_commands_setup_transitive_transitive")
12+
hedron_compile_commands_setup_transitive_transitive()
13+
load("@hedron_compile_commands//:workspace_setup_transitive_transitive_transitive.bzl", "hedron_compile_commands_setup_transitive_transitive_transitive")
14+
hedron_compile_commands_setup_transitive_transitive_transitive()

check_python_version.template.py

-16
This file was deleted.

refresh.template.py

+37-63
Original file line numberDiff line numberDiff line change
@@ -11,20 +11,13 @@
1111
"""
1212

1313

14-
# This file requires python 3.6, which is enforced by check_python_version.template.py
15-
# 3.6 backwards compatibility required by @zhanyong-wan in https://github.com/hedronvision/bazel-compile-commands-extractor/issues/111.
16-
# 3.7 backwards compatibility required by @lummax in https://github.com/hedronvision/bazel-compile-commands-extractor/pull/27.
17-
# ^ Try to contact before upgrading.
18-
# When adding things could be cleaner if we had a higher minimum version, please add a comment with MIN_PY=3.<v>.
19-
# Similarly, when upgrading, please search for that MIN_PY= tag.
20-
21-
2214
import concurrent.futures
2315
import enum
24-
import functools # MIN_PY=3.9: Replace `functools.lru_cache(maxsize=None)` with `functools.cache`.
16+
import functools
2517
import itertools
2618
import json
2719
import locale
20+
import orjson # orjson is much faster than the standard library's json module (1.9 seconds vs 6.6 seconds for a ~140 MB file). See https://github.com/hedronvision/bazel-compile-commands-extractor/pull/118
2821
import os
2922
import pathlib
3023
import re
@@ -35,7 +28,7 @@
3528
import tempfile
3629
import time
3730
import types
38-
import typing # MIN_PY=3.9: Switch e.g. typing.List[str] -> list[str]
31+
import typing
3932

4033

4134
@enum.unique
@@ -96,14 +89,12 @@ def _print_header_finding_warning_once():
9689
_print_header_finding_warning_once.has_logged = False
9790

9891

99-
@functools.lru_cache(maxsize=None)
92+
@functools.lru_cache
10093
def _get_bazel_cached_action_keys():
10194
"""Gets the set of actionKeys cached in bazel-out."""
10295
action_cache_process = subprocess.run(
10396
['bazel', 'dump', '--action_cache'],
104-
# MIN_PY=3.7: Replace PIPEs with capture_output.
105-
stdout=subprocess.PIPE,
106-
stderr=subprocess.PIPE,
97+
capture_output=True,
10798
encoding=locale.getpreferredencoding(),
10899
check=True, # Should always succeed.
109100
)
@@ -157,7 +148,7 @@ def _parse_headers_from_makefile_deps(d_file_content: str, source_path_for_sanit
157148
return set(headers)
158149

159150

160-
@functools.lru_cache(maxsize=None)
151+
@functools.lru_cache
161152
def _get_cached_modified_time(path: str):
162153
"""Returns 0 if the file doesn't exist.
163154
@@ -202,7 +193,7 @@ def _is_nvcc(path: str):
202193
return os.path.basename(path).startswith('nvcc')
203194

204195

205-
def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_key: str):
196+
def _get_headers_gcc(compile_args: list[str], source_path: str, action_key: str):
206197
"""Gets the headers used by a particular compile command that uses gcc arguments formatting (including clang.)
207198
208199
Relatively slow. Requires running the C preprocessor if we can't hit Bazel's cache.
@@ -257,9 +248,7 @@ def _get_headers_gcc(compile_args: typing.List[str], source_path: str, action_ke
257248

258249
header_search_process = _subprocess_run_spilling_over_to_param_file_if_needed( # Note: gcc/clang can be run from Windows, too.
259250
header_cmd,
260-
# MIN_PY=3.7: Replace PIPEs with capture_output.
261-
stdout=subprocess.PIPE,
262-
stderr=subprocess.PIPE,
251+
capture_output=True,
263252
encoding=locale.getpreferredencoding(),
264253
check=False, # We explicitly ignore errors and carry on.
265254
)
@@ -350,7 +339,7 @@ def windows_list2cmdline(seq):
350339
return ''.join(result)
351340

352341

353-
def _subprocess_run_spilling_over_to_param_file_if_needed(command: typing.List[str], **kwargs):
342+
def _subprocess_run_spilling_over_to_param_file_if_needed(command: list[str], **kwargs):
354343
"""Same as subprocess.run, but it handles the case where the command line length is exceeded on Windows and we need a param file."""
355344

356345
# On non-Windows, we have to run directly via a special case.
@@ -378,7 +367,7 @@ def _subprocess_run_spilling_over_to_param_file_if_needed(command: typing.List[s
378367
raise
379368

380369

381-
def _get_headers_msvc(compile_args: typing.List[str], source_path: str):
370+
def _get_headers_msvc(compile_args: list[str], source_path: str):
382371
"""Gets the headers used by a particular compile command that uses msvc argument formatting (including clang-cl.)
383372
384373
Relatively slow. Requires running the C preprocessor.
@@ -453,29 +442,19 @@ def _get_headers_msvc(compile_args: typing.List[str], source_path: str):
453442
return headers, should_cache
454443

455444

456-
def _is_relative_to(sub: pathlib.PurePath, parent: pathlib.PurePath):
457-
"""Determine if one path is relative to another."""
458-
# MIN_PY=3.9: Eliminate helper in favor of `PurePath.is_relative_to()`.
459-
try:
460-
sub.relative_to(parent)
461-
except ValueError:
462-
return False
463-
return True
464-
465-
466445
def _file_is_in_main_workspace_and_not_external(file_str: str):
467446
file_path = pathlib.PurePath(file_str)
468447
if file_path.is_absolute():
469448
workspace_absolute = pathlib.PurePath(os.environ["BUILD_WORKSPACE_DIRECTORY"])
470-
if not _is_relative_to(file_path, workspace_absolute):
449+
if not file_path.is_relative_to(workspace_absolute):
471450
return False
472451
file_path = file_path.relative_to(workspace_absolute)
473452
# You can now assume that the path is relative to the workspace.
474453
# [Already assuming that relative paths are relative to the main workspace.]
475454

476455
# some/file.h, but not external/some/file.h
477456
# also allows for things like bazel-out/generated/file.h
478-
if _is_relative_to(file_path, pathlib.PurePath("external")):
457+
if file_path.is_relative_to(pathlib.PurePath("external")):
479458
return False
480459

481460
# ... but, ignore files in e.g. bazel-out/<configuration>/bin/external/
@@ -532,7 +511,7 @@ def _get_headers(compile_action, source_path: str):
532511
cache_last_modified = os.path.getmtime(cache_file_path) # Do before opening just as a basic hedge against concurrent write, even though we won't handle the concurrent delete case perfectly.
533512
try:
534513
with open(cache_file_path) as cache_file:
535-
action_key, cached_headers = json.load(cache_file)
514+
action_key, cached_headers = orjson.loads(cache_file.read())
536515
except json.JSONDecodeError:
537516
# Corrupted cache, which can happen if, for example, the user kills the program, since writes aren't atomic.
538517
# But if it is the result of a bug, we want to print it before it's overwritten, so it can be reported
@@ -561,8 +540,11 @@ def _get_headers(compile_action, source_path: str):
561540
# Cache for future use
562541
if output_file and should_cache:
563542
os.makedirs(os.path.dirname(cache_file_path), exist_ok=True)
564-
with open(cache_file_path, 'w') as cache_file:
565-
json.dump((compile_action.actionKey, list(headers)), cache_file)
543+
with open(cache_file_path, 'wb') as cache_file:
544+
cache_file.write(orjson.dumps(
545+
(compile_action.actionKey, list(headers)),
546+
option=orjson.OPT_INDENT_2,
547+
))
566548
elif not headers and cached_headers: # If we failed to get headers, we'll fall back on a stale cache.
567549
headers = set(cached_headers)
568550

@@ -678,7 +660,7 @@ def _get_files(compile_action):
678660
_get_files.extensions_to_language_args = {ext : flag for exts, flag in _get_files.extensions_to_language_args.items() for ext in exts} # Flatten map for easier use
679661

680662

681-
@functools.lru_cache(maxsize=None)
663+
@functools.lru_cache
682664
def _get_apple_SDKROOT(SDK_name: str):
683665
"""Get path to xcode-select'd root for the given OS."""
684666
SDKROOT_maybe_versioned = subprocess.check_output(
@@ -696,7 +678,7 @@ def _get_apple_SDKROOT(SDK_name: str):
696678
# Traditionally stored in SDKROOT environment variable, but not provided by Bazel. See https://github.com/bazelbuild/bazel/issues/12852
697679

698680

699-
def _get_apple_platform(compile_args: typing.List[str]):
681+
def _get_apple_platform(compile_args: list[str]):
700682
"""Figure out which Apple platform a command is for.
701683
702684
Is the name used by Xcode in the SDK files, not the marketing name.
@@ -710,15 +692,15 @@ def _get_apple_platform(compile_args: typing.List[str]):
710692
return None
711693

712694

713-
@functools.lru_cache(maxsize=None)
695+
@functools.lru_cache
714696
def _get_apple_DEVELOPER_DIR():
715697
"""Get path to xcode-select'd developer directory."""
716698
return subprocess.check_output(('xcode-select', '--print-path'), encoding=locale.getpreferredencoding()).rstrip()
717699
# Unless xcode-select has been invoked (like for a beta) we'd expect, e.g., '/Applications/Xcode.app/Contents/Developer' or '/Library/Developer/CommandLineTools'.
718700
# Traditionally stored in DEVELOPER_DIR environment variable, but not provided by Bazel. See https://github.com/bazelbuild/bazel/issues/12852
719701

720702

721-
def _apple_platform_patch(compile_args: typing.List[str]):
703+
def _apple_platform_patch(compile_args: list[str]):
722704
"""De-Bazel the command into something clangd can parse.
723705
724706
This function has fixes specific to Apple platforms, but you should call it on all platforms. It'll determine whether the fixes should be applied or not.
@@ -768,9 +750,7 @@ def _emscripten_platform_patch(compile_action):
768750
# On Windows, it fails to spawn the subprocess when the path uses forward slashes as a separator.
769751
# Here, we convert emcc driver path to use the native path separator.
770752
[str(emcc_driver)] + compile_action.arguments[1:],
771-
# MIN_PY=3.7: Replace PIPEs with capture_output.
772-
stdout=subprocess.PIPE,
773-
stderr=subprocess.PIPE,
753+
capture_output=True,
774754
env=environment,
775755
encoding=locale.getpreferredencoding(),
776756
check=False, # We explicitly ignore errors and carry on.
@@ -784,7 +764,7 @@ def _emscripten_platform_patch(compile_action):
784764
end_args_idx = lines.index(END_ARGS_MARKER, begin_args_idx + 1)
785765
args = lines[begin_args_idx + 1:end_args_idx]
786766
clang_driver = pathlib.PurePath(args[0])
787-
if _is_relative_to(clang_driver, workspace_absolute):
767+
if clang_driver.is_relative_to(workspace_absolute):
788768
args[0] = clang_driver.relative_to(workspace_absolute).as_posix()
789769
return args
790770

@@ -793,7 +773,7 @@ def _emscripten_platform_patch(compile_action):
793773
END_ARGS_MARKER = '===HEDRON_COMPILE_COMMANDS_END_ARGS==='
794774

795775

796-
def _all_platform_patch(compile_args: typing.List[str]):
776+
def _all_platform_patch(compile_args: list[str]):
797777
"""Apply de-Bazeling fixes to the compile command that are shared across target platforms."""
798778
# clangd writes module cache files to the wrong place
799779
# Without this fix, you get tons of module caches dumped into the VSCode root folder.
@@ -844,7 +824,7 @@ def _all_platform_patch(compile_args: typing.List[str]):
844824
return compile_args
845825

846826

847-
def _nvcc_patch(compile_args: typing.List[str]) -> typing.List[str]:
827+
def _nvcc_patch(compile_args: list[str]) -> list[str]:
848828
"""Apply fixes to args to nvcc.
849829
850830
Basically remove everything that's an nvcc arg that is not also a clang arg, converting what we can.
@@ -1087,9 +1067,7 @@ def _convert_compile_commands(aquery_output):
10871067

10881068
# Process each action from Bazelisms -> file paths and their clang commands
10891069
# Threads instead of processes because most of the execution time is farmed out to subprocesses. No need to sidestep the GIL. Might change after https://github.com/clangd/clangd/issues/123 is resolved.
1090-
with concurrent.futures.ThreadPoolExecutor(
1091-
max_workers=min(32, (os.cpu_count() or 1) + 4) # Backport. Default in MIN_PY=3.8. See "using very large resources implicitly on many-core machines" in https://docs.python.org/3/library/concurrent.futures.html#concurrent.futures.ThreadPoolExecutor
1092-
) as threadpool:
1070+
with concurrent.futures.ThreadPoolExecutor() as threadpool:
10931071
outputs = threadpool.map(_get_cpp_command_for_files, aquery_output.actions)
10941072

10951073
# Yield as compile_commands.json entries
@@ -1175,9 +1153,7 @@ def _get_commands(target: str, flags: str):
11751153

11761154
aquery_process = subprocess.run(
11771155
aquery_args,
1178-
# MIN_PY=3.7: Replace PIPEs with capture_output.
1179-
stdout=subprocess.PIPE,
1180-
stderr=subprocess.PIPE,
1156+
capture_output=True,
11811157
encoding=locale.getpreferredencoding(),
11821158
check=False, # We explicitly ignore errors from `bazel aquery` and carry on.
11831159
)
@@ -1235,20 +1211,20 @@ def _ensure_external_workspaces_link_exists():
12351211
dest = (pathlib.Path('bazel-out').resolve()/'../../../external').resolve()
12361212

12371213
# Handle problem cases where //external exists
1238-
if os.path.lexists(source):
1214+
if os.path.lexists(source): # MIN_PY=3.12: use source.exists(follow_symlinks=False), here and elsewhere.
12391215
# Detect symlinks or Windows junctions
12401216
# This seemed to be the cleanest way to detect both.
12411217
# Note that os.path.islink doesn't detect junctions.
12421218
try:
1243-
current_dest = os.readlink(source) # MIN_PY=3.9 source.readlink()
1219+
current_dest = source.readlink()
12441220
except OSError:
12451221
log_error(f">>> //external already exists, but it isn't a {'junction' if is_windows else 'symlink'}. //external is reserved by Bazel and needed for this tool. Please rename or delete your existing //external and rerun. More details in the README if you want them.") # Don't auto delete in case the user has something important there.
12461222
sys.exit(1)
12471223

12481224
# Normalize the path for matching
12491225
# First, work around a gross case where Windows readlink returns an extended path, starting with \\?\, causing the match to fail
1250-
if is_windows and current_dest.startswith('\\\\?\\'):
1251-
current_dest = current_dest[4:] # MIN_PY=3.9 stripprefix
1226+
if is_windows:
1227+
current_dest = current_dest.removeprefix('\\\\?\\')
12521228
current_dest = pathlib.Path(current_dest)
12531229

12541230
if dest != current_dest:
@@ -1336,7 +1312,7 @@ def _ensure_cwd_is_workspace_root():
13361312
os.chdir(workspace_root)
13371313

13381314

1339-
def main():
1315+
if __name__ == '__main__':
13401316
_ensure_cwd_is_workspace_root()
13411317
_ensure_gitignore_entries_exist()
13421318
_ensure_external_workspaces_link_exists()
@@ -1357,10 +1333,8 @@ def main():
13571333
sys.exit(1)
13581334

13591335
# Chain output into compile_commands.json
1360-
with open('compile_commands.json', 'w') as output_file:
1361-
json.dump(
1336+
with open('compile_commands.json', 'wb') as output_file:
1337+
output_file.write(orjson.dumps(
13621338
compile_command_entries,
1363-
output_file,
1364-
indent=2, # Yay, human readability!
1365-
check_circular=False # For speed.
1366-
)
1339+
option=orjson.OPT_INDENT_2,
1340+
))

0 commit comments

Comments
 (0)