diff --git a/README.md b/README.md
index aecfebb..71e2c33 100644
--- a/README.md
+++ b/README.md
@@ -42,6 +42,12 @@ Howdy, Bazel user 🤠. Let's get you set up fast with some awesome tooling for
 
 There's a bunch of text here but only because we're trying to spell things out and make them easy. If you have issues, let us know; we'd love your help making things even better and more complete—and we'd love to help you!
 
+This tool can optionally use the `orjson` pip package to significantly speed up JSON processing. To make sure orjson is available, run the following before invoking the refresh command:
+
+```shell
+pip show orjson >/dev/null || pip install orjson --user
+```
+
 ### First, add this tool to your Bazel setup.
 
 #### If you have a MODULE.bazel file and are using the new [bzlmod](https://bazel.build/external/migration) system
diff --git a/refresh.template.py b/refresh.template.py
index 0dd4404..26a33fa 100644
--- a/refresh.template.py
+++ b/refresh.template.py
@@ -543,7 +543,11 @@ def _get_headers(compile_action, source_path: str):
         cache_last_modified = os.path.getmtime(cache_file_path) # Do before opening just as a basic hedge against concurrent write, even though we won't handle the concurrent delete case perfectly.
         try:
             with open(cache_file_path) as cache_file:
-                action_key, cached_headers = json.load(cache_file)
+                try:
+                    from orjson import loads # Use orjson when it's installed; it's much faster.
+                    action_key, cached_headers = loads(cache_file.read())
+                except ImportError:
+                    action_key, cached_headers = json.load(cache_file)
         except json.JSONDecodeError:
             # Corrupted cache, which can happen if, for example, the user kills the program, since writes aren't atomic.
             # But if it is the result of a bug, we want to print it before it's overwritten, so it can be reported
@@ -584,7 +588,6 @@
         if output_file and should_cache:
             os.makedirs(os.path.dirname(cache_file_path), exist_ok=True)
-            with open(cache_file_path, 'w') as cache_file:
-                json.dump((compile_action.actionKey, list(headers)), cache_file)
+            _cache_compile_action(compile_action, cache_file_path, headers)
     elif not headers and cached_headers: # If we failed to get headers, we'll fall back on a stale cache.
         headers = set(cached_headers)
 
@@ -592,8 +595,18 @@
     headers = {header for header in headers if _file_is_in_main_workspace_and_not_external(header)}
 
     return headers
+
 _get_headers.has_logged = False
 
+def _cache_compile_action(compile_action, cache_file_path, headers):
+    cache = (compile_action.actionKey, list(headers))
+    try:
+        from orjson import dumps # Use orjson when it's installed; it's much faster.
+        with open(cache_file_path, 'wb') as cache_file: # orjson serializes to bytes, so the file must be opened in binary mode.
+            cache_file.write(dumps(cache))
+    except ImportError:
+        with open(cache_file_path, 'w') as cache_file:
+            json.dump(cache, cache_file)
 
 def _get_files(compile_action):
     """Gets the ({source files}, {header files}) clangd should be told the command applies to."""
@@ -1305,6 +1318,24 @@ def _ensure_cwd_is_workspace_root():
     # Although this can fail (OSError/FileNotFoundError/PermissionError/NotADirectoryError), there's no easy way to recover, so we'll happily crash.
     os.chdir(workspace_root)
 
+def _write_compile_commands(compile_command_entries: typing.List[dict]):
+    file_name = 'compile_commands.json'
+    try:
+        # orjson is much faster than the standard library's json module (1.9 seconds vs 6.6 seconds for a ~140 MB file).
+        from orjson import dumps, OPT_INDENT_2
+        with open(file_name, 'wb') as output_file: # orjson serializes to bytes, so the file must be opened in binary mode.
+            output_file.write(dumps(
+                compile_command_entries,
+                option=OPT_INDENT_2 # Yay, human readability!
+            ))
+    except ImportError:
+        with open(file_name, 'w') as output_file:
+            json.dump(
+                compile_command_entries,
+                output_file,
+                indent=2, # Yay, human readability!
+                check_circular=False # For speed.
+            )
 
 def main():
     _ensure_cwd_is_workspace_root()
@@ -1326,11 +1357,4 @@ def main():
 There should be actionable warnings, above, that led to this.""")
         sys.exit(1)
 
-    # Chain output into compile_commands.json
-    with open('compile_commands.json', 'w') as output_file:
-        json.dump(
-            compile_command_entries,
-            output_file,
-            indent=2, # Yay, human readability!
-            check_circular=False # For speed.
-        )
+    _write_compile_commands(compile_command_entries)
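For reference, here's a minimal, standalone sketch of the optional-dependency pattern this patch applies in each spot: try orjson, fall back to the standard library's `json`. The helper names `dump_json` and `load_json` are illustrative only; they don't exist in refresh.template.py.

```python
import json

try:
    import orjson  # Optional fast path; everything still works if it isn't installed.
except ImportError:
    orjson = None


def dump_json(obj, path, indent=False):
    """Serializes obj to path, preferring orjson when it's installed."""
    if orjson is not None:
        # orjson.dumps returns bytes, so the file must be opened in binary mode.
        option = orjson.OPT_INDENT_2 if indent else 0
        with open(path, 'wb') as f:
            f.write(orjson.dumps(obj, option=option))
    else:
        with open(path, 'w') as f:
            json.dump(obj, f, indent=2 if indent else None)


def load_json(path):
    """Deserializes path, preferring orjson when it's installed."""
    with open(path, 'rb') as f:  # Both orjson.loads and json.loads accept bytes.
        data = f.read()
    return orjson.loads(data) if orjson is not None else json.loads(data)


if __name__ == '__main__':
    dump_json({'directory': '/tmp', 'file': 'example.cc'}, 'example.json', indent=True)
    print(load_json('example.json'))
```

Two details the fallback depends on: orjson's `dumps` returns `bytes` rather than `str`, so the fast paths above and in the patch open files in binary mode; and, per orjson's documentation, `orjson.JSONDecodeError` is a subclass of `json.JSONDecodeError`, so the existing corrupted-cache handler in `_get_headers` catches decode failures from either library.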