From ba7f9866f2512ec76b2a1c63e551a9c3e102a286 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 1 Aug 2023 20:18:16 -0500 Subject: [PATCH 1/9] Build: replace GitPython with git commands --- readthedocs/doc_builder/environments.py | 36 ++++--- readthedocs/vcs_support/backends/git.py | 132 ++++++++++++++++++++---- 2 files changed, 134 insertions(+), 34 deletions(-) diff --git a/readthedocs/doc_builder/environments.py b/readthedocs/doc_builder/environments.py index f81d9213d36..fcabf345471 100644 --- a/readthedocs/doc_builder/environments.py +++ b/readthedocs/doc_builder/environments.py @@ -166,8 +166,8 @@ def run(self): env=environment, ) cmd_stdout, cmd_stderr = proc.communicate() - self.output = self.sanitize_output(cmd_stdout) - self.error = self.sanitize_output(cmd_stderr) + self.output = self.decode_output(cmd_stdout) + self.error = self.decode_output(cmd_stderr) self.exit_code = proc.returncode except OSError: log.exception("Operating system error.") @@ -175,6 +175,14 @@ def run(self): finally: self.end_time = datetime.utcnow() + def decode_output(self, output): + sanitized = "" + try: + sanitized = output.decode("utf-8", "replace") + except (TypeError, AttributeError): + pass + return sanitized + def sanitize_output(self, output): r""" Sanitize ``output`` to be saved into the DB. @@ -193,16 +201,16 @@ def sanitize_output(self, output): :returns: sanitized output as string or ``None`` if it fails """ + sanitized = "" try: - sanitized = output.decode('utf-8', 'replace') # Replace NULL (\x00) character to avoid PostgreSQL db to fail # https://code.djangoproject.com/ticket/28201 - sanitized = sanitized.replace('\x00', '') + sanitized = output.replace("\x00", "") except (TypeError, AttributeError): - sanitized = "" + pass # Chunk the output data to be less than ``DATA_UPLOAD_MAX_MEMORY_SIZE`` - output_length = len(output) if output else 0 + output_length = len(sanitized.encode("utf-8")) # Left some extra space for the rest of the request data threshold = 512 * 1024 # 512Kb allowed_length = settings.DATA_UPLOAD_MAX_MEMORY_SIZE - threshold @@ -236,12 +244,12 @@ def save(self, api_client): self.exit_code = 0 data = { - 'build': self.build_env.build.get('id'), - 'command': self.get_command(), - 'output': self.output, - 'exit_code': self.exit_code, - 'start_time': self.start_time, - 'end_time': self.end_time, + "build": self.build_env.build.get("id"), + "command": self.get_command(), + "output": self.sanitize_output(self.output), + "exit_code": self.exit_code, + "start_time": self.start_time, + "end_time": self.end_time, } if self.build_env.project.has_feature(Feature.API_LARGE_DATA): @@ -320,8 +328,8 @@ def run(self): cmd_stdout, cmd_stderr = out else: cmd_stdout = out - self.output = self.sanitize_output(cmd_stdout) - self.error = self.sanitize_output(cmd_stderr) + self.output = self.decode_output(cmd_stdout) + self.error = self.decode_output(cmd_stderr) cmd_ret = client.exec_inspect(exec_id=exec_cmd['Id']) self.exit_code = cmd_ret['ExitCode'] diff --git a/readthedocs/vcs_support/backends/git.py b/readthedocs/vcs_support/backends/git.py index ad86c1615dc..51b394015bc 100644 --- a/readthedocs/vcs_support/backends/git.py +++ b/readthedocs/vcs_support/backends/git.py @@ -1,11 +1,12 @@ """Git-related utilities.""" import re +from dataclasses import dataclass +from typing import Iterable import git import structlog from django.core.exceptions import ValidationError -from git.exc import BadName, InvalidGitRepositoryError, NoSuchPathError from gitdb.util import hex_to_bin from readthedocs.builds.constants import BRANCH, EXTERNAL, TAG @@ -23,6 +24,12 @@ log = structlog.get_logger(__name__) +@dataclass(slots=True) +class GitSubmodule: + path: str + url: str + + class Backend(BaseVCS): """Git VCS backend.""" @@ -262,11 +269,13 @@ def fetch_ng(self): return code, stdout, stderr def repo_exists(self): - try: - self._repo - except (InvalidGitRepositoryError, NoSuchPathError): - return False - return True + """Test if the current working directory is a top-level git repository.""" + # If we are at the top-level of a git repository, + # ``--show-prefix`` will return an empty string. + exit_code, stdout, _ = self.run( + "git", "rev-parse", "--show-prefix", record=False + ) + return exit_code == 0 and stdout.strip() == "" @property def _repo(self): @@ -282,7 +291,9 @@ def are_submodules_available(self, config): return False # Keep compatibility with previous projects - return bool(self.submodules) + # TODO: remove when all projects are required + # to have a config file. + return any(self.submodules) def validate_submodules(self, config): """ @@ -309,6 +320,7 @@ def validate_submodules(self, config): for sub_path in config.submodules.exclude: path = sub_path.rstrip('/') + # TODO: Should we raise an error if the submodule is not found? if path in submodules: del submodules[path] @@ -316,7 +328,9 @@ def validate_submodules(self, config): submodules_include = {} for sub_path in config.submodules.include: path = sub_path.rstrip('/') - submodules_include[path] = submodules[path] + # TODO: Should we raise an error if the submodule is not found? + if path in submodules: + submodules_include[path] = submodules[path] submodules = submodules_include invalid_submodules = [] @@ -523,8 +537,84 @@ def commit(self): return None @property - def submodules(self): - return list(self._repo.submodules) + def submodules(self) -> Iterable[GitSubmodule]: + """ + Return a iterable of submodules in this repository. + + In order to get the submodules URLs and paths without initializing them, + we parse the .gitmodules file. For this we make use of the + ``git config --get-regexp`` command. + + Keys and values from the config can contain spaces, + in order to parse the output unambiguously, we use the + ``--null`` option to separate each result with a null character, + and each key and value with a newline character. + + The command will produce an output like this: + + .. code-block:: text + + submodule.submodule-1.url\nhttps://github.com/\0 + submodule.submodule-1.path\nsubmodule-1\0 + submodule.submodule-2.path\nsubmodule-2\0 + submodule.submodule-2.url\nhttps://github.com/\0 + submodule.submodule-3.url\nhttps://github.com/\0 + submodule.submodule-4.path\n\0 + + .. note:: + + - In the example result is put in a new line for readability. + - Isn't guaranteed that the url and path keys will appear next to each other. + - Isn't guaranteed that that all submodules will have a url and path. + + """ + exit_code, stdout, _ = self.run( + "git", + "config", + "--null", + "--file", + ".gitmodules", + "--get-regexp", + # Get only the URL and path keys of each submodule. + r"^submodule\..*\.(url|path)$", + record=False, + ) + if exit_code != 0: + return [] + + # Group the URLs and paths by submodule name/key. + submodules = {} + keys_and_values = stdout.split("\0") + for key_and_value in keys_and_values: + try: + key, value = key_and_value.split("\n", maxsplit=1) + except ValueError: + # This should never happen, but we log a warning just in case + # Git doesn't return the expected format. + log.warning("Wrong key and value format.", key_and_value=key_and_value) + continue + + if key.endswith(".url"): + key = key.removesuffix(".url") + submodules.setdefault(key, {})["url"] = value + elif key.endswith(".path"): + key = key.removesuffix(".path") + submodules.setdefault(key, {})["path"] = value + else: + # This should never happen, but we log a warning just in case the regex is wrong. + log.warning("Unexpected key extracted fom .gitmodules.", key=key) + + for submodule in submodules.values(): + # If the submodule doesn't have a URL or path, we skip it, + # since it's not a valid submodule. + url = submodule.get("url") + path = submodule.get("path") + if not url or not path: + continue + yield GitSubmodule( + url=submodule["url"], + path=submodule["path"], + ) def checkout(self, identifier=None): """Checkout to identifier or latest.""" @@ -554,6 +644,8 @@ def update_submodules(self, config): def checkout_submodules(self, submodules, config): """Checkout all repository submodules.""" + if not submodules: + return self.run('git', 'submodule', 'sync') cmd = [ 'git', @@ -563,25 +655,25 @@ def checkout_submodules(self, submodules, config): '--force', ] if config.submodules.recursive: - cmd.append('--recursive') + cmd.append("--recursive") + cmd.append("--") cmd += submodules self.run(*cmd) def find_ref(self, ref): - # Check if ref starts with 'origin/' + # If the ref already starts with 'origin/', + # we don't need to do anything. if ref.startswith('origin/'): return ref # Check if ref is a branch of the origin remote - if self.ref_exists('remotes/origin/' + ref): - return 'origin/' + ref + if self.ref_exists("refs/remotes/origin/" + ref): + return "origin/" + ref return ref def ref_exists(self, ref): - try: - if self._repo.commit(ref): - return True - except (BadName, ValueError): - return False - return False + exit_code, _, _ = self.run( + "git", "show-ref", "--verify", "--quiet", "--", ref, record=False + ) + return exit_code == 0 From 77d3681404780ee62a7cfcbc85ea6e65caad23f4 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 2 Aug 2023 12:15:21 -0500 Subject: [PATCH 2/9] Cleanup --- readthedocs/doc_builder/environments.py | 23 ++++++++++++----------- readthedocs/vcs_support/backends/git.py | 10 ++++++---- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/readthedocs/doc_builder/environments.py b/readthedocs/doc_builder/environments.py index fcabf345471..70a4861ef30 100644 --- a/readthedocs/doc_builder/environments.py +++ b/readthedocs/doc_builder/environments.py @@ -175,31 +175,29 @@ def run(self): finally: self.end_time = datetime.utcnow() - def decode_output(self, output): - sanitized = "" + def decode_output(self, output: bytes) -> str: + """Decode bytes output to a UTF-8 string.""" + decoded = "" try: - sanitized = output.decode("utf-8", "replace") + decoded = output.decode("utf-8", "replace") except (TypeError, AttributeError): pass - return sanitized + return decoded - def sanitize_output(self, output): + def sanitize_output(self, output: str) -> str: r""" Sanitize ``output`` to be saved into the DB. - 1. Decodes to UTF-8 - - 2. Replaces NULL (\x00) characters with ``''`` (empty string) to + 1. Replaces NULL (\x00) characters with ``''`` (empty string) to avoid PostgreSQL db to fail: https://code.djangoproject.com/ticket/28201 - 3. Chunk at around ``DATA_UPLOAD_MAX_MEMORY_SIZE`` bytes to be sent + 2. Chunk at around ``DATA_UPLOAD_MAX_MEMORY_SIZE`` bytes to be sent over the API call request :param output: stdout/stderr to be sanitized - :type output: bytes - :returns: sanitized output as string or ``None`` if it fails + :returns: sanitized output as string """ sanitized = "" try: @@ -210,6 +208,9 @@ def sanitize_output(self, output): pass # Chunk the output data to be less than ``DATA_UPLOAD_MAX_MEMORY_SIZE`` + # The length is calculated in bytes, so we need to encode the string first. + # NOTE: we are calculating the length in bytes, but truncating the string + # in characters. We should use bytes or characters, but not both. output_length = len(sanitized.encode("utf-8")) # Left some extra space for the rest of the request data threshold = 512 * 1024 # 512Kb diff --git a/readthedocs/vcs_support/backends/git.py b/readthedocs/vcs_support/backends/git.py index 51b394015bc..df42173dc7c 100644 --- a/readthedocs/vcs_support/backends/git.py +++ b/readthedocs/vcs_support/backends/git.py @@ -538,7 +538,7 @@ def commit(self): @property def submodules(self) -> Iterable[GitSubmodule]: - """ + r""" Return a iterable of submodules in this repository. In order to get the submodules URLs and paths without initializing them, @@ -563,7 +563,7 @@ def submodules(self) -> Iterable[GitSubmodule]: .. note:: - - In the example result is put in a new line for readability. + - In the example each result is put in a new line for readability. - Isn't guaranteed that the url and path keys will appear next to each other. - Isn't guaranteed that that all submodules will have a url and path. @@ -612,8 +612,8 @@ def submodules(self) -> Iterable[GitSubmodule]: if not url or not path: continue yield GitSubmodule( - url=submodule["url"], - path=submodule["path"], + url=url, + path=path, ) def checkout(self, identifier=None): @@ -644,6 +644,8 @@ def update_submodules(self, config): def checkout_submodules(self, submodules, config): """Checkout all repository submodules.""" + # If the repository has no submodules, we don't need to do anything. + # Otherwise, all submodules will be updated. if not submodules: return self.run('git', 'submodule', 'sync') From 5ecfcc276f7113847a6d952833bb4d99b71ea0c1 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 2 Aug 2023 13:41:10 -0500 Subject: [PATCH 3/9] Fix tests --- .../projects/tests/test_build_tasks.py | 6 ++-- readthedocs/rtd_tests/tests/test_backend.py | 4 +-- .../rtd_tests/tests/test_doc_building.py | 30 +++++++++++++++---- 3 files changed, 30 insertions(+), 10 deletions(-) diff --git a/readthedocs/projects/tests/test_build_tasks.py b/readthedocs/projects/tests/test_build_tasks.py index 547da8c42e3..7c5c4d12bde 100644 --- a/readthedocs/projects/tests/test_build_tasks.py +++ b/readthedocs/projects/tests/test_build_tasks.py @@ -713,7 +713,8 @@ def test_build_commands_executed( mock.call( "git", "clone", "--no-single-branch", "--depth", "50", mock.ANY, "." ), - mock.call("git", "checkout", "--force", "a1b2c3"), + mock.call('git', 'show-ref', '--verify', '--quiet', '--', 'refs/remotes/origin/a1b2c3', record=False), + mock.call("git", "checkout", "--force", "origin/a1b2c3"), mock.call("git", "clean", "-d", "-f", "-f"), ] ) @@ -1662,7 +1663,7 @@ def test_submodules_include(self, load_yaml_config, value, expected): self.mocker.mocks["git.Backend.run"].assert_has_calls( [ mock.call("git", "submodule", "sync"), - mock.call("git", "submodule", "update", "--init", "--force", *expected), + mock.call("git", "submodule", "update", "--init", "--force", "--", *expected), ] ) @@ -1687,6 +1688,7 @@ def test_submodules_exclude(self, load_yaml_config): "--init", "--force", "--recursive", + "--", "two", "three", ), diff --git a/readthedocs/rtd_tests/tests/test_backend.py b/readthedocs/rtd_tests/tests/test_backend.py index 9a124378a5c..60a6bbb9513 100644 --- a/readthedocs/rtd_tests/tests/test_backend.py +++ b/readthedocs/rtd_tests/tests/test_backend.py @@ -332,7 +332,7 @@ def test_invalid_submodule_is_ignored(self): content = textwrap.dedent(""" [submodule "not-valid-path"] path = not-valid-path - url = https://github.com/readthedocs/readthedocs.org + url = """) f.write(content) @@ -492,7 +492,7 @@ def test_invalid_submodule_is_ignored(self): """ [submodule "not-valid-path"] path = not-valid-path - url = https://github.com/readthedocs/readthedocs.org + url = """ ) f.write(content) diff --git a/readthedocs/rtd_tests/tests/test_doc_building.py b/readthedocs/rtd_tests/tests/test_doc_building.py index 6583cd41679..b94d0d5defd 100644 --- a/readthedocs/rtd_tests/tests/test_doc_building.py +++ b/readthedocs/rtd_tests/tests/test_doc_building.py @@ -65,7 +65,7 @@ def test_record_command_as_success(self): { "build": mock.ANY, "command": command.get_command(), - "output": command.output, + "output": "", "exit_code": 0, "start_time": command.start_time, "end_time": command.end_time, @@ -239,7 +239,16 @@ def test_missing_command(self): def test_output(self): """Test output command.""" - cmd = BuildCommand(['/bin/bash', '-c', 'echo -n FOOBAR']) + project = get(Project) + api_client = mock.MagicMock() + build_env = LocalBuildEnvironment( + project=project, + build={ + 'id': 1, + }, + api_client=api_client, + ) + cmd = BuildCommand(['/bin/bash', '-c', 'echo -n FOOBAR'], build_env=build_env) # Mock BuildCommand.sanitized_output just to count the amount of calls, # but use the original method to behaves as real @@ -247,10 +256,19 @@ def test_output(self): with patch('readthedocs.doc_builder.environments.BuildCommand.sanitize_output') as sanitize_output: # noqa sanitize_output.side_effect = original_sanitized_output cmd.run() + cmd.save(api_client=api_client) self.assertEqual(cmd.output, 'FOOBAR') + api_client.command.post.assert_called_once_with({ + 'build': mock.ANY, + 'command': "/bin/bash -c echo -n FOOBAR", + "output": "FOOBAR", + "exit_code": 0, + "start_time": mock.ANY, + "end_time": mock.ANY, + }) # Check that we sanitize the output - self.assertEqual(sanitize_output.call_count, 2) + self.assertEqual(sanitize_output.call_count, 1) def test_error_output(self): """Test error output from command.""" @@ -262,9 +280,9 @@ def test_error_output(self): def test_sanitize_output(self): cmd = BuildCommand(['/bin/bash', '-c', 'echo']) checks = ( - (b'Hola', 'Hola'), - (b'H\x00i', 'Hi'), - (b'H\x00i \x00\x00\x00You!\x00', 'Hi You!'), + ('Hola', 'Hola'), + ('H\x00i', 'Hi'), + ('H\x00i \x00\x00\x00You!\x00', 'Hi You!'), ) for output, sanitized in checks: self.assertEqual(cmd.sanitize_output(output), sanitized) From 39bf8b68b6697adff602c355a0068b0d61829b38 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 2 Aug 2023 15:01:46 -0500 Subject: [PATCH 4/9] Black --- .../projects/tests/test_build_tasks.py | 14 +++++++-- readthedocs/rtd_tests/tests/test_backend.py | 10 ++++--- .../rtd_tests/tests/test_doc_building.py | 30 ++++++++++--------- 3 files changed, 34 insertions(+), 20 deletions(-) diff --git a/readthedocs/projects/tests/test_build_tasks.py b/readthedocs/projects/tests/test_build_tasks.py index 7c5c4d12bde..cbe342b4f3a 100644 --- a/readthedocs/projects/tests/test_build_tasks.py +++ b/readthedocs/projects/tests/test_build_tasks.py @@ -713,7 +713,15 @@ def test_build_commands_executed( mock.call( "git", "clone", "--no-single-branch", "--depth", "50", mock.ANY, "." ), - mock.call('git', 'show-ref', '--verify', '--quiet', '--', 'refs/remotes/origin/a1b2c3', record=False), + mock.call( + "git", + "show-ref", + "--verify", + "--quiet", + "--", + "refs/remotes/origin/a1b2c3", + record=False, + ), mock.call("git", "checkout", "--force", "origin/a1b2c3"), mock.call("git", "clean", "-d", "-f", "-f"), ] @@ -1663,7 +1671,9 @@ def test_submodules_include(self, load_yaml_config, value, expected): self.mocker.mocks["git.Backend.run"].assert_has_calls( [ mock.call("git", "submodule", "sync"), - mock.call("git", "submodule", "update", "--init", "--force", "--", *expected), + mock.call( + "git", "submodule", "update", "--init", "--force", "--", *expected + ), ] ) diff --git a/readthedocs/rtd_tests/tests/test_backend.py b/readthedocs/rtd_tests/tests/test_backend.py index 60a6bbb9513..a37d41ba8a0 100644 --- a/readthedocs/rtd_tests/tests/test_backend.py +++ b/readthedocs/rtd_tests/tests/test_backend.py @@ -328,12 +328,14 @@ def test_invalid_submodule_is_ignored(self): repo.checkout('submodule') gitmodules_path = os.path.join(repo.working_dir, '.gitmodules') - with open(gitmodules_path, 'a') as f: - content = textwrap.dedent(""" + with open(gitmodules_path, "a") as f: + content = textwrap.dedent( + """ [submodule "not-valid-path"] path = not-valid-path url = - """) + """ + ) f.write(content) valid, submodules = repo.validate_submodules(self.dummy_conf) @@ -489,7 +491,7 @@ def test_invalid_submodule_is_ignored(self): with open(gitmodules_path, "a") as f: content = textwrap.dedent( - """ + """ [submodule "not-valid-path"] path = not-valid-path url = diff --git a/readthedocs/rtd_tests/tests/test_doc_building.py b/readthedocs/rtd_tests/tests/test_doc_building.py index b94d0d5defd..3b13b459995 100644 --- a/readthedocs/rtd_tests/tests/test_doc_building.py +++ b/readthedocs/rtd_tests/tests/test_doc_building.py @@ -244,11 +244,11 @@ def test_output(self): build_env = LocalBuildEnvironment( project=project, build={ - 'id': 1, + "id": 1, }, api_client=api_client, ) - cmd = BuildCommand(['/bin/bash', '-c', 'echo -n FOOBAR'], build_env=build_env) + cmd = BuildCommand(["/bin/bash", "-c", "echo -n FOOBAR"], build_env=build_env) # Mock BuildCommand.sanitized_output just to count the amount of calls, # but use the original method to behaves as real @@ -257,15 +257,17 @@ def test_output(self): sanitize_output.side_effect = original_sanitized_output cmd.run() cmd.save(api_client=api_client) - self.assertEqual(cmd.output, 'FOOBAR') - api_client.command.post.assert_called_once_with({ - 'build': mock.ANY, - 'command': "/bin/bash -c echo -n FOOBAR", - "output": "FOOBAR", - "exit_code": 0, - "start_time": mock.ANY, - "end_time": mock.ANY, - }) + self.assertEqual(cmd.output, "FOOBAR") + api_client.command.post.assert_called_once_with( + { + "build": mock.ANY, + "command": "/bin/bash -c echo -n FOOBAR", + "output": "FOOBAR", + "exit_code": 0, + "start_time": mock.ANY, + "end_time": mock.ANY, + } + ) # Check that we sanitize the output self.assertEqual(sanitize_output.call_count, 1) @@ -280,9 +282,9 @@ def test_error_output(self): def test_sanitize_output(self): cmd = BuildCommand(['/bin/bash', '-c', 'echo']) checks = ( - ('Hola', 'Hola'), - ('H\x00i', 'Hi'), - ('H\x00i \x00\x00\x00You!\x00', 'Hi You!'), + ("Hola", "Hola"), + ("H\x00i", "Hi"), + ("H\x00i \x00\x00\x00You!\x00", "Hi You!"), ) for output, sanitized in checks: self.assertEqual(cmd.sanitize_output(output), sanitized) From b724f5bf271ed77ce924050400d3353c082e0443 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 2 Aug 2023 15:41:41 -0500 Subject: [PATCH 5/9] Format --- readthedocs/rtd_tests/tests/test_backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readthedocs/rtd_tests/tests/test_backend.py b/readthedocs/rtd_tests/tests/test_backend.py index a37d41ba8a0..88e0132144b 100644 --- a/readthedocs/rtd_tests/tests/test_backend.py +++ b/readthedocs/rtd_tests/tests/test_backend.py @@ -491,11 +491,11 @@ def test_invalid_submodule_is_ignored(self): with open(gitmodules_path, "a") as f: content = textwrap.dedent( - """ + """ [submodule "not-valid-path"] path = not-valid-path url = - """ + """ ) f.write(content) From d8a36079389db5c6fc36cd428c957b9e1ecca3f4 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 2 Aug 2023 16:08:28 -0500 Subject: [PATCH 6/9] Test --- readthedocs/rtd_tests/tests/test_backend.py | 48 ++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/readthedocs/rtd_tests/tests/test_backend.py b/readthedocs/rtd_tests/tests/test_backend.py index 88e0132144b..f581d5008a3 100644 --- a/readthedocs/rtd_tests/tests/test_backend.py +++ b/readthedocs/rtd_tests/tests/test_backend.py @@ -334,7 +334,7 @@ def test_invalid_submodule_is_ignored(self): [submodule "not-valid-path"] path = not-valid-path url = - """ + """ ) f.write(content) @@ -503,6 +503,52 @@ def test_invalid_submodule_is_ignored(self): self.assertTrue(valid) self.assertEqual(list(submodules), ["foobar"]) + def test_parse_submodules(self): + repo = self.project.vcs_repo( + environment=self.build_environment, + version_type=BRANCH, + version_identifier="submodule", + ) + repo.update() + repo.checkout("submodule") + gitmodules_path = os.path.join(repo.working_dir, ".gitmodules") + + with open(gitmodules_path, "a") as f: + content = textwrap.dedent( + """ + [submodule "not-valid-path"] + path = not-valid-path + url = + + [submodule "path with spaces"] + path = path with spaces + url = https://github.com + + [submodule "another-submodule"] + url = https://github.com + path = another-submodule + + [ssubmodule "invalid-submodule-key"] + url = https://github.com + path = invalid-submodule-key + + [submodule "invalid-path-key"] + url = https://github.com + paths = invalid-submodule-key + + [submodule "invalid-url-key"] + uurl = https://github.com + path = invalid-submodule-key + """ + ) + f.write(content) + + valid, submodules = repo.validate_submodules(self.dummy_conf) + self.assertTrue(valid) + self.assertEqual( + list(submodules), ["foobar", "path with spaces", "another-submodule"] + ) + def test_skip_submodule_checkout(self): """Test that a submodule is listed as available.""" repo = self.project.vcs_repo( From a6e7f01ba797b2eca25fdc3bb5f4831fddd88443 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 3 Aug 2023 12:15:40 -0500 Subject: [PATCH 7/9] Updates from review --- readthedocs/doc_builder/environments.py | 2 +- readthedocs/rtd_tests/tests/test_doc_building.py | 9 +++++++-- readthedocs/vcs_support/backends/git.py | 9 +++++---- 3 files changed, 13 insertions(+), 7 deletions(-) diff --git a/readthedocs/doc_builder/environments.py b/readthedocs/doc_builder/environments.py index 70a4861ef30..b816ca2ea1d 100644 --- a/readthedocs/doc_builder/environments.py +++ b/readthedocs/doc_builder/environments.py @@ -209,7 +209,7 @@ def sanitize_output(self, output: str) -> str: # Chunk the output data to be less than ``DATA_UPLOAD_MAX_MEMORY_SIZE`` # The length is calculated in bytes, so we need to encode the string first. - # NOTE: we are calculating the length in bytes, but truncating the string + # TODO: we are calculating the length in bytes, but truncating the string # in characters. We should use bytes or characters, but not both. output_length = len(sanitized.encode("utf-8")) # Left some extra space for the rest of the request data diff --git a/readthedocs/rtd_tests/tests/test_doc_building.py b/readthedocs/rtd_tests/tests/test_doc_building.py index 3b13b459995..89444171927 100644 --- a/readthedocs/rtd_tests/tests/test_doc_building.py +++ b/readthedocs/rtd_tests/tests/test_doc_building.py @@ -56,7 +56,12 @@ def test_record_command_as_success(self): ) with build_env: - build_env.run('false', record_as_success=True) + build_env.run( + 'false', + record_as_success=True, + # Use a directory that exists so the command doesn't fail. + cwd='/tmp', + ) self.assertEqual(len(build_env.commands), 1) command = build_env.commands[0] @@ -65,7 +70,7 @@ def test_record_command_as_success(self): { "build": mock.ANY, "command": command.get_command(), - "output": "", + "output": command.output, "exit_code": 0, "start_time": command.start_time, "end_time": command.end_time, diff --git a/readthedocs/vcs_support/backends/git.py b/readthedocs/vcs_support/backends/git.py index df42173dc7c..7c8662037c9 100644 --- a/readthedocs/vcs_support/backends/git.py +++ b/readthedocs/vcs_support/backends/git.py @@ -539,14 +539,14 @@ def commit(self): @property def submodules(self) -> Iterable[GitSubmodule]: r""" - Return a iterable of submodules in this repository. + Return an iterable of submodules in this repository. In order to get the submodules URLs and paths without initializing them, we parse the .gitmodules file. For this we make use of the ``git config --get-regexp`` command. - Keys and values from the config can contain spaces, - in order to parse the output unambiguously, we use the + Keys and values from the config can contain spaces. + In order to parse the output unambiguously, we use the ``--null`` option to separate each result with a null character, and each key and value with a newline character. @@ -565,7 +565,7 @@ def submodules(self) -> Iterable[GitSubmodule]: - In the example each result is put in a new line for readability. - Isn't guaranteed that the url and path keys will appear next to each other. - - Isn't guaranteed that that all submodules will have a url and path. + - Isn't guaranteed that all submodules will have a url and path. """ exit_code, stdout, _ = self.run( @@ -610,6 +610,7 @@ def submodules(self) -> Iterable[GitSubmodule]: url = submodule.get("url") path = submodule.get("path") if not url or not path: + log.warning("Invalid submodule.", submoduel_url=url, submodule_path=path) continue yield GitSubmodule( url=url, From 5f14e957655079916ad466056938519440a2b7cf Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 3 Aug 2023 12:21:32 -0500 Subject: [PATCH 8/9] Black --- readthedocs/rtd_tests/tests/test_doc_building.py | 4 ++-- readthedocs/vcs_support/backends/git.py | 4 +++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/readthedocs/rtd_tests/tests/test_doc_building.py b/readthedocs/rtd_tests/tests/test_doc_building.py index 89444171927..684e96c04fd 100644 --- a/readthedocs/rtd_tests/tests/test_doc_building.py +++ b/readthedocs/rtd_tests/tests/test_doc_building.py @@ -57,10 +57,10 @@ def test_record_command_as_success(self): with build_env: build_env.run( - 'false', + "false", record_as_success=True, # Use a directory that exists so the command doesn't fail. - cwd='/tmp', + cwd="/tmp", ) self.assertEqual(len(build_env.commands), 1) diff --git a/readthedocs/vcs_support/backends/git.py b/readthedocs/vcs_support/backends/git.py index 7c8662037c9..3d2a848c71c 100644 --- a/readthedocs/vcs_support/backends/git.py +++ b/readthedocs/vcs_support/backends/git.py @@ -610,7 +610,9 @@ def submodules(self) -> Iterable[GitSubmodule]: url = submodule.get("url") path = submodule.get("path") if not url or not path: - log.warning("Invalid submodule.", submoduel_url=url, submodule_path=path) + log.warning( + "Invalid submodule.", submoduel_url=url, submodule_path=path + ) continue yield GitSubmodule( url=url, From 848f2beac0ef4cada161ae79461df5aab3030825 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 9 Aug 2023 16:09:02 +0200 Subject: [PATCH 9/9] Apply suggestions from code review --- readthedocs/vcs_support/backends/git.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/readthedocs/vcs_support/backends/git.py b/readthedocs/vcs_support/backends/git.py index 3d2a848c71c..55328303c91 100644 --- a/readthedocs/vcs_support/backends/git.py +++ b/readthedocs/vcs_support/backends/git.py @@ -580,6 +580,7 @@ def submodules(self) -> Iterable[GitSubmodule]: record=False, ) if exit_code != 0: + # The command fails if the project doesn't have submodules (the .gitmodules file doesn't exist). return [] # Group the URLs and paths by submodule name/key. @@ -614,6 +615,7 @@ def submodules(self) -> Iterable[GitSubmodule]: "Invalid submodule.", submoduel_url=url, submodule_path=path ) continue + # Return a generator to speed up when checking if the project has at least one submodule (e.g. ``any(self.submodules)``) yield GitSubmodule( url=url, path=path,