diff --git a/readthedocs/projects/exceptions.py b/readthedocs/projects/exceptions.py index ad5363011ff..a48106e7ed0 100644 --- a/readthedocs/projects/exceptions.py +++ b/readthedocs/projects/exceptions.py @@ -39,10 +39,6 @@ class RepositoryError(BuildUserError): "ensure that your repository URL is correct and your repository is public. " "Private repositories are not supported.", ) - INVALID_SUBMODULES = _( - "One or more submodule URLs are not valid: {}, " - "git/ssh URL schemas for submodules are not supported." - ) DUPLICATED_RESERVED_VERSIONS = _( "You can not have two versions with the name latest or stable." " Ensure you don't have both a branch and a tag with this name." diff --git a/readthedocs/projects/tests/mockers.py b/readthedocs/projects/tests/mockers.py index b0f71a64c36..1149b4016d8 100644 --- a/readthedocs/projects/tests/mockers.py +++ b/readthedocs/projects/tests/mockers.py @@ -149,18 +149,9 @@ def _mock_git_repository(self): 'readthedocs.vcs_support.backends.git.Backend.submodules', new_callable=mock.PropertyMock, return_value=[ - mock.Mock( - path='one', - url='https://github.com/submodule/one', - ), - mock.Mock( - path='two', - url='https://github.com/submodule/two', - ), - mock.Mock( - path='three', - url='https://github.com/submodule/three', - ), + "one", + "two", + "three", ], ) diff --git a/readthedocs/projects/tests/test_build_tasks.py b/readthedocs/projects/tests/test_build_tasks.py index b888aef15a0..37480500336 100644 --- a/readthedocs/projects/tests/test_build_tasks.py +++ b/readthedocs/projects/tests/test_build_tasks.py @@ -1611,7 +1611,7 @@ def test_python_install_pip_several_options(self, load_yaml_config): @pytest.mark.parametrize( "value,expected", [ - (ALL, ["one", "two", "three"]), + (ALL, []), (["one", "two"], ["one", "two"]), ], ) diff --git a/readthedocs/projects/validators.py b/readthedocs/projects/validators.py index 0d65e5192c4..06afa689dc3 100644 --- a/readthedocs/projects/validators.py +++ b/readthedocs/projects/validators.py @@ -79,27 +79,7 @@ def __call__(self, value): raise ValidationError(_("Invalid scheme for URL")) -class SubmoduleURLValidator(RepositoryURLValidator): - - """ - A URL validator for repository submodules. - - If a repository has a relative submodule, the URL path is effectively the - supermodule's remote ``origin`` URL with the relative path applied. - - From the git docs:: - - ```` is the URL of the new submodule's origin repository. - This may be either an absolute URL, or (if it begins with ``./`` or - ``../``), the location relative to the superproject's default remote - repository - """ - - disallow_relative_url = False - - validate_repository_url = RepositoryURLValidator() -validate_submodule_url = SubmoduleURLValidator() def validate_build_config_file(path): diff --git a/readthedocs/rtd_tests/tests/test_backend.py b/readthedocs/rtd_tests/tests/test_backend.py index 0da7916ec2f..3d6ade6e15c 100644 --- a/readthedocs/rtd_tests/tests/test_backend.py +++ b/readthedocs/rtd_tests/tests/test_backend.py @@ -181,24 +181,6 @@ def test_check_for_submodules(self): repo.checkout("submodule") self.assertTrue(repo.are_submodules_available(self.dummy_conf)) - def test_check_invalid_submodule_urls(self): - """Test that a branch with an invalid submodule fails correctly.""" - repo = self.project.vcs_repo( - environment=self.build_environment, - version_type=BRANCH, - version_identifier="invalidsubmodule", - ) - repo.update() - repo.checkout("invalidsubmodule") - with self.assertRaises(RepositoryError) as e: - repo.update_submodules(self.dummy_conf) - # `invalid` is created in `make_test_git` - # it's a url in ssh form. - self.assertEqual( - str(e.exception), - RepositoryError.INVALID_SUBMODULES.format(["invalid"]), - ) - def test_check_submodule_urls(self): """Test that a valid submodule is found in the 'submodule' branch.""" repo = self.project.vcs_repo( @@ -208,7 +190,7 @@ def test_check_submodule_urls(self): ) repo.update() repo.checkout("submodule") - valid, _ = repo.validate_submodules(self.dummy_conf) + valid, _ = repo.get_available_submodules(self.dummy_conf) self.assertTrue(valid) @patch("readthedocs.vcs_support.backends.git.Backend.fetch") @@ -231,7 +213,7 @@ def test_git_update_with_external_version(self, fetch): repo.update() fetch.assert_called_once() - def test_invalid_submodule_is_ignored(self): + def test_submodule_without_url_is_included(self): """Test that an invalid submodule isn't listed.""" repo = self.project.vcs_repo( environment=self.build_environment, @@ -252,9 +234,8 @@ def test_invalid_submodule_is_ignored(self): ) f.write(content) - valid, submodules = repo.validate_submodules(self.dummy_conf) - self.assertTrue(valid) - self.assertEqual(list(submodules), ["foobar"]) + submodules = list(repo.submodules) + self.assertEqual(submodules, ["foobar", "not-valid-path"]) def test_parse_submodules(self): repo = self.project.vcs_repo( @@ -296,10 +277,16 @@ def test_parse_submodules(self): ) f.write(content) - valid, submodules = repo.validate_submodules(self.dummy_conf) - self.assertTrue(valid) + submodules = list(repo.submodules) self.assertEqual( - list(submodules), ["foobar", "path with spaces", "another-submodule"] + submodules, + [ + "foobar", + "not-valid-path", + "path with spaces", + "another-submodule", + "invalid-submodule-key", + ], ) def test_skip_submodule_checkout(self): diff --git a/readthedocs/vcs_support/backends/git.py b/readthedocs/vcs_support/backends/git.py index 9893eb5468b..9f98b00eae3 100644 --- a/readthedocs/vcs_support/backends/git.py +++ b/readthedocs/vcs_support/backends/git.py @@ -1,11 +1,9 @@ """Git-related utilities.""" import re -from dataclasses import dataclass from typing import Iterable import structlog -from django.core.exceptions import ValidationError from readthedocs.builds.constants import BRANCH, EXTERNAL, TAG from readthedocs.config import ALL @@ -16,18 +14,11 @@ GITLAB_MR_PULL_PATTERN, ) from readthedocs.projects.exceptions import RepositoryError -from readthedocs.projects.validators import validate_submodule_url from readthedocs.vcs_support.base import BaseVCS, VCSVersion log = structlog.get_logger(__name__) -@dataclass(slots=True) -class GitSubmodule: - path: str - url: str - - class Backend(BaseVCS): """Git VCS backend.""" @@ -220,54 +211,53 @@ def are_submodules_available(self, config): # to have a config file. return any(self.submodules) - def validate_submodules(self, config): + def get_available_submodules(self, config) -> tuple[bool, list]: """ - Returns the submodules and check that its URLs are valid. + Returns the submodules available to use. .. note:: Always call after `self.are_submodules_available`. - :returns: tuple(bool, list) + Returns a tuple, the first element is a boolean indicating whether + the submodules are available to use, the second element is a list + of submodules paths, if the list is empty, it means that all + submodules are available. - Returns `True` if all required submodules URLs are valid. - Returns a list of all required submodules: - - Include is `ALL`, returns all submodules available. - - Include is a list, returns just those. - - Exclude is `ALL` - this should never happen. - - Exclude is a list, returns all available submodules - but those from the list. + The following cases are possible: - Returns `False` if at least one submodule is invalid. - Returns the list of invalid submodules. + - Include is `ALL`, returns `True` and `[]`. + - Include is a list, returns `True` and the list of submodules. + - Exclude is `ALL`, returns `False` and `[]`. + - Exclude is a list, returns `True` and all available submodules + but those from the list. If at the end there are no submodules + left, returns `False` and the empty list. """ - submodules = {sub.path: sub for sub in self.submodules} - - for sub_path in config.submodules.exclude: - path = sub_path.rstrip('/') - # TODO: Should we raise an error if the submodule is not found? - if path in submodules: - del submodules[path] - - if config.submodules.include != ALL and config.submodules.include: - submodules_include = {} - for sub_path in config.submodules.include: - path = sub_path.rstrip('/') - # TODO: Should we raise an error if the submodule is not found? - if path in submodules: - submodules_include[path] = submodules[path] - submodules = submodules_include - - invalid_submodules = [] - for path, submodule in submodules.items(): - try: - validate_submodule_url(submodule.url) - except ValidationError: - invalid_submodules.append(path) + if config.submodules.exclude == ALL: + return False, [] + + if config.submodules.exclude: + submodules = list(self.submodules) + for sub_path in config.submodules.exclude: + path = sub_path.rstrip("/") + try: + submodules.remove(path) + except ValueError: + # TODO: Should we raise an error if the submodule is not found? + pass - if invalid_submodules: - return False, invalid_submodules - return True, submodules.keys() + # If all submodules were excluded, we don't need to do anything. + if not submodules: + return False, [] + return True, submodules + + if config.submodules.include == ALL: + return True, [] + + if config.submodules.include: + return True, config.submodules.include + + return False, [] def checkout_revision(self, revision): try: @@ -335,11 +325,11 @@ def commit(self): return stdout.strip() @property - def submodules(self) -> Iterable[GitSubmodule]: + def submodules(self) -> Iterable[str]: r""" - Return an iterable of submodules in this repository. + Return an iterable of submodule paths in this repository. - In order to get the submodules URLs and paths without initializing them, + In order to get the submodules paths without initializing them, we parse the .gitmodules file. For this we make use of the ``git config --get-regexp`` command. @@ -352,18 +342,16 @@ def submodules(self) -> Iterable[GitSubmodule]: .. code-block:: text - submodule.submodule-1.url\nhttps://github.com/\0 submodule.submodule-1.path\nsubmodule-1\0 submodule.submodule-2.path\nsubmodule-2\0 - submodule.submodule-2.url\nhttps://github.com/\0 - submodule.submodule-3.url\nhttps://github.com/\0 submodule.submodule-4.path\n\0 .. note:: - In the example each result is put in a new line for readability. - - Isn't guaranteed that the url and path keys will appear next to each other. - - Isn't guaranteed that all submodules will have a url and path. + - Isn't guaranteed that sub-keys will appear next to each other. + - Isn't guaranteed that all submodules will have a path + (they are probably invalid submodules). """ exit_code, stdout, _ = self.run( @@ -373,16 +361,14 @@ def submodules(self) -> Iterable[GitSubmodule]: "--file", ".gitmodules", "--get-regexp", - # Get only the URL and path keys of each submodule. - r"^submodule\..*\.(url|path)$", + # Get only the path key of each submodule. + r"^submodule\..*\.path$", record=False, ) if exit_code != 0: # The command fails if the project doesn't have submodules (the .gitmodules file doesn't exist). return [] - # Group the URLs and paths by submodule name/key. - submodules = {} keys_and_values = stdout.split("\0") for key_and_value in keys_and_values: try: @@ -393,32 +379,12 @@ def submodules(self) -> Iterable[GitSubmodule]: log.warning("Wrong key and value format.", key_and_value=key_and_value) continue - if key.endswith(".url"): - key = key.removesuffix(".url") - submodules.setdefault(key, {})["url"] = value - elif key.endswith(".path"): - key = key.removesuffix(".path") - submodules.setdefault(key, {})["path"] = value + if key.endswith(".path"): + yield value else: # This should never happen, but we log a warning just in case the regex is wrong. log.warning("Unexpected key extracted fom .gitmodules.", key=key) - for submodule in submodules.values(): - # If the submodule doesn't have a URL or path, we skip it, - # since it's not a valid submodule. - url = submodule.get("url") - path = submodule.get("path") - if not url or not path: - log.warning( - "Invalid submodule.", submoduel_url=url, submodule_path=path - ) - continue - # Return a generator to speed up when checking if the project has at least one submodule (e.g. ``any(self.submodules)``) - yield GitSubmodule( - url=url, - path=path, - ) - def checkout(self, identifier=None): """Checkout to identifier or latest.""" super().checkout() @@ -436,21 +402,19 @@ def checkout(self, identifier=None): return code, out, err def update_submodules(self, config): + # TODO: just rely on get_available_submodules when + # not using a config file is fully deprecated. if self.are_submodules_available(config): - valid, submodules = self.validate_submodules(config) + valid, submodules = self.get_available_submodules(config) if valid: - self.checkout_submodules(submodules, config) - else: - raise RepositoryError( - RepositoryError.INVALID_SUBMODULES.format(submodules), - ) - - def checkout_submodules(self, submodules, config): - """Checkout all repository submodules.""" - # If the repository has no submodules, we don't need to do anything. - # Otherwise, all submodules will be updated. - if not submodules: - return + self.checkout_submodules(submodules, config.submodules.recursive) + + def checkout_submodules(self, submodules: list[str], recursive: bool): + """ + Checkout all repository submodules. + + If submodules is empty, all submodules will be updated. + """ self.run('git', 'submodule', 'sync') cmd = [ 'git', @@ -459,7 +423,7 @@ def checkout_submodules(self, submodules, config): '--init', '--force', ] - if config.submodules.recursive: + if recursive: cmd.append("--recursive") cmd.append("--") cmd += submodules