diff --git a/readthedocs/api/v2/serializers.py b/readthedocs/api/v2/serializers.py index b5a74503a25..521a2e7acf4 100644 --- a/readthedocs/api/v2/serializers.py +++ b/readthedocs/api/v2/serializers.py @@ -130,20 +130,20 @@ class VersionSerializer(serializers.ModelSerializer): class Meta: model = Version fields = [ - 'id', - 'project', - 'slug', - 'identifier', - 'verbose_name', - 'privacy_level', - 'active', - 'built', - 'downloads', - 'type', - 'has_pdf', - 'has_epub', - 'has_htmlzip', - 'documentation_type', + "id", + "project", + "slug", + "identifier", + "verbose_name", + "privacy_level", + "active", + "built", + "downloads", + "type", + "has_pdf", + "has_epub", + "has_htmlzip", + "documentation_type", ] def __init__(self, *args, **kwargs): @@ -192,6 +192,7 @@ class Meta(VersionSerializer.Meta): "addons", "build_data", "canonical_url", + "machine", ] diff --git a/readthedocs/api/v2/views/integrations.py b/readthedocs/api/v2/views/integrations.py index 3ff940dd98a..cc941a65ce8 100644 --- a/readthedocs/api/v2/views/integrations.py +++ b/readthedocs/api/v2/views/integrations.py @@ -15,11 +15,7 @@ from rest_framework.status import HTTP_400_BAD_REQUEST from rest_framework.views import APIView -from readthedocs.builds.constants import ( - EXTERNAL_VERSION_STATE_CLOSED, - EXTERNAL_VERSION_STATE_OPEN, - LATEST, -) +from readthedocs.builds.constants import LATEST from readthedocs.core.signals import webhook_bitbucket, webhook_github, webhook_gitlab from readthedocs.core.views.hooks import ( build_branches, @@ -97,8 +93,8 @@ def post(self, request, project_slug): if not Project.objects.is_active(self.project): resp = {'detail': 'This project is currently disabled'} return Response(resp, status=status.HTTP_406_NOT_ACCEPTABLE) - except Project.DoesNotExist: - raise NotFound('Project not found') + except Project.DoesNotExist as exc: + raise NotFound("Project not found") from exc if not self.is_payload_valid(): log.warning('Invalid payload for project and integration.') return Response( @@ -190,7 +186,7 @@ def get_response_push(self, project, branches): :param project: Project instance :type project: Project - :param branches: List of branch names to build + :param branches: List of branch/tag names to build :type branches: list(str) """ to_build, not_building = build_branches(project, branches) @@ -365,7 +361,7 @@ def get_external_version_data(self): return data except KeyError as e: key = e.args[0] - raise ParseError(f"Invalid payload. {key} is required.") + raise ParseError(f"Invalid payload. {key} is required.") from e def is_payload_valid(self): """ @@ -500,8 +496,8 @@ def handle_webhook(self): try: branch = self._normalize_ref(self.data["ref"]) return self.get_response_push(self.project, [branch]) - except KeyError: - raise ParseError('Parameter "ref" is required') + except KeyError as exc: + raise ParseError('Parameter "ref" is required') from exc return None @@ -581,7 +577,7 @@ def get_external_version_data(self): return data except KeyError as e: key = e.args[0] - raise ParseError(f"Invalid payload. {key} is required.") + raise ParseError(f"Invalid payload. {key} is required.") from e def handle_webhook(self): """ @@ -624,8 +620,8 @@ def handle_webhook(self): try: branch = self._normalize_ref(data["ref"]) return self.get_response_push(self.project, [branch]) - except KeyError: - raise ParseError('Parameter "ref" is required') + except KeyError as exc: + raise ParseError('Parameter "ref" is required') from exc if self.project.external_builds_enabled and event == GITLAB_MERGE_REQUEST: if ( @@ -726,8 +722,8 @@ def handle_webhook(self): ) log.debug("Triggered sync_versions.") return self.sync_versions_response(self.project) - except KeyError: - raise ParseError('Invalid request') + except KeyError as exc: + raise ParseError("Invalid request") from exc return None def is_payload_valid(self): @@ -809,8 +805,8 @@ def handle_webhook(self): self.update_default_branch(default_branch) return self.get_response_push(self.project, branches) - except TypeError: - raise ParseError('Invalid request') + except TypeError as exc: + raise ParseError("Invalid request") from exc def is_payload_valid(self): """ diff --git a/readthedocs/builds/tasks.py b/readthedocs/builds/tasks.py index e66cacdf388..b149afb0508 100644 --- a/readthedocs/builds/tasks.py +++ b/readthedocs/builds/tasks.py @@ -292,8 +292,16 @@ def sync_versions_task(project_pk, tags_data, branches_data, **kwargs): Creates new Version objects for tags/branches that aren't tracked in the database, and deletes Version objects for tags/branches that don't exists in the repository. - :param tags_data: List of dictionaries with ``verbose_name`` and ``identifier``. - :param branches_data: Same as ``tags_data`` but for branches. + :param tags_data: List of dictionaries with ``verbose_name`` and ``identifier`` + Example: [ + {"verbose_name": "v1.0.0", + "identifier": "67a9035990f44cb33091026d7453d51606350519"}, + ]. + :param branches_data: Same as ``tags_data`` but for branches (branch name, branch identifier). + Example: [ + {"verbose_name": "latest", + "identifier": "main"}, + ]. :returns: `True` or `False` if the task succeeded. """ project = Project.objects.get(pk=project_pk) diff --git a/readthedocs/doc_builder/director.py b/readthedocs/doc_builder/director.py index 3f35dc4dbde..5241175f3a3 100644 --- a/readthedocs/doc_builder/director.py +++ b/readthedocs/doc_builder/director.py @@ -94,6 +94,8 @@ def setup_vcs(self): environment=self.vcs_environment, verbose_name=self.data.version.verbose_name, version_type=self.data.version.type, + version_identifier=self.data.version.identifier, + version_machine=self.data.version.machine, ) # We can't do too much on ``pre_checkout`` because we haven't @@ -222,7 +224,7 @@ def build(self): def checkout(self): """Checkout Git repo and load build config file.""" - log.info("Cloning repository.") + log.info("Cloning and fetching.") self.vcs_repository.update() identifier = self.data.build_commit or self.data.version.identifier diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 76dd2bfd4a0..708d605871f 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -995,7 +995,13 @@ def has_htmlzip(self, version_slug=LATEST, version_type=None): ) def vcs_repo( - self, environment, version=LATEST, verbose_name=None, version_type=None + self, + environment, + version=LATEST, + verbose_name=None, + version_type=None, + version_identifier=None, + version_machine=None, ): """ Return a Backend object for this project able to handle VCS commands. @@ -1014,8 +1020,13 @@ def vcs_repo( repo = None else: repo = backend( - self, version, environment=environment, - verbose_name=verbose_name, version_type=version_type + self, + version, + environment=environment, + verbose_name=verbose_name, + version_type=version_type, + version_identifier=version_identifier, + version_machine=version_machine, ) return repo @@ -1937,6 +1948,7 @@ def add_features(sender, **kwargs): INDEX_FROM_HTML_FILES = 'index_from_html_files' # Build related features + GIT_CLONE_FETCH_CHECKOUT_PATTERN = "git_clone_fetch_checkout_pattern" HOSTING_INTEGRATIONS = "hosting_integrations" NO_CONFIG_FILE_DEPRECATED = "no_config_file" @@ -2060,6 +2072,12 @@ def add_features(sender, **kwargs): "sources" ), ), + ( + GIT_CLONE_FETCH_CHECKOUT_PATTERN, + _( + "Build: Use simplified and optimized git clone + git fetch + git checkout patterns" + ), + ), ( HOSTING_INTEGRATIONS, _( diff --git a/readthedocs/projects/tasks/mixins.py b/readthedocs/projects/tasks/mixins.py index 830e13ca458..5d831e1aee1 100644 --- a/readthedocs/projects/tasks/mixins.py +++ b/readthedocs/projects/tasks/mixins.py @@ -45,9 +45,13 @@ def sync_versions(self, vcs_repository): # Do not use ``ls-remote`` if the VCS does not support it or if we # have already cloned the repository locally. The latter happens # when triggering a normal build. - use_lsremote = ( - vcs_repository.supports_lsremote - and not vcs_repository.repo_exists() + # Always use lsremote when we have GIT_CLONE_FETCH_CHECKOUT_PATTERN on + # and the repo supports lsremote. + # The new pattern does not fetch branch and tag data, so we always + # have to do ls-remote. + use_lsremote = vcs_repository.supports_lsremote and ( + self.data.project.has_feature(Feature.GIT_CLONE_FETCH_CHECKOUT_PATTERN) + or not vcs_repository.repo_exists() ) sync_tags = vcs_repository.supports_tags and not self.data.project.has_feature( Feature.SKIP_SYNC_TAGS @@ -63,6 +67,10 @@ def sync_versions(self, vcs_repository): include_tags=sync_tags, include_branches=sync_branches, ) + + # GIT_CLONE_FETCH_CHECKOUT_PATTERN: When the feature flag becomes default, we + # can remove this segment since lsremote is always on. + # We can even factor out the dependency to gitpython. else: if sync_tags: tags = vcs_repository.tags diff --git a/readthedocs/rtd_tests/tests/test_api.py b/readthedocs/rtd_tests/tests/test_api.py index 241a4cba419..be98ff349ac 100644 --- a/readthedocs/rtd_tests/tests/test_api.py +++ b/readthedocs/rtd_tests/tests/test_api.py @@ -3127,14 +3127,15 @@ def test_get_version_by_id(self): "users": [1], "urlconf": None, }, - 'privacy_level': 'public', - 'downloads': {}, - 'identifier': '2404a34eba4ee9c48cc8bc4055b99a48354f4950', - 'slug': '0.8', - 'has_epub': False, - 'has_htmlzip': False, - 'has_pdf': False, - 'documentation_type': 'sphinx', + "privacy_level": "public", + "downloads": {}, + "identifier": "2404a34eba4ee9c48cc8bc4055b99a48354f4950", + "slug": "0.8", + "has_epub": False, + "has_htmlzip": False, + "has_pdf": False, + "documentation_type": "sphinx", + "machine": False, } self.assertDictEqual( diff --git a/readthedocs/rtd_tests/tests/test_backend.py b/readthedocs/rtd_tests/tests/test_backend.py index 4d978cbd2dc..9a124378a5c 100644 --- a/readthedocs/rtd_tests/tests/test_backend.py +++ b/readthedocs/rtd_tests/tests/test_backend.py @@ -9,7 +9,7 @@ from django.contrib.auth.models import User from django.test import TestCase -from readthedocs.builds.constants import EXTERNAL +from readthedocs.builds.constants import BRANCH, EXTERNAL, TAG from readthedocs.builds.models import Version from readthedocs.config import ALL from readthedocs.doc_builder.environments import LocalBuildEnvironment @@ -21,6 +21,7 @@ delete_git_branch, delete_git_tag, get_current_commit, + get_git_latest_commit_hash, make_test_git, make_test_hg, ) @@ -48,6 +49,11 @@ def setUp(self): self.dummy_conf.submodules.exclude = [] self.build_environment = LocalBuildEnvironment(api_client=mock.MagicMock()) + def tearDown(self): + repo = self.project.vcs_repo(environment=self.build_environment) + repo.make_clean_working_dir() + super().tearDown() + def test_git_lsremote(self): repo_path = self.project.repo default_branches = [ @@ -378,6 +384,253 @@ def test_fetch_clean_tags_and_branches(self, checkout_path): ) +@mock.patch("readthedocs.doc_builder.environments.BuildCommand.save", mock.MagicMock()) +class TestGitBackendNew(TestGitBackend): + """ + Test the entire Git backend (with the GIT_CLONE_FETCH_CHECKOUT_PATTERN feature flag). + + This test class is intended to maintain all backwards compatibility when introducing new + git clone+fetch commands, hence inheriting from the former test class. + + Methods have been copied and adapted. + Once the feature ``GIT_CLONE_FETCH_CHECKOUT_PATTERN`` has become the default for all projects, + we can discard of TestGitBackend by moving the methods that aren't overwritten into this class + and renaming it. + """ + + def setUp(self): + super().setUp() + fixture.get( + Feature, + projects=[self.project], + feature_id=Feature.GIT_CLONE_FETCH_CHECKOUT_PATTERN, + ) + self.assertTrue( + self.project.has_feature(Feature.GIT_CLONE_FETCH_CHECKOUT_PATTERN) + ) + + def test_check_for_submodules(self): + """ + Test that we can get a branch called 'submodule' containing a valid submodule. + """ + repo = self.project.vcs_repo( + environment=self.build_environment, + version_type=BRANCH, + version_identifier="submodule", + ) + + repo.update() + self.assertFalse(repo.are_submodules_available(self.dummy_conf)) + + # The submodule branch contains one submodule + repo.checkout("submodule") + self.assertTrue(repo.are_submodules_available(self.dummy_conf)) + + def test_check_invalid_submodule_urls(self): + """Test that a branch with an invalid submodule fails correctly.""" + repo = self.project.vcs_repo( + environment=self.build_environment, + version_type=BRANCH, + version_identifier="invalidsubmodule", + ) + repo.update() + repo.checkout("invalidsubmodule") + with self.assertRaises(RepositoryError) as e: + repo.update_submodules(self.dummy_conf) + # `invalid` is created in `make_test_git` + # it's a url in ssh form. + self.assertEqual( + str(e.exception), + RepositoryError.INVALID_SUBMODULES.format(["invalid"]), + ) + + def test_check_submodule_urls(self): + """Test that a valid submodule is found in the 'submodule' branch.""" + repo = self.project.vcs_repo( + environment=self.build_environment, + version_type=BRANCH, + version_identifier="submodule", + ) + repo.update() + repo.checkout("submodule") + valid, _ = repo.validate_submodules(self.dummy_conf) + self.assertTrue(valid) + + @patch("readthedocs.vcs_support.backends.git.Backend.fetch_ng") + def test_git_update_with_external_version(self, fetch): + """Test that an external Version (PR) is correctly cloned and fetched.""" + version = fixture.get( + Version, + project=self.project, + type=EXTERNAL, + active=True, + identifier="84492ad53ff8aba83015123f4e68d5897a1fd5bc", # commit hash + verbose_name="1234", # pr number + ) + repo = self.project.vcs_repo( + verbose_name=version.verbose_name, + version_type=version.type, + version_identifier=version.identifier, + environment=self.build_environment, + ) + repo.update() + fetch.assert_called_once() + + def test_invalid_submodule_is_ignored(self): + """Test that an invalid submodule isn't listed.""" + repo = self.project.vcs_repo( + environment=self.build_environment, + version_type=BRANCH, + version_identifier="submodule", + ) + repo.update() + repo.checkout("submodule") + gitmodules_path = os.path.join(repo.working_dir, ".gitmodules") + + with open(gitmodules_path, "a") as f: + content = textwrap.dedent( + """ + [submodule "not-valid-path"] + path = not-valid-path + url = https://github.com/readthedocs/readthedocs.org + """ + ) + f.write(content) + + valid, submodules = repo.validate_submodules(self.dummy_conf) + self.assertTrue(valid) + self.assertEqual(list(submodules), ["foobar"]) + + def test_skip_submodule_checkout(self): + """Test that a submodule is listed as available.""" + repo = self.project.vcs_repo( + environment=self.build_environment, + version_type=BRANCH, + version_identifier="submodule", + ) + repo.update() + repo.checkout("submodule") + self.assertTrue(repo.are_submodules_available(self.dummy_conf)) + + def test_use_shallow_clone(self): + """A test that should be removed because shallow clones are the new default.""" + # We should not be calling test_use_shallow_clone + return True + + def test_git_fetch_with_external_version(self): + """Test that fetching an external build (PR branch) correctly executes.""" + version = fixture.get(Version, project=self.project, type=EXTERNAL, active=True) + repo = self.project.vcs_repo( + verbose_name=version.verbose_name, + version_type=version.type, + environment=self.build_environment, + ) + repo.update() + code, _, _ = repo.fetch_ng() + self.assertEqual(code, 0) + + def test_git_branches(self): + """ + Test a source repository with multiple branches, can be cloned and fetched. + + For each branch, we clone and fetch and check that we get exactly what we expect. + """ + repo_path = self.project.repo + branches = [ + "develop", + "master", + "2.0.X", + "release/2.0.0", + "release/foo/bar", + ] + for branch in branches: + create_git_branch(repo_path, branch) + + for branch in branches: + # Create a repo that we want to clone and fetch a specific branch for + repo = self.project.vcs_repo( + environment=self.build_environment, + version_identifier=branch, + version_type=BRANCH, + ) + # Because current behavior is to reuse already cloned destinations, we should + # clear the working dir instead of reusing it. + repo.make_clean_working_dir() + + repo.update() + + self.assertEqual( + set([branch, "master"]), + {branch.verbose_name for branch in repo.branches}, + ) + + def test_git_branches_unicode(self): + """Test to verify that we can clone+fetch a unicode branch name.""" + + # Add a branch to the repo. + # Note: It's assumed that the source repo is re-created in setUp() + create_git_branch(self.project.repo, "release-ünîø∂é") + + repo = self.project.vcs_repo( + environment=self.build_environment, + version_identifier="release-ünîø∂é", + version_type=BRANCH, + ) + repo.update() + + # Note here that the original default branch 'master' got created during the clone + self.assertEqual( + set(["release-ünîø∂é", "master"]), + {branch.verbose_name for branch in repo.branches}, + ) + + def test_update_without_branch_name(self): + """ + Test that we get expected default branch when not specifying a branch name. + + Covers: + * Manually imported projects + + * Automatically imported projects that had their default branch name deliberately removed + during import wizard (and before the first branch sync from webhook) + """ + repo_path = self.project.repo + create_git_branch(repo_path, "develop") + + repo = self.project.vcs_repo( + environment=self.build_environment, + version_type=BRANCH, + ) + repo.update() + + # Check that the README file from the fixture exists + self.assertTrue(os.path.isfile(os.path.join(repo.working_dir, "README"))) + + # Check that "only-on-default-branch" exists (it doesn't exist on other branches) + self.assertTrue( + os.path.exists(os.path.join(repo.working_dir, "only-on-default-branch")) + ) + + # Check that we don't for instance have foobar + self.assertFalse(os.path.isfile(os.path.join(repo.working_dir, "foobar"))) + + def test_special_tag_stable(self): + """Test that an auto-generated 'stable' tag works.""" + repo_path = self.project.repo + latest_actual_commit_hash = get_git_latest_commit_hash(repo_path, "master") + + repo = self.project.vcs_repo( + environment=self.build_environment, + version_type=TAG, + version_machine=True, + version_identifier=latest_actual_commit_hash, + ) + repo.update() + + # Checkout the master branch to verify that we can checkout something + # from the above clone+fetch + repo.checkout("master") + + # Avoid trying to save the commands via the API @mock.patch('readthedocs.doc_builder.environments.BuildCommand.save', mock.MagicMock()) class TestHgBackend(TestCase): diff --git a/readthedocs/rtd_tests/utils.py b/readthedocs/rtd_tests/utils.py index b67e37142b5..d5fb2ea7499 100644 --- a/readthedocs/rtd_tests/utils.py +++ b/readthedocs/rtd_tests/utils.py @@ -66,6 +66,16 @@ def make_test_git(): # Checkout to master branch again check_output(['git', 'checkout', 'master'], env=env) + + # Add something unique to the master branch (so we can verify it's correctly checked out) + open("only-on-default-branch", "w").write( + "This file only exists in the default branch" + ) + check_output(["git", "add", "only-on-default-branch"], env=env) + check_output( + ["git", "commit", '-m"Add something unique to master branch"'], env=env + ) + return directory @@ -166,6 +176,16 @@ def create_git_branch(directory, branch): check_output(command, env=env) +@restoring_chdir +def get_git_latest_commit_hash(directory, branch): + env = environ.copy() + env["GIT_DIR"] = pjoin(directory, ".git") + chdir(directory) + + command = ["git", "rev-parse", branch] + return check_output(command, env=env).strip() + + @restoring_chdir def delete_git_branch(directory, branch): env = environ.copy() diff --git a/readthedocs/vcs_support/backends/git.py b/readthedocs/vcs_support/backends/git.py index 16ca406a373..ad86c1615dc 100644 --- a/readthedocs/vcs_support/backends/git.py +++ b/readthedocs/vcs_support/backends/git.py @@ -8,7 +8,7 @@ from git.exc import BadName, InvalidGitRepositoryError, NoSuchPathError from gitdb.util import hex_to_bin -from readthedocs.builds.constants import EXTERNAL +from readthedocs.builds.constants import BRANCH, EXTERNAL, TAG from readthedocs.config import ALL from readthedocs.projects.constants import ( GITHUB_BRAND, @@ -35,10 +35,20 @@ class Backend(BaseVCS): repo_depth = 50 def __init__(self, *args, **kwargs): + # The version_identifier is a Version.identifier value passed from the build process. + # It has a special meaning since it's unfortunately not consistent, you need to be aware of + # exactly how and where to use this. + # See more in the .get_remote_fetch_refspec() docstring + self.version_identifier = kwargs.pop("version_identifier") + # We also need to know about Version.machine + self.version_machine = kwargs.pop("version_machine") super().__init__(*args, **kwargs) self.token = kwargs.get('token') self.repo_url = self._get_clone_url() + # While cloning, we can decide that we are not going to fetch anything + self._skip_fetch = False + def _get_clone_url(self): if '://' in self.repo_url: hacked_url = self.repo_url.split('://')[1] @@ -52,12 +62,25 @@ def _get_clone_url(self): # clone_url = 'git://%s' % (hacked_url) return self.repo_url + # TODO: Remove when removing GIT_CLONE_FETCH_CHECKOUT_PATTERN def set_remote_url(self, url): return self.run('git', 'remote', 'set-url', 'origin', url) def update(self): - """Clone or update the repository.""" + """Clone and/or fetch remote repository.""" super().update() + from readthedocs.projects.models import Feature + + if self.project.has_feature(Feature.GIT_CLONE_FETCH_CHECKOUT_PATTERN): + # New behavior: Clone is responsible for calling .repo_exists() and + # .make_clean_working_dir() + self.clone_ng() + + # TODO: We are still using return values in this function that are legacy. + # This should be either explained or removed. + return self.fetch_ng() + + # Old behavior if self.repo_exists(): self.set_remote_url(self.repo_url) return self.fetch() @@ -68,6 +91,176 @@ def update(self): return self.fetch() return self.clone() + def get_remote_fetch_refspec(self): + """ + Gets a valid remote reference for the identifier. + + See also: The section from ``git help fetch`` + + This method sits on top of a lot of legacy design. + It decides how to treat the incoming ``Version.identifier`` from + knowledge of how the caller (the build process) uses build data. + + Version.identifier = a branch name (branches) + Version.identifier = commit (tags) + Version.identifier = commit (external versions) + Version.verbose_name = branch alias, e.g. latest (branches) + Version.verbose_name = tag name (tags) + Version.verbose_name = PR number (external versions) + + :return: A refspec valid for fetch operation + """ + + if not self.version_type: + log.warning( + "Trying to resolve a remote reference without setting version_type is not " + "possible", + project_slug=self.project.slug, + ) + return None + + # Branches have the branch identifier set by the caller who instantiated the + # Git backend. + # If version_identifier is empty, then the fetch operation cannot know what to fetch + # and will fetch everything, in order to build what might be defined elsewhere + # as the "default branch". This can be the case for an initial build started BEFORE + # a webhook or sync versions task has concluded what the default branch is. + if self.version_type == BRANCH and self.version_identifier: + # Here we point directly to the remote branch name and update our local remote + # refspec to point here. + # The original motivation for putting 'refs/remotes/origin/' as the local refspec + # was an assumption in the .branches property that iterates over everything in + # refs/remotes/origin. We might still keep this pattern as it seems to be the most solid + # convention for storing a remote:local refspec mapping. + return ( + f"refs/heads/{self.version_identifier}:refs/remotes/origin/" + f"{self.version_identifier}" + ) + # Tags + if self.version_type == TAG and self.verbose_name: + # A "stable" tag is automatically created with Version.machine=True, + # denoting that it's not a branch/tag that really exists. + # Because we don't know if it originates from the default branch or some + # other tagged release, we will fetch the exact commit it points to. + if self.version_machine and self.verbose_name == "stable": + if self.version_identifier: + return f"{self.version_identifier}" + log.error("'stable' version without a commit hash.") + return None + return f"refs/tags/{self.verbose_name}:refs/tags/{self.verbose_name}" + + if self.version_type == EXTERNAL: + # TODO: We should be able to resolve this without looking up in oauth registry + git_provider_name = self.project.git_provider_name + + # Remote reference for Git providers where pull request builds are supported + if git_provider_name == GITHUB_BRAND: + return GITHUB_PR_PULL_PATTERN.format(id=self.verbose_name) + if self.project.git_provider_name == GITLAB_BRAND: + return GITLAB_MR_PULL_PATTERN.format(id=self.verbose_name) + + log.warning( + "Asked to do an external build for a Git provider that does not support " + "fetching a pr/mr refspec.", + project_slug=self.project.slug, + ) + + def clone_ng(self): + """ + Performs the next-generation (ng) git clone operation. + + This method is used when GIT_CLONE_FETCH_CHECKOUT_PATTERN is on. + """ + # TODO: This seems to be legacy that can be removed. + # If the repository is already cloned, we don't do anything. + # It seems to originate from when a cloned repository was cached on disk, + # and so we can call call .update() several times in the same build. + if self.repo_exists(): + return + + # TODO: This seems to be legacy that can be removed. + # There shouldn't be cases where we are asked to + # clone the repo in a non-clean working directory. + # The prior call to repo_exists() will return if a repo already exist with + # unclear guarantees about whether that even needs to be a fully consistent clone. + self.make_clean_working_dir() + + # TODO: We should add "--no-checkout" in all git clone operations, except: + # There exists a case of version_type=BRANCH without a branch name. + # This case is relevant for building projects for the first time without knowing the name + # of the default branch. Once this case has been made redundant, we can have + # --no-checkout for all clones. + # --depth 1: Shallow clone, fetch as little data as possible. + cmd = ["git", "clone", "--depth", "1", self.repo_url, "."] + + try: + # TODO: Explain or remove the return value + code, stdout, stderr = self.run(*cmd) + return code, stdout, stderr + except RepositoryError as exc: + raise RepositoryError(RepositoryError.CLONE_ERROR()) from exc + + def fetch_ng(self): + """ + Performs the next-generation (ng) git fetch operation. + + This method is used when GIT_CLONE_FETCH_CHECKOUT_PATTERN is on. + """ + + # When git clone does NOT add --no-checkout, it's because we are going + # to use the remote HEAD, so we don't have to fetch nor check out. + if self._skip_fetch: + log.info( + "Skipping git fetch", + version_identifier=self.version_identifier, + version_machine=self.version_machine, + version_verbose_name=self.verbose_name, + version_type=self.version_type, + ) + return + + # --force: Likely legacy, it seems to be irrelevant to this usage + # --prune: Likely legacy, we don't expect a previous fetch command to have run + # --prune-tags: Likely legacy, we don't expect a previous fetch command to have run + # --depth: To keep backward compatibility for now. + # This flag should be made unnecessary, it's downloading commit data that's + # never used. + cmd = [ + "git", + "fetch", + "origin", + "--force", + "--prune", + "--prune-tags", + "--depth", + str(self.repo_depth), + ] + remote_reference = self.get_remote_fetch_refspec() + + if remote_reference: + # TODO: We are still fetching the latest 50 commits. + # A PR might have another commit added after the build has started... + cmd.append(remote_reference) + + # Log a warning, except for machine versions since it's a known bug that + # we haven't stored a remote refspec in Version for those "stable" versions. + # This could be the case for an unknown default branch. + elif not self.version_machine: + # We are doing a fetch without knowing the remote reference. + # This is expensive, so log the event. + log.warning( + "Git fetch: Could not decide a remote reference for version. " + "Is it an empty default branch?", + project_slug=self.project.slug, + verbose_name=self.verbose_name, + version_type=self.version_type, + version_identifier=self.version_identifier, + ) + + # TODO: Explain or remove the return value + code, stdout, stderr = self.run(*cmd) + return code, stdout, stderr + def repo_exists(self): try: self._repo @@ -178,10 +371,10 @@ def checkout_revision(self, revision): try: code, out, err = self.run('git', 'checkout', '--force', revision) return [code, out, err] - except RepositoryError: + except RepositoryError as exc: raise RepositoryError( RepositoryError.FAILED_TO_CHECKOUT.format(revision), - ) + ) from exc def clone(self): """Clones the repository.""" @@ -213,8 +406,8 @@ def clone(self): # ) return code, stdout, stderr - except RepositoryError: - raise RepositoryError(RepositoryError.CLONE_ERROR()) + except RepositoryError as exc: + raise RepositoryError(RepositoryError.CLONE_ERROR()) from exc def lsremote(self, include_tags=True, include_branches=True): """ diff --git a/readthedocs/vcs_support/base.py b/readthedocs/vcs_support/base.py index 658fb8f5b2c..16a2e1b8417 100644 --- a/readthedocs/vcs_support/base.py +++ b/readthedocs/vcs_support/base.py @@ -100,16 +100,16 @@ def run(self, *cmd, **kwargs): try: build_cmd = self.environment.run(*cmd, **kwargs) - except BuildCancelled: + except BuildCancelled as exc: # Catch ``BuildCancelled`` here and re raise it. Otherwise, if we # raise a ``RepositoryError`` then the ``on_failure`` method from # Celery won't treat this problem as a ``BuildCancelled`` issue. - raise BuildCancelled - except BuildUserError as e: + raise BuildCancelled from exc + except BuildUserError as exc: # Re raise as RepositoryError to handle it properly from outside - if hasattr(e, "message"): - raise RepositoryError(e.message) - raise RepositoryError + if hasattr(exc, "message"): + raise RepositoryError(exc.message) from exc + raise RepositoryError from exc # Return a tuple to keep compatibility return (build_cmd.exit_code, build_cmd.output, build_cmd.error)