diff --git a/readthedocs/api/v2/serializers.py b/readthedocs/api/v2/serializers.py index 0348cc159a2..9a2fe27edb8 100644 --- a/readthedocs/api/v2/serializers.py +++ b/readthedocs/api/v2/serializers.py @@ -93,6 +93,7 @@ class Meta(ProjectSerializer.Meta): "environment_variables", "max_concurrent_builds", "readthedocs_yaml_path", + "clone_token", ) diff --git a/readthedocs/doc_builder/director.py b/readthedocs/doc_builder/director.py index 963c18b4901..c624d6f0380 100644 --- a/readthedocs/doc_builder/director.py +++ b/readthedocs/doc_builder/director.py @@ -671,6 +671,7 @@ def get_vcs_env_vars(self): env = self.get_rtd_env_vars() # Don't prompt for username, this requires Git 2.3+ env["GIT_TERMINAL_PROMPT"] = "0" + env["READTHEDOCS_GIT_CLONE_TOKEN"] = self.data.project.clone_token return env def get_rtd_env_vars(self): diff --git a/readthedocs/doc_builder/environments.py b/readthedocs/doc_builder/environments.py index 1294be80916..0694603f3d1 100644 --- a/readthedocs/doc_builder/environments.py +++ b/readthedocs/doc_builder/environments.py @@ -391,6 +391,7 @@ def _escape_command(self, cmd): not_escape_variables = ( "READTHEDOCS_OUTPUT", "READTHEDOCS_VIRTUALENV_PATH", + "READTHEDOCS_GIT_CLONE_TOKEN", "CONDA_ENVS_PATH", "CONDA_DEFAULT_ENV", ) diff --git a/readthedocs/oauth/services/base.py b/readthedocs/oauth/services/base.py index aa13b68ce59..0e787f86d6c 100644 --- a/readthedocs/oauth/services/base.py +++ b/readthedocs/oauth/services/base.py @@ -38,6 +38,7 @@ class Service: default_user_avatar_url = settings.OAUTH_AVATAR_USER_DEFAULT_URL default_org_avatar_url = settings.OAUTH_AVATAR_ORG_DEFAULT_URL supports_build_status = False + supports_clone_token = False @classmethod def for_project(cls, project): @@ -328,7 +329,3 @@ def sync_repositories(self): def sync_organizations(self): raise NotImplementedError - - def get_clone_token(self, project): - """User services make use of SSH keys only for cloning.""" - return None diff --git a/readthedocs/oauth/services/githubapp.py b/readthedocs/oauth/services/githubapp.py index 6535a50e319..cb8ad23f0d6 100644 --- a/readthedocs/oauth/services/githubapp.py +++ b/readthedocs/oauth/services/githubapp.py @@ -34,6 +34,7 @@ class GitHubAppService(Service): vcs_provider_slug = GITHUB_APP allauth_provider = GitHubAppProvider supports_build_status = True + supports_clone_token = True def __init__(self, installation: GitHubAppInstallation): self.installation = installation @@ -462,17 +463,29 @@ def get_clone_token(self, project): """ Return a token for HTTP-based Git access to the repository. + The token is scoped to have read-only access to the content of the repository attached to the project. + The token expires after one hour (this is given by GitHub and can't be changed). + See: - https://docs.github.com/en/apps/creating-github-apps/authenticating-with-a-github-app/authenticating-as-a-github-app-installation - https://docs.github.com/en/rest/apps/apps?apiVersion=2022-11-28#create-an-installation-access-token-for-an-app """ - # NOTE: we can pass the repository_ids to get a token with access to specific repositories. - # We should upstream this feature to PyGithub. - # We can also pass a specific permissions object to get a token with specific permissions - # if we want to scope this token even more. try: - access_token = self.gh_app_client.get_access_token(self.installation.installation_id) - return f"x-access-token:{access_token.token}" + # TODO: Use self.gh_app_client.get_access_token instead, + # once https://github.com/PyGithub/PyGithub/pull/3287 is merged. + _, response = self.gh_app_client.requester.requestJsonAndCheck( + "POST", + f"/app/installations/{self.installation.installation_id}/access_tokens", + headers=self.gh_app_client._get_headers(), + input={ + "repository_ids": [int(project.remote_repository.remote_id)], + "permissions": { + "contents": "read", + }, + }, + ) + token = response["token"] + return f"x-access-token:{token}" except GithubException: log.info( "Failed to get clone token for project", diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 1f8e10e1387..bfb4b13f0e3 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -968,6 +968,7 @@ def vcs_repo(self, environment, version): self, version=version, environment=environment, + use_token=bool(self.clone_token), ) return repo @@ -1398,6 +1399,29 @@ def get_subproject_candidates(self, user): def organization(self): return self.organizations.first() + @property + def clone_token(self) -> str | None: + """ + Return a HTTP-based Git access token to the repository. + + .. note:: + + - A token is only returned for projects linked to a private repository. + - Only repositories granted access by a GitHub app installation will return a token. + """ + service_class = self.get_git_service_class() + if not service_class or not self.remote_repository.private: + return None + + if not service_class.supports_clone_token: + return None + + for service in service_class.for_project(self): + token = service.get_clone_token(self) + if token: + return token + return None + class APIProject(Project): """ @@ -1414,12 +1438,17 @@ class APIProject(Project): """ features = [] + # This is a property in the original model, in order to + # be able to assign it a value in the constructor, we need to re-declare it + # as an attribute here. + clone_token = None class Meta: proxy = True def __init__(self, *args, **kwargs): self.features = kwargs.pop("features", []) + self.clone_token = kwargs.pop("clone_token", None) environment_variables = kwargs.pop("environment_variables", {}) ad_free = not kwargs.pop("show_advertising", True) # These fields only exist on the API return, not on the model, so we'll diff --git a/readthedocs/projects/tasks/builds.py b/readthedocs/projects/tasks/builds.py index 563c36581de..0214ba472f8 100644 --- a/readthedocs/projects/tasks/builds.py +++ b/readthedocs/projects/tasks/builds.py @@ -204,6 +204,7 @@ def execute(self): version=self.data.version, environment={ "GIT_TERMINAL_PROMPT": "0", + "READTHEDOCS_GIT_CLONE_TOKEN": self.data.project.clone_token, }, # Pass the api_client so that all environments have it. # This is needed for ``readthedocs-corporate``. diff --git a/readthedocs/projects/tests/test_build_tasks.py b/readthedocs/projects/tests/test_build_tasks.py index 170a068d8f8..9e83880c66d 100644 --- a/readthedocs/projects/tests/test_build_tasks.py +++ b/readthedocs/projects/tests/test_build_tasks.py @@ -6,10 +6,12 @@ from unittest import mock import django_dynamic_fixture as fixture +from django_dynamic_fixture import get import pytest from django.conf import settings from django.test.utils import override_settings +from readthedocs.allauth.providers.githubapp.provider import GitHubAppProvider from readthedocs.builds.constants import ( BUILD_STATUS_FAILURE, BUILD_STATUS_SUCCESS, @@ -21,6 +23,8 @@ from readthedocs.config.exceptions import ConfigError from readthedocs.config.tests.test_config import get_build_config from readthedocs.doc_builder.exceptions import BuildCancelled, BuildUserError +from readthedocs.oauth.models import GitHubAccountType, GitHubAppInstallation, RemoteRepository +from readthedocs.oauth.services import GitHubAppService from readthedocs.projects.exceptions import RepositoryError from readthedocs.projects.models import EnvironmentVariable, Project, WebHookEvent from readthedocs.projects.tasks.builds import sync_repository_task, update_docs_task @@ -380,7 +384,7 @@ def test_get_env_vars(self, load_yaml_config, build_environment, config, externa self._trigger_update_docs_task() vcs_env_vars = build_environment.call_args_list[0][1]["environment"] - expected_vcs_env_vars = dict(**common_env_vars, GIT_TERMINAL_PROMPT="0") + expected_vcs_env_vars = dict(**common_env_vars, GIT_TERMINAL_PROMPT="0", READTHEDOCS_GIT_CLONE_TOKEN=None) assert vcs_env_vars == expected_vcs_env_vars build_env_vars = build_environment.call_args_list[1][1]["environment"] @@ -1082,6 +1086,287 @@ def test_build_commands_executed( ] ) + @mock.patch.object(GitHubAppService, "get_clone_token") + @mock.patch("readthedocs.doc_builder.director.load_yaml_config") + def test_build_commands_executed_with_clone_token( + self, + load_yaml_config, + get_clone_token, + ): + load_yaml_config.return_value = get_build_config( + { + "version": 2, + "formats": "all", + "sphinx": { + "configuration": "docs/conf.py", + }, + }, + validate=True, + ) + + # Create the artifact paths, so it's detected by the builder + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="html")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="json")) + os.makedirs( + self.project.artifact_path(version=self.version.slug, type_="htmlzip") + ) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="epub")) + os.makedirs(self.project.artifact_path(version=self.version.slug, type_="pdf")) + + get_clone_token.return_value = "toke:1234" + github_app_installation = get( + GitHubAppInstallation, + installation_id=1234, + target_id=1234, + target_type=GitHubAccountType.USER, + ) + remote_repository = get( + RemoteRepository, + github_app_installation=github_app_installation, + clone_url="https://github.com/readthedocs/readthedocs.org", + vcs_provider=GitHubAppProvider.id, + private=True, + ) + self.project.remote_repository = remote_repository + self.project.save() + + self._trigger_update_docs_task() + + self.mocker.mocks["git.Backend.run"].assert_has_calls( + [ + mock.call("git", "clone", "--depth", "1", "https://$READTHEDOCS_GIT_CLONE_TOKEN@github.com/readthedocs/readthedocs.org", "."), + mock.call( + "git", + "fetch", + "origin", + "--force", + "--prune", + "--prune-tags", + "--depth", + "50", + "refs/heads/master:refs/remotes/origin/master", + ), + mock.call( + "git", + "show-ref", + "--verify", + "--quiet", + "--", + "refs/remotes/origin/a1b2c3", + record=False, + ), + mock.call("git", "checkout", "--force", "origin/a1b2c3"), + mock.call( + "git", + "ls-remote", + "--tags", + "--heads", + mock.ANY, + demux=True, + record=False, + ), + ] + ) + + python_version = settings.RTD_DOCKER_BUILD_SETTINGS["tools"]["python"]["3"] + self.mocker.mocks["environment.run"].assert_has_calls( + [ + mock.call("asdf", "install", "python", python_version), + mock.call("asdf", "global", "python", python_version), + mock.call("asdf", "reshim", "python", record=False), + mock.call( + "python", + "-mpip", + "install", + "-U", + "virtualenv", + "setuptools", + ), + mock.call( + "python", + "-mvirtualenv", + "$READTHEDOCS_VIRTUALENV_PATH", + bin_path=None, + cwd=None, + ), + mock.call( + mock.ANY, + "-m", + "pip", + "install", + "--upgrade", + "--no-cache-dir", + "pip", + "setuptools", + bin_path=mock.ANY, + cwd=mock.ANY, + ), + mock.call( + mock.ANY, + "-m", + "pip", + "install", + "--upgrade", + "--no-cache-dir", + "sphinx", + bin_path=mock.ANY, + cwd=mock.ANY, + ), + mock.call( + mock.ANY, + "-m", + "sphinx", + "-T", + "-b", + "html", + "-d", + "_build/doctrees", + "-D", + "language=en", + ".", + "$READTHEDOCS_OUTPUT/html", + cwd=mock.ANY, + bin_path=mock.ANY, + ), + mock.call( + mock.ANY, + "-m", + "sphinx", + "-T", + "-b", + "singlehtml", + "-d", + "_build/doctrees", + "-D", + "language=en", + ".", + "$READTHEDOCS_OUTPUT/htmlzip", + cwd=mock.ANY, + bin_path=mock.ANY, + ), + mock.call( + "mktemp", + "--directory", + record=False, + ), + mock.call( + "mv", + mock.ANY, + mock.ANY, + cwd=mock.ANY, + record=False, + ), + mock.call( + "mkdir", + "--parents", + mock.ANY, + cwd=mock.ANY, + record=False, + ), + mock.call( + "zip", + "--recurse-paths", + "--symlinks", + mock.ANY, + mock.ANY, + cwd=mock.ANY, + record=False, + ), + mock.call( + mock.ANY, + "-m", + "sphinx", + "-T", + "-b", + "latex", + "-d", + "_build/doctrees", + "-D", + "language=en", + ".", + "$READTHEDOCS_OUTPUT/pdf", + cwd=mock.ANY, + bin_path=mock.ANY, + ), + mock.call("cat", "latexmkrc", cwd=mock.ANY), + # NOTE: pdf `mv` commands and others are not here because the + # PDF resulting file is not found in the process (`_post_build`) + mock.call( + mock.ANY, + "-m", + "sphinx", + "-T", + "-b", + "epub", + "-d", + "_build/doctrees", + "-D", + "language=en", + ".", + "$READTHEDOCS_OUTPUT/epub", + cwd=mock.ANY, + bin_path=mock.ANY, + ), + mock.call( + "mv", + mock.ANY, + "/tmp/project-latest.epub", + cwd=mock.ANY, + record=False, + ), + mock.call( + "rm", + "--recursive", + "$READTHEDOCS_OUTPUT/epub", + cwd=mock.ANY, + record=False, + ), + mock.call( + "mkdir", + "--parents", + "$READTHEDOCS_OUTPUT/epub", + cwd=mock.ANY, + record=False, + ), + mock.call( + "mv", + "/tmp/project-latest.epub", + mock.ANY, + cwd=mock.ANY, + record=False, + ), + mock.call( + "test", + "-x", + "_build/html", + record=False, + cwd=mock.ANY, + ), + mock.call("lsb_release", "--description", record=False, demux=True), + mock.call("python", "--version", record=False, demux=True), + mock.call( + "dpkg-query", + "--showformat", + "${package} ${version}\\n", + "--show", + record=False, + demux=True, + ), + mock.call( + "python", + "-m", + "pip", + "list", + "--pre", + "--local", + "--format", + "json", + record=False, + demux=True, + ), + ] + ) + @mock.patch("readthedocs.doc_builder.director.load_yaml_config") def test_install_apt_packages(self, load_yaml_config): config = BuildConfigV2( diff --git a/readthedocs/rtd_tests/tests/test_api.py b/readthedocs/rtd_tests/tests/test_api.py index cd6ca0da9de..83cf535deb1 100644 --- a/readthedocs/rtd_tests/tests/test_api.py +++ b/readthedocs/rtd_tests/tests/test_api.py @@ -12,6 +12,7 @@ from rest_framework import status from rest_framework.test import APIClient +from readthedocs.allauth.providers.githubapp.provider import GitHubAppProvider from readthedocs.api.v2.models import BuildAPIKey from readthedocs.api.v2.views.integrations import ( BITBUCKET_EVENT_HEADER, @@ -56,11 +57,14 @@ from readthedocs.notifications.constants import READ, UNREAD from readthedocs.notifications.models import Notification from readthedocs.oauth.models import ( + GitHubAccountType, + GitHubAppInstallation, RemoteOrganization, RemoteOrganizationRelation, RemoteRepository, RemoteRepositoryRelation, ) +from readthedocs.oauth.services import GitHubAppService from readthedocs.projects.constants import PUBLIC from readthedocs.projects.models import ( APIProject, @@ -1419,6 +1423,44 @@ def test_project_features_multiple_projects(self): self.assertIn("features", resp.data) self.assertEqual(resp.data["features"], [feature.feature_id]) + @mock.patch.object(GitHubAppService, "get_clone_token") + def test_project_clone_token(self, get_clone_token): + clone_token = "token:1234" + get_clone_token.return_value = clone_token + project = get(Project) + + client = APIClient() + _, build_api_key = BuildAPIKey.objects.create_key(project) + client.credentials(HTTP_AUTHORIZATION=f"Token {build_api_key}") + + # No remote repository, no token. + assert project.remote_repository is None + + resp = client.get(f"/api/v2/project/{project.pk}/") + assert resp.status_code == 200 + assert resp.data["clone_token"] == None + get_clone_token.assert_not_called() + + # Project has a GitHubApp remote repository, but it's public. + github_app_installation = get(GitHubAppInstallation, installation_id=1234, target_id=1234, target_type=GitHubAccountType.USER) + remote_repository = get(RemoteRepository, vcs_provider=GitHubAppProvider.id, github_app_installation=github_app_installation, private=False) + project.remote_repository = remote_repository + project.save() + + resp = client.get(f"/api/v2/project/{project.pk}/") + assert resp.status_code == 200 + assert resp.data["clone_token"] == None + get_clone_token.assert_not_called() + + # Project has a GitHubApp remote repository, and it's private. + remote_repository.private = True + remote_repository.save() + + resp = client.get(f"/api/v2/project/{project.pk}/") + assert resp.status_code == 200 + assert resp.data["clone_token"] == clone_token + get_clone_token.assert_called_once_with(project) + def test_remote_repository_pagination(self): account = get(SocialAccount, provider="github") user = get(User) @@ -3300,6 +3342,7 @@ def test_get_version_by_id(self): "slug": "pip", "users": [1], "custom_prefix": None, + "clone_token": None, }, "privacy_level": "public", "downloads": {}, diff --git a/readthedocs/vcs_support/backends/git.py b/readthedocs/vcs_support/backends/git.py index 432d36c10d8..7a690d94a23 100644 --- a/readthedocs/vcs_support/backends/git.py +++ b/readthedocs/vcs_support/backends/git.py @@ -1,7 +1,7 @@ """Git-related utilities.""" -import re from typing import Iterable +from urllib.parse import urlparse import structlog from django.conf import settings @@ -27,22 +27,15 @@ class Backend(BaseVCS): fallback_branch = "master" # default branch repo_depth = 50 - def __init__(self, *args, **kwargs): + def __init__(self, *args, use_token=False, **kwargs): super().__init__(*args, **kwargs) - self.token = kwargs.get("token") + self.use_token = use_token self.repo_url = self._get_clone_url() def _get_clone_url(self): - if "://" in self.repo_url: - hacked_url = self.repo_url.split("://")[1] - hacked_url = re.sub(".git$", "", hacked_url) - clone_url = "https://%s" % hacked_url - if self.token: - clone_url = "https://{}@{}".format(self.token, hacked_url) - return clone_url - # Don't edit URL because all hosts aren't the same - # else: - # clone_url = 'git://%s' % (hacked_url) + if self.repo_url.startswith(("https://", "http://")) and self.use_token: + parsed_url = urlparse(self.repo_url) + return f"{parsed_url.scheme}://$READTHEDOCS_GIT_CLONE_TOKEN@{parsed_url.netloc}{parsed_url.path}" return self.repo_url def update(self):