Skip to content

Commit 1add44a

Browse files
benjaominghumitos
andauthored
Build: Simplify and optimize git backend: New clone+fetch pattern (#10430)
* New git clone/fetch/checkout behavior first steps * Use `identifier` argument for `.update()`, add lots of comments for future readers * Add missing str() cast * Remove the `identifier` keyword since we are forced to use verbose_name if tags should work :/ * Revert "Remove the `identifier` keyword since we are forced to use verbose_name if tags should work :/" This reverts commit 82178a3. * We need the identifier for branches, but not for tags and PRs * Update some comments after noticing how `Build.identifier/.verbose_name` fields are used * lint * Revert "Revert "Remove the `identifier` keyword since we are forced to use verbose_name if tags should work :/"" This reverts commit a45b42b. * Use version_identifier as an instance property * Update readthedocs/vcs_support/backends/git.py Co-authored-by: Manuel Kaufmann <[email protected]> * Updates to return values, comments and logging * Restore common * Stop fetching --tags and fetch the direct remote reference * Appease linter: Use raise-from pattern * Hello darker my old friend * Restore error handling for clone operation * Update a few comments, add a bit more raise-from pattern (actually trying to use it for debugging!) * WIP: refine some comments, log calls and small things while testing locally * New test class with Feature flag defined * Adds a tearDown method after discovering that tests were wrongly passing because working directory leaks between test cases * Log a warning if we aren't specifying the version type when using Git, fetch branches by their exact remote reference and point them to a local branch, rather than FETCH_HEAD * Update test cases for new git clone+fetch pattern * Git fetch change: Fetch branches to :refs/remotes/origin/<branch-name> + comment updates * Overwrite tests to call clone_ng and fetch_ng, and rewrite some entirely * Update update() docstring * Always use lsremote for GIT_CLONE_FETCH_CHECKOUT_PATTERN * Builds need to know about Version.machine=True for tags "stable" so they don't try to get "stable" as a tag - alternatively, we hardcode a check for verbose_name=="stable" * Conditionally log a warning on stable versions * linting * add "machine" to version API test case * Use "refspec" terminology more consistently * Add test case for un-named default branches * Don't have "--no-checkout" when we *need* to checkout the remote HEAD * Add test case to verify that we can clone+fetch special tag "stable" (machine=True) * Update readthedocs/vcs_support/backends/git.py Co-authored-by: Manuel Kaufmann <[email protected]> * Remove unused logging * Use `git fetch <commit hash>` for special "stable" versions * Building an unnamed default branch now skips `git fetch` * Remove unnecessary logging, add a lot of comments to tests * Apply suggestions from @humitos code review Co-authored-by: Manuel Kaufmann <[email protected]> * Update readthedocs/projects/models.py Co-authored-by: Manuel Kaufmann <[email protected]> * Always clean up the presumed repo working dir after a test case * Move `machine` to `VersionAdminSerializer` * Add precision to comment * Use identifier and verbose_name that looks more like actual data would look * remove unused local reference in git fetch for "stable" tag * Don't use --no-checkout, just checkout whatever is at the FETCH_HEAD * Add more logging * Update comment * Fix logic condition: Always include `vcs_repository.supports_lsremote` * Log a warning when we try to generate a pr/mr refspec for an unsupported Git provider * improve doc strings * Remove TestGitBackendTwiceInARow * Use real commit hash in test case --------- Co-authored-by: Manuel Kaufmann <[email protected]>
1 parent 3a9befe commit 1add44a

File tree

11 files changed

+562
-62
lines changed

11 files changed

+562
-62
lines changed

readthedocs/api/v2/serializers.py

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -130,20 +130,20 @@ class VersionSerializer(serializers.ModelSerializer):
130130
class Meta:
131131
model = Version
132132
fields = [
133-
'id',
134-
'project',
135-
'slug',
136-
'identifier',
137-
'verbose_name',
138-
'privacy_level',
139-
'active',
140-
'built',
141-
'downloads',
142-
'type',
143-
'has_pdf',
144-
'has_epub',
145-
'has_htmlzip',
146-
'documentation_type',
133+
"id",
134+
"project",
135+
"slug",
136+
"identifier",
137+
"verbose_name",
138+
"privacy_level",
139+
"active",
140+
"built",
141+
"downloads",
142+
"type",
143+
"has_pdf",
144+
"has_epub",
145+
"has_htmlzip",
146+
"documentation_type",
147147
]
148148

149149
def __init__(self, *args, **kwargs):
@@ -192,6 +192,7 @@ class Meta(VersionSerializer.Meta):
192192
"addons",
193193
"build_data",
194194
"canonical_url",
195+
"machine",
195196
]
196197

197198

readthedocs/api/v2/views/integrations.py

Lines changed: 14 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,11 +15,7 @@
1515
from rest_framework.status import HTTP_400_BAD_REQUEST
1616
from rest_framework.views import APIView
1717

18-
from readthedocs.builds.constants import (
19-
EXTERNAL_VERSION_STATE_CLOSED,
20-
EXTERNAL_VERSION_STATE_OPEN,
21-
LATEST,
22-
)
18+
from readthedocs.builds.constants import LATEST
2319
from readthedocs.core.signals import webhook_bitbucket, webhook_github, webhook_gitlab
2420
from readthedocs.core.views.hooks import (
2521
build_branches,
@@ -97,8 +93,8 @@ def post(self, request, project_slug):
9793
if not Project.objects.is_active(self.project):
9894
resp = {'detail': 'This project is currently disabled'}
9995
return Response(resp, status=status.HTTP_406_NOT_ACCEPTABLE)
100-
except Project.DoesNotExist:
101-
raise NotFound('Project not found')
96+
except Project.DoesNotExist as exc:
97+
raise NotFound("Project not found") from exc
10298
if not self.is_payload_valid():
10399
log.warning('Invalid payload for project and integration.')
104100
return Response(
@@ -190,7 +186,7 @@ def get_response_push(self, project, branches):
190186
191187
:param project: Project instance
192188
:type project: Project
193-
:param branches: List of branch names to build
189+
:param branches: List of branch/tag names to build
194190
:type branches: list(str)
195191
"""
196192
to_build, not_building = build_branches(project, branches)
@@ -365,7 +361,7 @@ def get_external_version_data(self):
365361
return data
366362
except KeyError as e:
367363
key = e.args[0]
368-
raise ParseError(f"Invalid payload. {key} is required.")
364+
raise ParseError(f"Invalid payload. {key} is required.") from e
369365

370366
def is_payload_valid(self):
371367
"""
@@ -500,8 +496,8 @@ def handle_webhook(self):
500496
try:
501497
branch = self._normalize_ref(self.data["ref"])
502498
return self.get_response_push(self.project, [branch])
503-
except KeyError:
504-
raise ParseError('Parameter "ref" is required')
499+
except KeyError as exc:
500+
raise ParseError('Parameter "ref" is required') from exc
505501

506502
return None
507503

@@ -581,7 +577,7 @@ def get_external_version_data(self):
581577
return data
582578
except KeyError as e:
583579
key = e.args[0]
584-
raise ParseError(f"Invalid payload. {key} is required.")
580+
raise ParseError(f"Invalid payload. {key} is required.") from e
585581

586582
def handle_webhook(self):
587583
"""
@@ -624,8 +620,8 @@ def handle_webhook(self):
624620
try:
625621
branch = self._normalize_ref(data["ref"])
626622
return self.get_response_push(self.project, [branch])
627-
except KeyError:
628-
raise ParseError('Parameter "ref" is required')
623+
except KeyError as exc:
624+
raise ParseError('Parameter "ref" is required') from exc
629625

630626
if self.project.external_builds_enabled and event == GITLAB_MERGE_REQUEST:
631627
if (
@@ -726,8 +722,8 @@ def handle_webhook(self):
726722
)
727723
log.debug("Triggered sync_versions.")
728724
return self.sync_versions_response(self.project)
729-
except KeyError:
730-
raise ParseError('Invalid request')
725+
except KeyError as exc:
726+
raise ParseError("Invalid request") from exc
731727
return None
732728

733729
def is_payload_valid(self):
@@ -809,8 +805,8 @@ def handle_webhook(self):
809805
self.update_default_branch(default_branch)
810806

811807
return self.get_response_push(self.project, branches)
812-
except TypeError:
813-
raise ParseError('Invalid request')
808+
except TypeError as exc:
809+
raise ParseError("Invalid request") from exc
814810

815811
def is_payload_valid(self):
816812
"""

readthedocs/builds/tasks.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,8 +292,16 @@ def sync_versions_task(project_pk, tags_data, branches_data, **kwargs):
292292
Creates new Version objects for tags/branches that aren't tracked in the database,
293293
and deletes Version objects for tags/branches that don't exists in the repository.
294294
295-
:param tags_data: List of dictionaries with ``verbose_name`` and ``identifier``.
296-
:param branches_data: Same as ``tags_data`` but for branches.
295+
:param tags_data: List of dictionaries with ``verbose_name`` and ``identifier``
296+
Example: [
297+
{"verbose_name": "v1.0.0",
298+
"identifier": "67a9035990f44cb33091026d7453d51606350519"},
299+
].
300+
:param branches_data: Same as ``tags_data`` but for branches (branch name, branch identifier).
301+
Example: [
302+
{"verbose_name": "latest",
303+
"identifier": "main"},
304+
].
297305
:returns: `True` or `False` if the task succeeded.
298306
"""
299307
project = Project.objects.get(pk=project_pk)

readthedocs/doc_builder/director.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,8 @@ def setup_vcs(self):
9494
environment=self.vcs_environment,
9595
verbose_name=self.data.version.verbose_name,
9696
version_type=self.data.version.type,
97+
version_identifier=self.data.version.identifier,
98+
version_machine=self.data.version.machine,
9799
)
98100

99101
# We can't do too much on ``pre_checkout`` because we haven't
@@ -222,7 +224,7 @@ def build(self):
222224
def checkout(self):
223225
"""Checkout Git repo and load build config file."""
224226

225-
log.info("Cloning repository.")
227+
log.info("Cloning and fetching.")
226228
self.vcs_repository.update()
227229

228230
identifier = self.data.build_commit or self.data.version.identifier

readthedocs/projects/models.py

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -995,7 +995,13 @@ def has_htmlzip(self, version_slug=LATEST, version_type=None):
995995
)
996996

997997
def vcs_repo(
998-
self, environment, version=LATEST, verbose_name=None, version_type=None
998+
self,
999+
environment,
1000+
version=LATEST,
1001+
verbose_name=None,
1002+
version_type=None,
1003+
version_identifier=None,
1004+
version_machine=None,
9991005
):
10001006
"""
10011007
Return a Backend object for this project able to handle VCS commands.
@@ -1014,8 +1020,13 @@ def vcs_repo(
10141020
repo = None
10151021
else:
10161022
repo = backend(
1017-
self, version, environment=environment,
1018-
verbose_name=verbose_name, version_type=version_type
1023+
self,
1024+
version,
1025+
environment=environment,
1026+
verbose_name=verbose_name,
1027+
version_type=version_type,
1028+
version_identifier=version_identifier,
1029+
version_machine=version_machine,
10191030
)
10201031
return repo
10211032

@@ -1935,6 +1946,7 @@ def add_features(sender, **kwargs):
19351946
INDEX_FROM_HTML_FILES = 'index_from_html_files'
19361947

19371948
# Build related features
1949+
GIT_CLONE_FETCH_CHECKOUT_PATTERN = "git_clone_fetch_checkout_pattern"
19381950
HOSTING_INTEGRATIONS = "hosting_integrations"
19391951
NO_CONFIG_FILE_DEPRECATED = "no_config_file"
19401952
SCALE_IN_PROTECTION = "scale_in_prtection"
@@ -2058,6 +2070,12 @@ def add_features(sender, **kwargs):
20582070
"sources"
20592071
),
20602072
),
2073+
(
2074+
GIT_CLONE_FETCH_CHECKOUT_PATTERN,
2075+
_(
2076+
"Build: Use simplified and optimized git clone + git fetch + git checkout patterns"
2077+
),
2078+
),
20612079
(
20622080
HOSTING_INTEGRATIONS,
20632081
_(

readthedocs/projects/tasks/mixins.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,13 @@ def sync_versions(self, vcs_repository):
4545
# Do not use ``ls-remote`` if the VCS does not support it or if we
4646
# have already cloned the repository locally. The latter happens
4747
# when triggering a normal build.
48-
use_lsremote = (
49-
vcs_repository.supports_lsremote
50-
and not vcs_repository.repo_exists()
48+
# Always use lsremote when we have GIT_CLONE_FETCH_CHECKOUT_PATTERN on
49+
# and the repo supports lsremote.
50+
# The new pattern does not fetch branch and tag data, so we always
51+
# have to do ls-remote.
52+
use_lsremote = vcs_repository.supports_lsremote and (
53+
self.data.project.has_feature(Feature.GIT_CLONE_FETCH_CHECKOUT_PATTERN)
54+
or not vcs_repository.repo_exists()
5155
)
5256
sync_tags = vcs_repository.supports_tags and not self.data.project.has_feature(
5357
Feature.SKIP_SYNC_TAGS
@@ -63,6 +67,10 @@ def sync_versions(self, vcs_repository):
6367
include_tags=sync_tags,
6468
include_branches=sync_branches,
6569
)
70+
71+
# GIT_CLONE_FETCH_CHECKOUT_PATTERN: When the feature flag becomes default, we
72+
# can remove this segment since lsremote is always on.
73+
# We can even factor out the dependency to gitpython.
6674
else:
6775
if sync_tags:
6876
tags = vcs_repository.tags

readthedocs/rtd_tests/tests/test_api.py

Lines changed: 9 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3127,14 +3127,15 @@ def test_get_version_by_id(self):
31273127
"users": [1],
31283128
"urlconf": None,
31293129
},
3130-
'privacy_level': 'public',
3131-
'downloads': {},
3132-
'identifier': '2404a34eba4ee9c48cc8bc4055b99a48354f4950',
3133-
'slug': '0.8',
3134-
'has_epub': False,
3135-
'has_htmlzip': False,
3136-
'has_pdf': False,
3137-
'documentation_type': 'sphinx',
3130+
"privacy_level": "public",
3131+
"downloads": {},
3132+
"identifier": "2404a34eba4ee9c48cc8bc4055b99a48354f4950",
3133+
"slug": "0.8",
3134+
"has_epub": False,
3135+
"has_htmlzip": False,
3136+
"has_pdf": False,
3137+
"documentation_type": "sphinx",
3138+
"machine": False,
31383139
}
31393140

31403141
self.assertDictEqual(

0 commit comments

Comments
 (0)