Skip to content

Build: phase out git clone option --no-single-branch (one of several approaches) #10422

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions readthedocs/doc_builder/director.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ def setup_vcs(self):
environment=self.vcs_environment,
verbose_name=self.data.version.verbose_name,
version_type=self.data.version.type,
version_identifier=self.data.version.identifier,
)

# We can't do too much on ``pre_checkout`` because we haven't
Expand Down
23 changes: 20 additions & 3 deletions readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -993,7 +993,12 @@ def has_htmlzip(self, version_slug=LATEST, version_type=None):
)

def vcs_repo(
self, environment, version=LATEST, verbose_name=None, version_type=None
self,
environment,
version=LATEST,
verbose_name=None,
version_type=None,
version_identifier=None,
):
"""
Return a Backend object for this project able to handle VCS commands.
Expand All @@ -1012,8 +1017,12 @@ def vcs_repo(
repo = None
else:
repo = backend(
self, version, environment=environment,
verbose_name=verbose_name, version_type=version_type
self,
version,
environment=environment,
verbose_name=verbose_name,
version_type=version_type,
version_identifier=version_identifier,
)
return repo

Expand Down Expand Up @@ -1941,6 +1950,7 @@ def add_features(sender, **kwargs):
INDEX_FROM_HTML_FILES = 'index_from_html_files'

# Build related features
GIT_CLONE_SINGLE_BRANCH = "git_clone_single_branch"
LIST_PACKAGES_INSTALLED_ENV = "list_packages_installed_env"
VCS_REMOTE_LISTING = "vcs_remote_listing"
SPHINX_PARALLEL = "sphinx_parallel"
Expand Down Expand Up @@ -2101,6 +2111,13 @@ def add_features(sender, **kwargs):
'on build\'s output',
),
),
(
GIT_CLONE_SINGLE_BRANCH,
_(
"Build: Single branch clone - when cloning a repository, only fetch the single "
"relevant branch of the build."
),
),
(
VCS_REMOTE_LISTING,
_(
Expand Down
43 changes: 40 additions & 3 deletions readthedocs/vcs_support/backends/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from git.exc import BadName, InvalidGitRepositoryError, NoSuchPathError
from gitdb.util import hex_to_bin

from readthedocs.builds.constants import EXTERNAL
from readthedocs.builds.constants import BRANCH, EXTERNAL
from readthedocs.config import ALL
from readthedocs.projects.constants import (
GITHUB_BRAND,
Expand Down Expand Up @@ -63,9 +63,12 @@ def update(self):
return self.fetch()
self.make_clean_working_dir()
# A fetch is always required to get external versions properly
# This is because the HEAD from the clone action usually does not contain
# the repo history.
if self.version_type == EXTERNAL:
self.clone()
return self.fetch()

return self.clone()

def repo_exists(self):
Expand Down Expand Up @@ -150,6 +153,13 @@ def use_shallow_clone(self):
from readthedocs.projects.models import Feature
return not self.project.has_feature(Feature.DONT_SHALLOW_CLONE)

def use_clone_single_branch(self):
"""Test whether git clone commands should contain --branch <name>."""
from readthedocs.projects.models import Feature

# TODO: Remove the 'not'
return not self.project.has_feature(Feature.GIT_CLONE_SINGLE_BRANCH)
Comment on lines +160 to +161
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

noting the TODO here :)


def fetch(self):
# --force lets us checkout branches that are not fast-forwarded
# https://github.com/readthedocs/readthedocs.org/issues/6097
Expand Down Expand Up @@ -185,7 +195,34 @@ def checkout_revision(self, revision):

def clone(self):
"""Clones the repository."""
cmd = ['git', 'clone', '--no-single-branch']
cmd = ["git", "clone"]

# If we are building a branch, we can use self.version_identifier to tell Git to clone
# this branch history.
if self.use_clone_single_branch():
if self.version_type == BRANCH and self.version_identifier:
cmd.extend(["--branch", self.version_identifier])

# We could also enable this behavior for tags as shown below
# elif self.version_type == TAG and self.version_identifier:
# cmd.extend(["--branch", self.version_identifier])

# Legacy behavior (this is intended for tags)
# If we cannot specify the branch to clone, we should get history for all remote types
# External builds aren't relevant, there is always an individual fetch operation.
# See self.update()
elif not self.version_type == EXTERNAL:
cmd.extend(["--no-single-branch"])
Comment on lines +212 to +215
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'd like to always have the same behavior for branch, tags and external versions. Otherwise, things are going to be pretty complicated to debug. Also, there will be things that work on PRs but fail on regular versions --which is the main feature of PRs builds.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's go over this tomorrow then 👍 If you can set aside some time to read the initial analysis in #9736 (comment), that should give the pretext of why this was necessary.


# Notes about external builds / GitHub and GitLab pull requests:
# We could clone the special branch 'refs/pull/<1234>' on GitHub
# However, this trick is already 'pulled' (punintended) in .fetch() so all we need to do
# here is to avoid --no-single-branch and other expensive things.
# We might actually add something like --no-checkout --depth 1.

# Legacy: Use --no-single-branch on everything (it's expensive)
else:
cmd.extend(["--no-single-branch"])

if self.use_shallow_clone():
cmd.extend(['--depth', str(self.repo_depth)])
Expand All @@ -205,7 +242,7 @@ def clone(self):
#
# The idea is to hit the APIv2 here to update the `latest` version with
# the `default_branch` we just got from the repository itself,
# after clonning it.
# after cloning it.
# However, we don't know the PK for the version we want to update.
#
# api_v2.version(pk).patch(
Expand Down
6 changes: 6 additions & 0 deletions readthedocs/vcs_support/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ def __init__(
environment,
verbose_name=None,
version_type=None,
version_identifier=None,
**kwargs
):
self.default_branch = project.default_branch
Expand All @@ -72,6 +73,11 @@ def __init__(
self.verbose_name = verbose_name
self.version_type = version_type

# It's possible to specify which Git identifier that this VCS instance expects to
# fetch data for.
# This only works for branch and tag names. Not commit IDs.
self.version_identifier = version_identifier

self.environment = environment

def check_working_dir(self):
Expand Down