Skip to content

Git backend: make default_branch to point to VCS' default branch #9424

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 18 commits into from
Jan 25, 2023
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 53 additions & 6 deletions readthedocs/api/v2/views/integrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
from readthedocs.builds.constants import (
EXTERNAL_VERSION_STATE_CLOSED,
EXTERNAL_VERSION_STATE_OPEN,
LATEST,
)
from readthedocs.core.signals import webhook_bitbucket, webhook_github, webhook_gitlab
from readthedocs.core.views.hooks import (
Expand Down Expand Up @@ -280,6 +281,30 @@ def get_closed_external_version_response(self, project):
"versions": [version_closed] if version_closed else [],
}

def update_default_branch(self, default_branch):
"""
Update the `Version.identifer` for `latest` with the VCS's `default_branch`.

The VCS's `default_branch` is the branch cloned when there is no specific branch specified
(e.g. `git clone <URL>`).

Some VCS providers (GitHub and GitLab) send the `default_branch` via incoming webhooks.
We use that data to update our database and keep it in sync.

This solves the problem about "changing the default branch in GitHub"
and also importing repositories with a different `default_branch` than `main` manually:
https://github.com/readthedocs/readthedocs.org/issues/9367

In case the user already selected a `default-branch` from the "Advanced settings",
it does not override it.
"""
if not self.project.default_branch:
(
self.project.versions.filter(slug=LATEST).update(
identifier=default_branch
)
)


class GitHubWebhookView(WebhookMixin, APIView):

Expand Down Expand Up @@ -417,6 +442,12 @@ def handle_webhook(self):
event=event,
)

# Always update `latest` branch to point to the default branch in the repository
# even if the event is not gonna be handled. This helps us to keep our db in sync.
default_branch = self.data.get("repository", {}).get("default_branch", None)
if default_branch:
self.update_default_branch(default_branch)

if event == GITHUB_PING:
return {"detail": "Webhook configured correctly"}

Expand Down Expand Up @@ -467,14 +498,15 @@ def handle_webhook(self):
# Trigger a build for all branches in the push
if event == GITHUB_PUSH:
try:
branches = [self._normalize_ref(self.data['ref'])]
return self.get_response_push(self.project, branches)
branch = self._normalize_ref(self.data["ref"])
return self.get_response_push(self.project, [branch])
except KeyError:
raise ParseError('Parameter "ref" is required')

return None

def _normalize_ref(self, ref):
"""Remove `ref/(heads|tags)/` from the reference to match a Version on the db."""
pattern = re.compile(r'^refs/(heads|tags)/')
return pattern.sub('', ref)

Expand Down Expand Up @@ -568,6 +600,13 @@ def handle_webhook(self):
data=self.request.data,
event=event,
)

# Always update `latest` branch to point to the default branch in the repository
# even if the event is not gonna be handled. This helps us to keep our db in sync.
default_branch = self.data.get("project", {}).get("default_branch", None)
if default_branch:
self.update_default_branch(default_branch)

# Handle push events and trigger builds
if event in (GITLAB_PUSH, GITLAB_TAG_PUSH):
data = self.request.data
Expand All @@ -583,8 +622,8 @@ def handle_webhook(self):
return self.sync_versions_response(self.project)
# Normal push to master
try:
branches = [self._normalize_ref(data['ref'])]
return self.get_response_push(self.project, branches)
branch = self._normalize_ref(data["ref"])
return self.get_response_push(self.project, [branch])
except KeyError:
raise ParseError('Parameter "ref" is required')

Expand Down Expand Up @@ -660,6 +699,11 @@ def handle_webhook(self):
data=self.request.data,
event=event,
)

# NOTE: we can't call `self.update_default_branch` here because
# BitBucket does not tell us what is the `default_branch` for a
# repository in these incoming webhooks.

if event == BITBUCKET_PUSH:
try:
data = self.request.data
Expand All @@ -676,8 +720,11 @@ def handle_webhook(self):
# we don't trigger the sync versions, because that
# will be triggered with the normal push.
if branches:
return self.get_response_push(self.project, branches)
log.debug('Triggered sync_versions.')
return self.get_response_push(
self.project,
branches,
)
log.debug("Triggered sync_versions.")
return self.sync_versions_response(self.project)
except KeyError:
raise ParseError('Invalid request')
Expand Down
2 changes: 0 additions & 2 deletions readthedocs/builds/managers.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Build and Version class model Managers."""

import structlog

from django.core.exceptions import ObjectDoesNotExist
from django.db import models
from polymorphic.managers import PolymorphicManager
Expand Down Expand Up @@ -62,7 +61,6 @@ def create_latest(self, **kwargs):
'verbose_name': LATEST_VERBOSE_NAME,
'machine': True,
'active': True,
'identifier': LATEST,
'type': BRANCH,
}
defaults.update(kwargs)
Expand Down
20 changes: 20 additions & 0 deletions readthedocs/builds/migrations/0046_identifier_null.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Generated by Django 3.2.13 on 2022-07-11 08:53

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("builds", "0045_alter_build_status"),
]

operations = [
migrations.AlterField(
model_name="version",
name="identifier",
field=models.CharField(
blank=True, max_length=255, null=True, verbose_name="Identifier"
),
),
]
14 changes: 9 additions & 5 deletions readthedocs/builds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,11 +119,15 @@ class Version(TimeStampedModel):
)
# used by the vcs backend

#: The identifier is the ID for the revision this is version is for. This
#: might be the revision number (e.g. in SVN), or the commit hash (e.g. in
#: Git). If the this version is pointing to a branch, then ``identifier``
#: will contain the branch name.
identifier = models.CharField(_('Identifier'), max_length=255)
#: The identifier is the ID for the revision this is version is for.
#: This might be the revision number (e.g. in SVN),
#: or the commit hash (e.g. in Git).
#: If the this version is pointing to a branch,
#: then ``identifier`` will contain the branch name.
#: NULL value means it will use the VCS default branch cloned.
identifier = models.CharField(
_("Identifier"), max_length=255, null=True, blank=True
)

#: This is the actual name that we got for the commit stored in
#: ``identifier``. This might be the tag or branch name like ``"v1.0.4"``.
Expand Down
1 change: 0 additions & 1 deletion readthedocs/core/views/hooks.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,6 @@ def build_branches(project, branch_list):
not_building = set()
for branch in branch_list:
versions = project.versions_from_branch_name(branch)

for version in versions:
log.debug(
'Processing.',
Expand Down
37 changes: 37 additions & 0 deletions readthedocs/projects/migrations/0095_default_branch_helptext.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# Generated by Django 3.2.13 on 2022-07-11 09:06

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
("projects", "0094_auto_20221221_1045"),
]

operations = [
migrations.AlterField(
model_name="historicalproject",
name="default_branch",
field=models.CharField(
blank=True,
default=None,
help_text='What branch "latest" points to. Leave empty to use the default value for your VCS.',
max_length=255,
null=True,
verbose_name="Default branch",
),
),
migrations.AlterField(
model_name="project",
name="default_branch",
field=models.CharField(
blank=True,
default=None,
help_text='What branch "latest" points to. Leave empty to use the default value for your VCS.',
max_length=255,
null=True,
verbose_name="Default branch",
),
),
]
22 changes: 8 additions & 14 deletions readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,8 +175,8 @@ class Project(models.Model):
default=LATEST,
help_text=_('The version of your project that / redirects to'),
)
# In default_branch, None means the backend should choose the
# appropriate branch. Eg 'master' for git
# In default_branch, ``None`` means the backend will use the default branch
# clonned for each backend.
default_branch = models.CharField(
_('Default branch'),
max_length=255,
Expand All @@ -185,8 +185,7 @@ class Project(models.Model):
blank=True,
help_text=_(
'What branch "latest" points to. Leave empty '
'to use the default value for your VCS (eg. '
'<code>trunk</code> or <code>master</code>).',
"to use the default value for your VCS.",
),
)
requirements_file = models.CharField(
Expand Down Expand Up @@ -480,19 +479,14 @@ def save(self, *args, **kwargs): # pylint: disable=arguments-differ
if not self.slug:
raise Exception(_('Model must have slug'))
super().save(*args, **kwargs)
try:
latest = self.versions.filter(slug=LATEST).first()
default_branch = self.get_default_branch()
if latest and latest.identifier != default_branch:
latest.identifier = default_branch
latest.save()
except Exception:
log.exception('Failed to update latest identifier')

# Update `Version.identifier` for `latest` with the default branch the use has selected,
# even if it's `None` (meaning to match the `default_branch` of the repository)
self.versions.filter(slug=LATEST).update(identifier=self.default_branch)

try:
branch = self.get_default_branch()
if not self.versions.filter(slug=LATEST).exists():
self.versions.create_latest(identifier=branch)
self.versions.create_latest()
except Exception:
log.exception('Error creating default branches')

Expand Down
30 changes: 22 additions & 8 deletions readthedocs/vcs_support/backends/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,11 +174,7 @@ def fetch(self):
code, stdout, stderr = self.run(*cmd)
return code, stdout, stderr

def checkout_revision(self, revision=None):
if not revision:
branch = self.default_branch or self.fallback_branch
revision = 'origin/%s' % branch

def checkout_revision(self, revision):
try:
code, out, err = self.run('git', 'checkout', '--force', revision)
return [code, out, err]
Expand All @@ -198,6 +194,24 @@ def clone(self):

try:
code, stdout, stderr = self.run(*cmd)

# TODO: for those VCS providers that don't tell us the `default_branch`
# of the repository in the incoming webhook,
# we need to get it from the cloned repository itself.
#
# cmd = ['git', 'symbolic-ref', 'refs/remotes/origin/HEAD']
# _, default_branch, _ = self.run(*cmd)
# default_branch = default_branch.replace('refs/remotes/origin/', '')
#
# The idea is to hit the APIv2 here to update the `latest` version with
# the `default_branch` we just got from the repository itself,
# after clonning it.
# However, we don't know the PK for the version we want to update.
Comment on lines +206 to +209
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could just add a get_default_branch method to the class here, and hit the API from the caller where we have the version?

self.vcs_repository.update()
identifier = self.data.build_commit or self.data.version.identifier
log.info("Checking out.", identifier=identifier)
self.vcs_repository.checkout(identifier)

Copy link
Member Author

@humitos humitos Jan 23, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hrm... We could, yes. I'm not sure if it's better than just initializing this class with a read Version object instead of the version_slug only.

  • the clone() method's caller is the same class; from outside, the method called is update() --but I understand it's the same for the purpose of your comment
  • I do see hitting the API from the clone() method a little more organic, since the caller doesn't need to think about updating the default_branch each time after calling clone() --However, I don't see it bad for now.

There are other potential solutions as well:

  1. initialize the class with version_pk too
  2. initialize the class with a complete Version object
  3. use APIv3 (instead of APIv2) from the builder that supports getting a Version object without the pk: https://docs.readthedocs.io/en/stable/api/v3.html#version-detail

It probably makes sense to use APIv3 since it's part of the organic transition to remove APIv2 internally.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hrm, thinking a little more about this... These ides will interfere with this other improvement #9736 where we will be cloning the exact branch directly, instead of the default_branch and then switching to the one wanted.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This could also be part of another issue to investigate a little more, if you think it's not mandatory for this PR for now. It only affects Bitbucket currently.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's fine to not include, but I just wanted to note that we have options here to get the data we want by thinking about getting it another way. 🤷

#
# api_v2.version(pk).patch(
# {'default_branch': default_branch}
# )

return code, stdout, stderr
except RepositoryError:
raise RepositoryError(RepositoryError.CLONE_ERROR())
Expand Down Expand Up @@ -322,12 +336,12 @@ def submodules(self):
def checkout(self, identifier=None):
"""Checkout to identifier or latest."""
super().checkout()
# Find proper identifier

# NOTE: if there is no identifier, we default to default branch cloned
if not identifier:
identifier = self.default_branch or self.fallback_branch
return

identifier = self.find_ref(identifier)

# Checkout the correct identifier for this branch.
code, out, err = self.checkout_revision(identifier)

Expand Down