Skip to content

Update build with "Pulling cache" when downloading the cache #6800

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 7 commits into from
Closed
6 changes: 6 additions & 0 deletions readthedocs/builds/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,22 @@


BUILD_STATE_TRIGGERED = 'triggered'
BUILD_STATE_PULLING_CACHE = 'pulling-cache'
BUILD_STATE_CLONING = 'cloning'
BUILD_STATE_INSTALLING = 'installing'
BUILD_STATE_BUILDING = 'building'
BUILD_STATE_UPLOADING = 'uploading'
BUILD_STATE_PUSHING_CACHE = 'pushing-cache'
BUILD_STATE_FINISHED = 'finished'

BUILD_STATE = (
(BUILD_STATE_TRIGGERED, _('Triggered')),
(BUILD_STATE_PULLING_CACHE, _('Pulling cache')),
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to call this something that works for both pushing & pulling, perhaps Updating cache.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want this, we will need to change how we handle the FINISHED status.

Currently, the build is first marked as FINISHED and then the push is performed. The FINISHED status is sent when we leave the context manager of the environment (__exit__) and the cache is pushed after that only if the build was successful.

Taking a look at the code, it seems that our options are:

  1. update the state of the build to FINISHED outside the environment __exit__ method
  2. use update_on_success=False when initializing the environment and update the build status to FINISHED manually after pushing the cache.
  3. push cache always (not only on successful builds)

I think that 2) is our better option since it uses the API we already have in the code to avoid updating the status automatically.

If we are going in this direction, why not to add an specific status for each operation? Pulling/Pushing or Downloading/Uploading or similar.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I went ahead and implemented 2).

It shows "Pushing cache" if the project has the feature and the build is marked as finished outside the __exit__ method. With these changes, the total build time will count pulling/pushing the cache as well, so it will be more realistic.

(BUILD_STATE_CLONING, _('Cloning')),
(BUILD_STATE_INSTALLING, _('Installing')),
(BUILD_STATE_BUILDING, _('Building')),
(BUILD_STATE_UPLOADING, _('Uploading')),
(BUILD_STATE_PUSHING_CACHE, _('Pushing cache')),
(BUILD_STATE_FINISHED, _('Finished')),
)

Expand Down
18 changes: 18 additions & 0 deletions readthedocs/builds/migrations/0015_pulling_cache_build_state.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 2.2.11 on 2020-03-24 15:59

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('builds', '0014_migrate-doctype-from-project-to-version'),
]

operations = [
migrations.AlterField(
model_name='build',
name='state',
field=models.CharField(choices=[('triggered', 'Triggered'), ('pulling-cache', 'Pulling cache'), ('cloning', 'Cloning'), ('installing', 'Installing'), ('building', 'Building'), ('pushing-cache', 'Pushing cache'), ('finished', 'Finished')], default='finished', max_length=55, verbose_name='State'),
),
]
4 changes: 3 additions & 1 deletion readthedocs/doc_builder/environments.py
Original file line number Diff line number Diff line change
Expand Up @@ -662,7 +662,7 @@ def done(self):
self.build['state'] == BUILD_STATE_FINISHED
)

def update_build(self, state=None):
def update_build(self, state=None, force=False):
"""
Record a build by hitting the API.
Expand Down Expand Up @@ -747,6 +747,8 @@ def update_build(self, state=None):
or (self.done and not self.successful)
# Otherwise, are we explicitly to not update?
or self.update_on_success
# Explicit force the update
or force
)
if update_build:
try:
Expand Down
21 changes: 16 additions & 5 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@
BUILD_STATE_CLONING,
BUILD_STATE_FINISHED,
BUILD_STATE_INSTALLING,
BUILD_STATE_PULLING_CACHE,
BUILD_STATE_PUSHING_CACHE,
BUILD_STATE_UPLOADING,
BUILD_STATUS_SUCCESS,
BUILD_STATUS_FAILURE,
LATEST,
Expand Down Expand Up @@ -93,10 +96,12 @@ class CachedEnvironmentMixin:

"""Mixin that pull/push cached environment to storage."""

def pull_cached_environment(self):
def pull_cached_environment(self, environment):
if not self.project.has_feature(feature_id=Feature.CACHED_ENVIRONMENT):
return

environment.update_build(state=BUILD_STATE_PULLING_CACHE)

storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)()
filename = self.version.get_storage_environment_cache_path()

Expand All @@ -123,10 +128,12 @@ def pull_cached_environment(self):
with tarfile.open(fileobj=remote_fd) as tar:
tar.extractall(self.project.doc_path)

def push_cached_environment(self):
def push_cached_environment(self, environment):
if not self.project.has_feature(feature_id=Feature.CACHED_ENVIRONMENT):
return

environment.update_build(state=BUILD_STATE_PUSHING_CACHE)

project_path = self.project.doc_path
directories = [
'checkouts',
Expand Down Expand Up @@ -362,7 +369,7 @@ def run(self, version_pk): # pylint: disable=arguments-differ
# repository in this step, and pushing it back will delete
# all the other cached things (Python packages, Sphinx,
# virtualenv, etc)
self.pull_cached_environment()
self.pull_cached_environment(environment)
self.sync_repo(environment)
return True
except RepositoryError:
Expand Down Expand Up @@ -586,7 +593,7 @@ def run_setup(self, record=True):
raise ProjectBuildsSkippedError
try:
with self.project.repo_nonblockinglock(version=self.version):
self.pull_cached_environment()
self.pull_cached_environment(environment)
self.setup_vcs(environment)
except vcs_support_utils.LockTimeout as e:
self.task.retry(exc=e, throw=False)
Expand Down Expand Up @@ -662,6 +669,7 @@ def run_build(self, record):
build=self.build,
record=record,
environment=env_vars,
update_on_success=False,

# Pass ``start_time`` here to not reset the timer
start_time=self.build_start_time,
Expand Down Expand Up @@ -712,6 +720,7 @@ def run_build(self, record):
localmedia=bool(outcomes['localmedia']),
pdf=bool(outcomes['pdf']),
epub=bool(outcomes['epub']),
environment=self.build_env,
)

# Finalize build and update web servers
Expand Down Expand Up @@ -745,7 +754,7 @@ def run_build(self, record):
self.send_notifications(self.version.pk, self.build['id'], email=False)

# Push cached environment on success for next build
self.push_cached_environment()
self.push_cached_environment(self.build_env)

if self.commit:
send_external_build_status(
Expand All @@ -772,6 +781,7 @@ def run_build(self, record):
}
)

self.build_env.update_build(BUILD_STATE_FINISHED, force=True)
build_complete.send(sender=Build, build=self.build_env.build)

@staticmethod
Expand Down Expand Up @@ -949,6 +959,7 @@ def store_build_artifacts(
)
return

environment.update_build(BUILD_STATE_UPLOADING)
storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
log.info(
LOG_TEMPLATE,
Expand Down