Skip to content

Write build artifacts to (cloud) storage from build servers #5549

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 9 commits into from
May 9, 2019
95 changes: 54 additions & 41 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,24 +564,33 @@ def run_build(self, docker, record):
# TODO the build object should have an idea of these states,
# extend the model to include an idea of these outcomes
outcomes = self.build_docs()
build_id = self.build.get('id')
except vcs_support_utils.LockTimeout as e:
self.task.retry(exc=e, throw=False)
raise VersionLockedError
except SoftTimeLimitExceeded:
raise BuildTimeoutError

# Finalize build and update web servers
if build_id:
self.update_app_instances(
html=bool(outcomes['html']),
search=bool(outcomes['search']),
localmedia=bool(outcomes['localmedia']),
pdf=bool(outcomes['pdf']),
epub=bool(outcomes['epub']),
)
else:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this else on the try instead of the if now? I can never remember what try/else does.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's correct. An else on a try is taken if there is no exception. I refactored that slightly because we defined build_id in a try block and then use it later. The only reason it wasn't ever a NameError is that we reraise the error in both except blocks.

log.warning('No build ID, not syncing files')
build_id = self.build.get('id')
if build_id:
# Store build artifacts to storage (local or cloud storage)
self.store_build_artifacts(
html=bool(outcomes['html']),
search=bool(outcomes['search']),
localmedia=bool(outcomes['localmedia']),
pdf=bool(outcomes['pdf']),
epub=bool(outcomes['epub']),
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We likely need to try/except this, so that if it fails, we still run the old syncers.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fair, but I think I'll handle this try/except block in the function around the actual storage writes which could throw errors.


# Finalize build and update web servers
self.update_app_instances(
html=bool(outcomes['html']),
search=bool(outcomes['search']),
localmedia=bool(outcomes['localmedia']),
pdf=bool(outcomes['pdf']),
epub=bool(outcomes['epub']),
)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same here, this should probably be a try/except as well.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This method which only had code removed shouldn't really throw an exception randomly. Do you really think we need a try block for it?

else:
log.warning('No build ID, not syncing files')

if self.build_env.failed:
self.send_notifications()
Expand Down Expand Up @@ -715,6 +724,39 @@ def save_build_config(self):
})
self.build['config'] = config

def store_build_artifacts(
self,
html=False,
localmedia=False,
search=False,
pdf=False,
epub=False,
):
"""
Save build artifacts to "storage" using Django's storage API

The storage could be local filesystem storage OR cloud blob storage
such as S3, Azure storage or Google Cloud Storage.

Remove build artifacts of types not included in this build.

:param html: whether to save HTML output
:param localmedia: whether to save localmedia (htmlzip) output
:param search: whether to save search artifacts
:param pdf: whether to save PDF output
:param epub: whether to save ePub output
:return:
"""
if getattr(settings, 'WRITE_BUILD_MEDIA', False):
log.info(
LOG_TEMPLATE.format(
project=self.version.project.slug,
version=self.version.slug,
msg='Write build artifacts to media storage',
),
)
# TODO: This will be where we actually implement this

def update_app_instances(
self,
html=False,
Expand Down Expand Up @@ -745,25 +787,6 @@ def update_app_instances(

delete_unsynced_media = True

if getattr(storage, 'write_build_media', False):
# Handle the case where we want to upload some built assets to our storage
move_files.delay(
self.version.pk,
hostname,
self.config.doctype,
html=False,
search=False,
localmedia=localmedia,
pdf=pdf,
epub=epub,
delete_unsynced_media=delete_unsynced_media,
)
# Set variables so they don't get synced in the next broadcast step
localmedia = False
pdf = False
epub = False
delete_unsynced_media = False

# Broadcast finalization steps to web application instances
broadcast(
type='app',
Expand Down Expand Up @@ -1017,16 +1040,6 @@ def move_files(
),
])

if getattr(storage, 'write_build_media', False):
# Remove the media from remote storage if it exists
storage_path = version.project.get_storage_path(
type_=media_type,
version_slug=version.slug,
)
if storage.exists(storage_path):
log.info('Removing %s from media storage', storage_path)
storage.delete(storage_path)

log.debug(
LOG_TEMPLATE.format(
project=version.project.slug,
Expand Down