Skip to content

Pull/Push cached environment using storage #6763

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Mar 12, 2020
6 changes: 6 additions & 0 deletions readthedocs/builds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

import regex
from django.conf import settings
from django.core.files.storage import get_storage_class
from django.db import models
from django.db.models import F
from django.urls import reverse
Expand Down Expand Up @@ -451,6 +452,11 @@ def get_storage_paths(self):

return paths

def get_storage_environment_cache_path(self):
"""Return the path of the cached environment tar file."""
storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)()
return storage.join(self.project.slug, f'{self.slug}.tar')

def clean_build_path(self):
"""
Clean build path for project version.
Expand Down
6 changes: 6 additions & 0 deletions readthedocs/core/utils/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,8 @@

import os

from django.conf import settings
from django.core.files.storage import get_storage_class
from django.shortcuts import get_object_or_404

from readthedocs.core.utils import broadcast
Expand All @@ -24,3 +26,7 @@ def wipe_version_via_slugs(version_slug, project_slug):
]
for del_dir in del_dirs:
broadcast(type='build', task=remove_dirs, args=[(del_dir,)])

# Delete the cache environment from storage
storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)()
storage.delete(version.get_storage_environment_cache_path())
5 changes: 5 additions & 0 deletions readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1515,6 +1515,7 @@ def add_features(sender, **kwargs):
SKIP_SYNC_TAGS = 'skip_sync_tags'
SKIP_SYNC_BRANCHES = 'skip_sync_branches'
SKIP_SYNC = 'skip_sync'
CACHED_ENVIRONMENT = 'cached_environment'

FEATURES = (
(USE_SPHINX_LATEST, _('Use latest version of Sphinx')),
Expand Down Expand Up @@ -1585,6 +1586,10 @@ def add_features(sender, **kwargs):
SKIP_SYNC,
_('Skip symlinking and file syncing to webs'),
),
(
CACHED_ENVIRONMENT,
_('Cache the environment (virtualenv, conda, pip cache, repository) in storage'),
),
)

projects = models.ManyToManyField(
Expand Down
92 changes: 90 additions & 2 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import os
import shutil
import socket
import tarfile
import tempfile
from collections import Counter, defaultdict

import requests
Expand Down Expand Up @@ -87,6 +89,87 @@
log = logging.getLogger(__name__)


class CachedEnvironmentMixin:

    """Mixin that pulls/pushes the cached build environment from/to storage."""

    def pull_cached_environment(self):
        """
        Download this version's cached environment tarball and extract it.

        No-op unless the project has the ``CACHED_ENVIRONMENT`` feature
        enabled. The tarball is extracted into the project's ``doc_path``.
        """
        if not self.project.has_feature(feature_id=Feature.CACHED_ENVIRONMENT):
            return

        storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)()
        filename = self.version.get_storage_environment_cache_path()

        log.debug(
            LOG_TEMPLATE,
            {
                'project': self.project.slug,
                'version': self.version.slug,
                'msg': 'Checking for cached environment',
            }
        )
        if storage.exists(filename):
            log.info(
                LOG_TEMPLATE,
                {
                    'project': self.project.slug,
                    'version': self.version.slug,
                    'msg': 'Pulling down cached environment from storage',
                }
            )
            # mkstemp instead of the race-prone/deprecated tempfile.mktemp
            tmp_fd, tmp_filename = tempfile.mkstemp(suffix='.tar')
            try:
                # Close both file handles deterministically (the remote
                # handle was previously leaked).
                with storage.open(filename, mode='rb') as remote_fd:
                    with os.fdopen(tmp_fd, mode='wb') as local_fd:
                        shutil.copyfileobj(remote_fd, local_fd)

                with tarfile.open(tmp_filename) as tar:
                    tar.extractall(self.version.project.doc_path)
            finally:
                # Don't leak a tar file per build on the build server.
                os.remove(tmp_filename)

    def push_cached_environment(self):
        """
        Tar up this version's environment directories and upload to storage.

        No-op unless the project has the ``CACHED_ENVIRONMENT`` feature
        enabled. Only paths that exist on disk are archived.
        """
        if not self.project.has_feature(feature_id=Feature.CACHED_ENVIRONMENT):
            return

        project_path = self.project.doc_path
        paths = [
            os.path.join(project_path, 'checkouts', self.version.slug),
            os.path.join(project_path, 'envs', self.version.slug),
            os.path.join(project_path, 'conda', self.version.slug),
            os.path.join(project_path, '.cache'),
        ]

        # mkstemp instead of the race-prone/deprecated tempfile.mktemp
        tmp_fd, tmp_filename = tempfile.mkstemp(suffix='.tar')
        os.close(tmp_fd)
        try:
            # open just with 'w', to not compress and waste CPU cycles
            with tarfile.open(tmp_filename, 'w') as tar:
                for path in paths:
                    if os.path.exists(path):
                        # arcname is '<parent-dir-name>/<version-slug>', e.g.
                        # 'checkouts/latest'.
                        # NOTE(review): for the '.cache' entry this yields
                        # '<project-slug>/<version-slug>', which does not
                        # round-trip to '.cache' on extraction — confirm the
                        # intended layout.
                        tar.add(
                            path,
                            arcname=os.path.join(
                                os.path.basename(os.path.dirname(path)),
                                self.version.slug,
                            )
                        )

            storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)()
            with open(tmp_filename, 'rb') as fd:
                # The original assigned this message with a trailing comma,
                # logging a 1-tuple instead of the string.
                log.info(
                    LOG_TEMPLATE,
                    {
                        'project': self.project.slug,
                        'version': self.version.slug,
                        'msg': 'Pushing up cached environment to storage',
                    }
                )
                storage.save(
                    self.version.get_storage_environment_cache_path(),
                    fd,
                )
        finally:
            # Don't leak a tar file per build on the build server.
            os.remove(tmp_filename)


class SyncRepositoryMixin:

"""Mixin that handles the VCS sync/update."""
Expand Down Expand Up @@ -230,7 +313,7 @@ def sync_repository_task(version_pk):
clean_build(version_pk)


class SyncRepositoryTaskStep(SyncRepositoryMixin):
class SyncRepositoryTaskStep(SyncRepositoryMixin, CachedEnvironmentMixin):

"""
Entry point to synchronize the VCS documentation.
Expand Down Expand Up @@ -271,6 +354,7 @@ def run(self, version_pk): # pylint: disable=arguments-differ
with environment:
before_vcs.send(sender=self.version, environment=environment)
with self.project.repo_nonblockinglock(version=self.version):
self.pull_cached_environment()
self.sync_repo(environment)
return True
except RepositoryError:
Expand Down Expand Up @@ -329,7 +413,7 @@ def update_docs_task(self, version_pk, *args, **kwargs):
clean_build(version_pk)


class UpdateDocsTaskStep(SyncRepositoryMixin):
class UpdateDocsTaskStep(SyncRepositoryMixin, CachedEnvironmentMixin):

"""
The main entry point for updating documentation.
Expand Down Expand Up @@ -492,6 +576,7 @@ def run_setup(self, record=True):
raise ProjectBuildsSkippedError
try:
with self.project.repo_nonblockinglock(version=self.version):
self.pull_cached_environment()
self.setup_vcs(environment)
except vcs_support_utils.LockTimeout as e:
self.task.retry(exc=e, throw=False)
Expand Down Expand Up @@ -646,6 +731,9 @@ def run_build(self, record):
# Send Webhook notification for build success.
self.send_notifications(self.version.pk, self.build['id'], email=False)

# Push cached environment on success for next build
self.push_cached_environment()

if self.commit:
send_external_build_status(
version_type=self.version.type,
Expand Down
2 changes: 2 additions & 0 deletions readthedocs/settings/docker_compose.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ def DATABASES(self): # noqa
RTD_BUILD_MEDIA_STORAGE = 'readthedocs.storage.azure_storage.AzureBuildMediaStorage'
AZURE_STATIC_STORAGE_HOSTNAME = PRODUCTION_DOMAIN

RTD_BUILD_ENVIRONMENT_STORAGE = 'readthedocs.storage.azure_storage.AzureBuildEnvironmentStorage'

# Storage for static files (those collected with `collectstatic`)
STATICFILES_STORAGE = 'readthedocs.storage.azure_storage.AzureStaticStorage'

Expand Down
5 changes: 5 additions & 0 deletions readthedocs/storage/azure_storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,11 @@ class AzureBuildStorage(AzureStorage):
azure_container = getattr(settings, 'AZURE_BUILD_STORAGE_CONTAINER', None) or 'builds'


class AzureBuildEnvironmentStorage(BuildMediaStorageMixin, AzureStorage):

    """Azure blob storage backend for cached build environment tarballs."""

    # Container name is configurable via AZURE_BUILD_ENVIRONMENT_STORAGE_CONTAINER
    # in settings; falls back to 'envs' when unset or empty.
    azure_container = getattr(settings, 'AZURE_BUILD_ENVIRONMENT_STORAGE_CONTAINER', None) or 'envs'


class AzureStaticStorage(OverrideHostnameMixin, ManifestFilesMixin, AzureStorage):

"""
Expand Down