diff --git a/readthedocs/api/v2/views/model_views.py b/readthedocs/api/v2/views/model_views.py index 9c2b1014d0a..a077a60c551 100644 --- a/readthedocs/api/v2/views/model_views.py +++ b/readthedocs/api/v2/views/model_views.py @@ -5,7 +5,6 @@ from allauth.socialaccount.models import SocialAccount from django.conf import settings -from django.core.files.storage import get_storage_class from django.shortcuts import get_object_or_404 from django.template.loader import render_to_string from rest_framework import decorators, permissions, status, viewsets @@ -19,6 +18,7 @@ from readthedocs.oauth.models import RemoteOrganization, RemoteRepository from readthedocs.oauth.services import GitHubService, registry from readthedocs.projects.models import Domain, Project +from readthedocs.storage import build_commands_storage from ..permissions import APIPermission, APIRestrictedPermission, IsOwner from ..serializers import ( @@ -257,14 +257,13 @@ def retrieve(self, *args, **kwargs): serializer = self.get_serializer(instance) data = serializer.data if instance.cold_storage: - storage = get_storage_class(settings.RTD_BUILD_COMMANDS_STORAGE)() storage_path = '{date}/{id}.json'.format( date=str(instance.date.date()), id=instance.id, ) - if storage.exists(storage_path): + if build_commands_storage.exists(storage_path): try: - json_resp = storage.open(storage_path).read() + json_resp = build_commands_storage.open(storage_path).read() data['commands'] = json.loads(json_resp) except Exception: log.exception( diff --git a/readthedocs/builds/models.py b/readthedocs/builds/models.py index af20a2bdb3a..7a48ee64db2 100644 --- a/readthedocs/builds/models.py +++ b/readthedocs/builds/models.py @@ -8,7 +8,6 @@ import regex from django.conf import settings -from django.core.files.storage import get_storage_class from django.db import models from django.db.models import F from django.urls import reverse @@ -88,6 +87,7 @@ ) from readthedocs.projects.models import APIProject, Project from 
readthedocs.projects.version_handling import determine_stable_version +from readthedocs.storage import build_environment_storage log = logging.getLogger(__name__) @@ -411,8 +411,7 @@ def get_storage_paths(self): def get_storage_environment_cache_path(self): """Return the path of the cached environment tar file.""" - storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)() - return storage.join(self.project.slug, f'{self.slug}.tar') + return build_environment_storage.join(self.project.slug, f'{self.slug}.tar') def clean_build_path(self): """ diff --git a/readthedocs/builds/tasks.py b/readthedocs/builds/tasks.py index 8c17e67fcd5..4534a580bc9 100644 --- a/readthedocs/builds/tasks.py +++ b/readthedocs/builds/tasks.py @@ -5,7 +5,6 @@ from celery import Task from django.conf import settings -from django.core.files.storage import get_storage_class from readthedocs.api.v2.serializers import BuildSerializer from readthedocs.api.v2.utils import ( @@ -27,6 +26,7 @@ from readthedocs.core.utils import trigger_build from readthedocs.projects.models import Project from readthedocs.projects.tasks import send_build_status +from readthedocs.storage import build_commands_storage from readthedocs.worker import app log = logging.getLogger(__name__) @@ -158,7 +158,6 @@ def archive_builds_task(days=14, limit=200, include_cold=False, delete=False): queryset = queryset.exclude(cold_storage=True) queryset = queryset.filter(date__lt=max_date)[:limit] - storage = get_storage_class(settings.RTD_BUILD_COMMANDS_STORAGE)() for build in queryset: data = BuildSerializer(build).data['commands'] if data: @@ -172,7 +171,7 @@ def archive_builds_task(days=14, limit=200, include_cold=False, delete=False): output.seek(0) filename = '{date}/{id}.json'.format(date=str(build.date.date()), id=build.id) try: - storage.save(name=filename, content=output) + build_commands_storage.save(name=filename, content=output) build.cold_storage = True build.save() if delete: diff --git 
a/readthedocs/core/utils/general.py b/readthedocs/core/utils/general.py index f1b5e1c0c5b..cbdb202e161 100644 --- a/readthedocs/core/utils/general.py +++ b/readthedocs/core/utils/general.py @@ -2,13 +2,12 @@ import os -from django.conf import settings -from django.core.files.storage import get_storage_class from django.shortcuts import get_object_or_404 from readthedocs.core.utils import broadcast from readthedocs.projects.tasks import remove_dirs from readthedocs.builds.models import Version +from readthedocs.storage import build_environment_storage def wipe_version_via_slugs(version_slug, project_slug): @@ -28,5 +27,4 @@ def wipe_version_via_slugs(version_slug, project_slug): broadcast(type='build', task=remove_dirs, args=[(del_dir,)]) # Delete the cache environment from storage - storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)() - storage.delete(version.get_storage_environment_cache_path()) + build_environment_storage.delete(version.get_storage_environment_cache_path()) diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 642d9304a76..4033d947e5e 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -11,7 +11,6 @@ from django.conf import settings from django.conf.urls import include from django.contrib.auth.models import User -from django.core.files.storage import get_storage_class from django.core.validators import MaxValueValidator, MinValueValidator from django.db import models from django.db.models import Prefetch @@ -46,6 +45,7 @@ ) from readthedocs.projects.version_handling import determine_stable_version from readthedocs.search.parsers import MkDocsParser, SphinxParser +from readthedocs.storage import build_media_storage from readthedocs.vcs_support.backends import backend_cls from readthedocs.vcs_support.utils import Lock, NonBlockingLock @@ -844,12 +844,11 @@ def has_good_build(self): return self.builds(manager=INTERNAL).filter(success=True).exists() def has_media(self, 
type_, version_slug=LATEST, version_type=None): - storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() storage_path = self.get_storage_path( type_=type_, version_slug=version_slug, version_type=version_type ) - return storage.exists(storage_path) + return build_media_storage.exists(storage_path) def has_pdf(self, version_slug=LATEST, version_type=None): return self.has_media( diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 86c1895d44c..8cfc263a79d 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -21,7 +21,6 @@ import requests from celery.exceptions import SoftTimeLimitExceeded from django.conf import settings -from django.core.files.storage import get_storage_class from django.db.models import Q from django.urls import reverse from django.utils import timezone @@ -73,6 +72,7 @@ from readthedocs.projects.models import APIProject, Feature from readthedocs.search.utils import index_new_files, remove_indexed_files from readthedocs.sphinx_domains.models import SphinxDomain +from readthedocs.storage import build_media_storage, build_environment_storage from readthedocs.vcs_support import utils as vcs_support_utils from readthedocs.worker import app @@ -98,7 +98,6 @@ def pull_cached_environment(self): if not self.project.has_feature(feature_id=Feature.CACHED_ENVIRONMENT): return - storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)() filename = self.version.get_storage_environment_cache_path() msg = 'Checking for cached environment' @@ -110,7 +109,7 @@ def pull_cached_environment(self): 'msg': msg, } ) - if storage.exists(filename): + if build_environment_storage.exists(filename): msg = 'Pulling down cached environment from storage' log.info( LOG_TEMPLATE, @@ -120,7 +119,7 @@ def pull_cached_environment(self): 'msg': msg, } ) - remote_fd = storage.open(filename, mode='rb') + remote_fd = build_environment_storage.open(filename, mode='rb') with tarfile.open(fileobj=remote_fd) as 
tar: tar.extractall(self.project.doc_path) @@ -153,7 +152,6 @@ def push_cached_environment(self): if os.path.exists(path): tar.add(path, arcname='.cache') - storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)() with open(tmp_filename, 'rb') as fd: msg = 'Pushing up cached environment to storage' log.info( @@ -164,7 +162,7 @@ def push_cached_environment(self): 'msg': msg, } ) - storage.save( + build_environment_storage.save( self.version.get_storage_environment_cache_path(), fd, ) @@ -1030,7 +1028,6 @@ def store_build_artifacts( :param pdf: whether to save PDF output :param epub: whether to save ePub output """ - storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() log.info( LOG_TEMPLATE, { @@ -1086,7 +1083,7 @@ def store_build_artifacts( }, ) try: - storage.sync_directory(from_path, to_path) + build_media_storage.sync_directory(from_path, to_path) except Exception: # Ideally this should just be an IOError # but some storage backends unfortunately throw other errors @@ -1115,7 +1112,7 @@ def store_build_artifacts( }, ) try: - storage.delete_directory(media_path) + build_media_storage.delete_directory(media_path) except Exception: # Ideally this should just be an IOError # but some storage backends unfortunately throw other errors @@ -1376,8 +1373,6 @@ def _create_intersphinx_data(version, commit, build): if not version.is_sphinx_type: return - storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() - html_storage_path = version.project.get_storage_path( type_='html', version_slug=version.slug, include_file=False ) @@ -1385,17 +1380,17 @@ def _create_intersphinx_data(version, commit, build): type_='json', version_slug=version.slug, include_file=False ) - object_file = storage.join(html_storage_path, 'objects.inv') - if not storage.exists(object_file): + object_file = build_media_storage.join(html_storage_path, 'objects.inv') + if not build_media_storage.exists(object_file): log.debug('No objects.inv, skipping intersphinx indexing.') 
return - type_file = storage.join(json_storage_path, 'readthedocs-sphinx-domain-names.json') + type_file = build_media_storage.join(json_storage_path, 'readthedocs-sphinx-domain-names.json') types = {} titles = {} - if storage.exists(type_file): + if build_media_storage.exists(type_file): try: - data = json.load(storage.open(type_file)) + data = json.load(build_media_storage.open(type_file)) types = data['types'] titles = data['titles'] except Exception: @@ -1415,7 +1410,7 @@ def warn(self, msg): log.warning('Sphinx MockApp: %s', msg) # Re-create all objects from the new build of the version - object_file_url = storage.url(object_file) + object_file_url = build_media_storage.url(object_file) if object_file_url.startswith('/'): # Filesystem backed storage simply prepends MEDIA_URL to the path to get the URL # This can cause an issue if MEDIA_URL is not fully qualified @@ -1537,15 +1532,13 @@ def _create_imported_files(*, version, commit, build, search_ranking, search_ign :returns: paths of changed files :rtype: set """ - storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() - changed_files = set() # Re-create all objects from the new build of the version storage_path = version.project.get_storage_path( type_='html', version_slug=version.slug, include_file=False ) - for root, __, filenames in storage.walk(storage_path): + for root, __, filenames in build_media_storage.walk(storage_path): for filename in filenames: if filename.endswith('.html'): model_class = HTMLFile @@ -1556,13 +1549,13 @@ def _create_imported_files(*, version, commit, build, search_ranking, search_ign # For projects not behind a CDN, we don't care about non-HTML continue - full_path = storage.join(root, filename) + full_path = build_media_storage.join(root, filename) # Generate a relative path for storage similar to os.path.relpath relpath = full_path.replace(storage_path, '', 1).lstrip('/') try: - md5 = hashlib.md5(storage.open(full_path, 'rb').read()).hexdigest() + md5 = 
hashlib.md5(build_media_storage.open(full_path, 'rb').read()).hexdigest() except Exception: log.exception( 'Error while generating md5 for %s:%s:%s. Don\'t stop.', @@ -1808,10 +1801,9 @@ def remove_build_storage_paths(paths): :param paths: list of paths in build media storage to delete """ - storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() for storage_path in paths: log.info('Removing %s from media storage', storage_path) - storage.delete_directory(storage_path) + build_media_storage.delete_directory(storage_path) @app.task(queue='web') diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py index 330b3bc1e77..b3afdd485d7 100644 --- a/readthedocs/projects/views/public.py +++ b/readthedocs/projects/views/public.py @@ -13,7 +13,6 @@ from django.conf import settings from django.contrib import messages from django.core.cache import cache -from django.core.files.storage import get_storage_class from django.db.models import prefetch_related_objects from django.http import HttpResponse from django.shortcuts import get_object_or_404, redirect, render @@ -37,6 +36,7 @@ from readthedocs.projects.views.mixins import ProjectRelationListMixin from readthedocs.proxito.views.mixins import ServeDocsMixin from readthedocs.proxito.views.utils import _get_project_data_from_request +from readthedocs.storage import build_media_storage from ..constants import PRIVATE from .base import ProjectOnboardMixin @@ -365,7 +365,6 @@ def get( uip=get_client_ip(request), ) - storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() storage_path = version.project.get_storage_path( type_=type_, version_slug=version_slug, @@ -373,7 +372,7 @@ def get( ) # URL without scheme and domain to perform an NGINX internal redirect - url = storage.url(storage_path) + url = build_media_storage.url(storage_path) url = urlparse(url)._replace(scheme='', netloc='').geturl() return self._serve_docs( diff --git a/readthedocs/proxito/tests/base.py 
b/readthedocs/proxito/tests/base.py index daee545b30f..fd64857e858 100644 --- a/readthedocs/proxito/tests/base.py +++ b/readthedocs/proxito/tests/base.py @@ -3,17 +3,25 @@ import pytest import django_dynamic_fixture as fixture +from django.conf import settings from django.contrib.auth.models import User +from django.core.files.storage import get_storage_class from django.test import TestCase from readthedocs.projects.constants import PUBLIC from readthedocs.projects.models import Project, Domain +from readthedocs.proxito.views import serve @pytest.mark.proxito class BaseDocServing(TestCase): def setUp(self): + # Re-initialize storage + # Various tests override either this setting or various aspects of the storage engine + # By resetting it every test case, we avoid this caching (which is a huge benefit in prod) + serve.build_media_storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() + self.eric = fixture.get(User, username='eric') self.eric.set_password('eric') self.eric.save() diff --git a/readthedocs/proxito/views/mixins.py b/readthedocs/proxito/views/mixins.py index a3f91edd035..39496393035 100644 --- a/readthedocs/proxito/views/mixins.py +++ b/readthedocs/proxito/views/mixins.py @@ -5,7 +5,6 @@ from slugify import slugify as unicode_slugify from django.conf import settings -from django.core.files.storage import get_storage_class from django.http import ( HttpResponse, HttpResponseRedirect, @@ -18,6 +17,7 @@ from readthedocs.builds.constants import EXTERNAL, INTERNAL from readthedocs.core.resolver import resolve from readthedocs.redirects.exceptions import InfiniteRedirectException +from readthedocs.storage import build_media_storage log = logging.getLogger(__name__) # noqa @@ -72,8 +72,7 @@ def _serve_docs_python(self, request, final_project, path): """ log.debug('[Django serve] path=%s, project=%s', path, final_project.slug) - storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() - root_path = storage.path('') + root_path = 
build_media_storage.path('') # Serve from Python return serve(request, path, root_path) diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index eed8c12062f..59b5ad92536 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -5,8 +5,6 @@ from urllib.parse import urlparse from readthedocs.core.resolver import resolve_path -from django.conf import settings -from django.core.files.storage import get_storage_class from django.http import Http404, HttpResponse, HttpResponseRedirect from django.shortcuts import render from django.urls import resolve as url_resolve @@ -21,6 +19,7 @@ from readthedocs.projects.constants import SPHINX_HTMLDIR from readthedocs.projects.templatetags.projects_tags import sort_version_aware from readthedocs.redirects.exceptions import InfiniteRedirectException +from readthedocs.storage import build_media_storage from .decorators import map_project_slug from .mixins import ServeDocsMixin, ServeRedirectMixin @@ -145,10 +144,8 @@ def get(self, version_type=self.version_type, ) - storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() - # If ``filename`` is empty, serve from ``/`` - path = storage.join(storage_path, filename.lstrip('/')) + path = build_media_storage.join(storage_path, filename.lstrip('/')) # Handle our backend storage not supporting directory indexes, # so we need to append index.html when appropriate. 
if path[-1] == '/': @@ -157,7 +154,7 @@ def get(self, path += 'index.html' # NOTE: calling ``.url`` will remove the trailing slash - storage_url = storage.url(path, http_method=request.method) + storage_url = build_media_storage.url(path, http_method=request.method) # URL without scheme and domain to perform an NGINX internal redirect parsed_url = urlparse(storage_url)._replace(scheme='', netloc='') @@ -217,11 +214,10 @@ def get(self, request, proxito_path, template_name='404.html'): include_file=False, version_type=self.version_type, ) - storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() # First, check for dirhtml with slash for tryfile in ('index.html', 'README.html'): - storage_filename_path = storage.join( + storage_filename_path = build_media_storage.join( storage_root_path, f'{filename}/{tryfile}'.lstrip('/'), ) @@ -231,7 +227,7 @@ def get(self, request, proxito_path, template_name='404.html'): version_slug, storage_filename_path, ) - if storage.exists(storage_filename_path): + if build_media_storage.exists(storage_filename_path): log.info( 'Redirecting to index file: project=%s version=%s, storage_path=%s', final_project.slug, @@ -322,14 +318,14 @@ def get(self, request, proxito_path, template_name='404.html'): if doc_type_404 == SPHINX_HTMLDIR: tryfiles.append('404/index.html') for tryfile in tryfiles: - storage_filename_path = storage.join(storage_root_path, tryfile) - if storage.exists(storage_filename_path): + storage_filename_path = build_media_storage.join(storage_root_path, tryfile) + if build_media_storage.exists(storage_filename_path): log.info( 'Serving custom 404.html page: [project: %s] [version: %s]', final_project.slug, version_slug_404, ) - resp = HttpResponse(storage.open(storage_filename_path).read()) + resp = HttpResponse(build_media_storage.open(storage_filename_path).read()) resp.status_code = 404 return resp @@ -369,18 +365,16 @@ def get(self, request, project): # ... 
we do return a 404 raise Http404() - storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() - storage_path = project.get_storage_path( type_='html', version_slug=version_slug, include_file=False, version_type=self.version_type, ) - path = storage.join(storage_path, 'robots.txt') + path = build_media_storage.join(storage_path, 'robots.txt') - if storage.exists(path): - url = storage.url(path) + if build_media_storage.exists(path): + url = build_media_storage.url(path) url = urlparse(url)._replace(scheme='', netloc='').geturl() return self._serve_docs( request, diff --git a/readthedocs/search/parsers.py b/readthedocs/search/parsers.py index b4dacae8af3..c31d35cbbee 100644 --- a/readthedocs/search/parsers.py +++ b/readthedocs/search/parsers.py @@ -7,10 +7,10 @@ from urllib.parse import urlparse import orjson as json -from django.conf import settings -from django.core.files.storage import get_storage_class from selectolax.parser import HTMLParser +from readthedocs.storage import build_media_storage + log = logging.getLogger(__name__) @@ -22,7 +22,7 @@ class BaseParser: def __init__(self, version): self.version = version self.project = self.version.project - self.storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() + self.storage = build_media_storage def _get_page_content(self, page): """Gets the page content from storage.""" diff --git a/readthedocs/storage/__init__.py b/readthedocs/storage/__init__.py index e69de29bb2d..644fb55e7c2 100644 --- a/readthedocs/storage/__init__.py +++ b/readthedocs/storage/__init__.py @@ -0,0 +1,31 @@ +""" +Provides lazy loaded storage instances for use throughout Read the Docs. + +For static files storage, use django.contrib.staticfiles.storage.staticfiles_storage. +Some storage backends (notably S3) have a slow instantiation time +so doing those upfront improves performance. 
+""" +from django.conf import settings + +from django.core.files.storage import get_storage_class +from django.utils.functional import LazyObject + + +class ConfiguredBuildMediaStorage(LazyObject): + def _setup(self): + self._wrapped = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() + + +class ConfiguredBuildEnvironmentStorage(LazyObject): + def _setup(self): + self._wrapped = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)() + + +class ConfiguredBuildCommandsStorage(LazyObject): + def _setup(self): + self._wrapped = get_storage_class(settings.RTD_BUILD_COMMANDS_STORAGE)() + + +build_media_storage = ConfiguredBuildMediaStorage() +build_environment_storage = ConfiguredBuildEnvironmentStorage() +build_commands_storage = ConfiguredBuildCommandsStorage() diff --git a/readthedocs/storage/s3_storage.py b/readthedocs/storage/s3_storage.py index f24a432f216..5805581ad4f 100644 --- a/readthedocs/storage/s3_storage.py +++ b/readthedocs/storage/s3_storage.py @@ -22,7 +22,7 @@ class S3BuildMediaStorage(BuildMediaStorageMixin, OverrideHostnameMixin, S3Boto3 """An AWS S3 Storage backend for build artifacts.""" - bucket_name = getattr(settings, 'S3_MEDIA_STORAGE_BUCKET') + bucket_name = getattr(settings, 'S3_MEDIA_STORAGE_BUCKET', None) override_hostname = getattr(settings, 'S3_MEDIA_STORAGE_OVERRIDE_HOSTNAME', None) def __init__(self, *args, **kwargs): @@ -63,7 +63,7 @@ class S3StaticStorage(OverrideHostnameMixin, ManifestFilesMixin, S3Boto3Storage) * Uses Django's ManifestFilesMixin to have unique file paths (eg. 
core.a6f5e2c.css) """ - bucket_name = getattr(settings, 'S3_STATIC_STORAGE_BUCKET') + bucket_name = getattr(settings, 'S3_STATIC_STORAGE_BUCKET', None) override_hostname = getattr(settings, 'S3_STATIC_STORAGE_OVERRIDE_HOSTNAME', None) def __init__(self, *args, **kwargs): @@ -82,7 +82,7 @@ def __init__(self, *args, **kwargs): class S3BuildEnvironmentStorage(BuildMediaStorageMixin, S3Boto3Storage): - bucket_name = getattr(settings, 'S3_BUILD_ENVIRONMENT_STORAGE_BUCKET') + bucket_name = getattr(settings, 'S3_BUILD_ENVIRONMENT_STORAGE_BUCKET', None) def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs)