Skip to content

Commit c785d89

Browse files
committed
Make storage classes into module level vars
Some storage backends (notably S3) have extra startup time for the first connection to the backend (~50ms), which means that the storage object has to be reused for the best performance. This commit reuses these objects by making our storage instances into lazy-loaded module-level variables, exactly like Django does with `staticfiles_storage`.
1 parent 44d2540 commit c785d89

File tree

13 files changed

+87
-70
lines changed

13 files changed

+87
-70
lines changed

readthedocs/api/v2/views/model_views.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
from allauth.socialaccount.models import SocialAccount
77
from django.conf import settings
8-
from django.core.files.storage import get_storage_class
98
from django.shortcuts import get_object_or_404
109
from django.template.loader import render_to_string
1110
from rest_framework import decorators, permissions, status, viewsets
@@ -19,6 +18,7 @@
1918
from readthedocs.oauth.models import RemoteOrganization, RemoteRepository
2019
from readthedocs.oauth.services import GitHubService, registry
2120
from readthedocs.projects.models import Domain, Project
21+
from readthedocs.storage import build_commands_storage
2222

2323
from ..permissions import APIPermission, APIRestrictedPermission, IsOwner
2424
from ..serializers import (
@@ -257,14 +257,13 @@ def retrieve(self, *args, **kwargs):
257257
serializer = self.get_serializer(instance)
258258
data = serializer.data
259259
if instance.cold_storage:
260-
storage = get_storage_class(settings.RTD_BUILD_COMMANDS_STORAGE)()
261260
storage_path = '{date}/{id}.json'.format(
262261
date=str(instance.date.date()),
263262
id=instance.id,
264263
)
265-
if storage.exists(storage_path):
264+
if build_commands_storage.exists(storage_path):
266265
try:
267-
json_resp = storage.open(storage_path).read()
266+
json_resp = build_commands_storage.open(storage_path).read()
268267
data['commands'] = json.loads(json_resp)
269268
except Exception:
270269
log.exception(

readthedocs/builds/models.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88

99
import regex
1010
from django.conf import settings
11-
from django.core.files.storage import get_storage_class
1211
from django.db import models
1312
from django.db.models import F
1413
from django.urls import reverse
@@ -88,6 +87,7 @@
8887
)
8988
from readthedocs.projects.models import APIProject, Project
9089
from readthedocs.projects.version_handling import determine_stable_version
90+
from readthedocs.storage import build_environment_storage
9191

9292
log = logging.getLogger(__name__)
9393

@@ -411,8 +411,7 @@ def get_storage_paths(self):
411411

412412
def get_storage_environment_cache_path(self):
413413
"""Return the path of the cached environment tar file."""
414-
storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)()
415-
return storage.join(self.project.slug, f'{self.slug}.tar')
414+
return build_environment_storage.join(self.project.slug, f'{self.slug}.tar')
416415

417416
def clean_build_path(self):
418417
"""

readthedocs/builds/tasks.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55

66
from celery import Task
77
from django.conf import settings
8-
from django.core.files.storage import get_storage_class
98

109
from readthedocs.api.v2.serializers import BuildSerializer
1110
from readthedocs.api.v2.utils import (
@@ -27,6 +26,7 @@
2726
from readthedocs.core.utils import trigger_build
2827
from readthedocs.projects.models import Project
2928
from readthedocs.projects.tasks import send_build_status
29+
from readthedocs.storage import build_commands_storage
3030
from readthedocs.worker import app
3131

3232
log = logging.getLogger(__name__)
@@ -158,7 +158,6 @@ def archive_builds_task(days=14, limit=200, include_cold=False, delete=False):
158158
queryset = queryset.exclude(cold_storage=True)
159159
queryset = queryset.filter(date__lt=max_date)[:limit]
160160

161-
storage = get_storage_class(settings.RTD_BUILD_COMMANDS_STORAGE)()
162161
for build in queryset:
163162
data = BuildSerializer(build).data['commands']
164163
if data:
@@ -172,7 +171,7 @@ def archive_builds_task(days=14, limit=200, include_cold=False, delete=False):
172171
output.seek(0)
173172
filename = '{date}/{id}.json'.format(date=str(build.date.date()), id=build.id)
174173
try:
175-
storage.save(name=filename, content=output)
174+
build_commands_storage.save(name=filename, content=output)
176175
build.cold_storage = True
177176
build.save()
178177
if delete:

readthedocs/core/utils/general.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,13 +2,12 @@
22

33
import os
44

5-
from django.conf import settings
6-
from django.core.files.storage import get_storage_class
75
from django.shortcuts import get_object_or_404
86

97
from readthedocs.core.utils import broadcast
108
from readthedocs.projects.tasks import remove_dirs
119
from readthedocs.builds.models import Version
10+
from readthedocs.storage import build_environment_storage
1211

1312

1413
def wipe_version_via_slugs(version_slug, project_slug):
@@ -28,5 +27,4 @@ def wipe_version_via_slugs(version_slug, project_slug):
2827
broadcast(type='build', task=remove_dirs, args=[(del_dir,)])
2928

3029
# Delete the cache environment from storage
31-
storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)()
32-
storage.delete(version.get_storage_environment_cache_path())
30+
build_environment_storage.delete(version.get_storage_environment_cache_path())

readthedocs/projects/models.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from django.conf import settings
1212
from django.conf.urls import include
1313
from django.contrib.auth.models import User
14-
from django.core.files.storage import get_storage_class
1514
from django.core.validators import MaxValueValidator, MinValueValidator
1615
from django.db import models
1716
from django.db.models import Prefetch
@@ -46,6 +45,7 @@
4645
)
4746
from readthedocs.projects.version_handling import determine_stable_version
4847
from readthedocs.search.parsers import MkDocsParser, SphinxParser
48+
from readthedocs.storage import build_media_storage
4949
from readthedocs.vcs_support.backends import backend_cls
5050
from readthedocs.vcs_support.utils import Lock, NonBlockingLock
5151

@@ -844,12 +844,11 @@ def has_good_build(self):
844844
return self.builds(manager=INTERNAL).filter(success=True).exists()
845845

846846
def has_media(self, type_, version_slug=LATEST, version_type=None):
847-
storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
848847
storage_path = self.get_storage_path(
849848
type_=type_, version_slug=version_slug,
850849
version_type=version_type
851850
)
852-
return storage.exists(storage_path)
851+
return build_media_storage.exists(storage_path)
853852

854853
def has_pdf(self, version_slug=LATEST, version_type=None):
855854
return self.has_media(

readthedocs/projects/tasks.py

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@
2121
import requests
2222
from celery.exceptions import SoftTimeLimitExceeded
2323
from django.conf import settings
24-
from django.core.files.storage import get_storage_class
2524
from django.db.models import Q
2625
from django.urls import reverse
2726
from django.utils import timezone
@@ -73,6 +72,7 @@
7372
from readthedocs.projects.models import APIProject, Feature
7473
from readthedocs.search.utils import index_new_files, remove_indexed_files
7574
from readthedocs.sphinx_domains.models import SphinxDomain
75+
from readthedocs.storage import build_media_storage, build_environment_storage
7676
from readthedocs.vcs_support import utils as vcs_support_utils
7777
from readthedocs.worker import app
7878

@@ -98,7 +98,6 @@ def pull_cached_environment(self):
9898
if not self.project.has_feature(feature_id=Feature.CACHED_ENVIRONMENT):
9999
return
100100

101-
storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)()
102101
filename = self.version.get_storage_environment_cache_path()
103102

104103
msg = 'Checking for cached environment'
@@ -110,7 +109,7 @@ def pull_cached_environment(self):
110109
'msg': msg,
111110
}
112111
)
113-
if storage.exists(filename):
112+
if build_environment_storage.exists(filename):
114113
msg = 'Pulling down cached environment from storage'
115114
log.info(
116115
LOG_TEMPLATE,
@@ -120,7 +119,7 @@ def pull_cached_environment(self):
120119
'msg': msg,
121120
}
122121
)
123-
remote_fd = storage.open(filename, mode='rb')
122+
remote_fd = build_environment_storage.open(filename, mode='rb')
124123
with tarfile.open(fileobj=remote_fd) as tar:
125124
tar.extractall(self.project.doc_path)
126125

@@ -153,7 +152,6 @@ def push_cached_environment(self):
153152
if os.path.exists(path):
154153
tar.add(path, arcname='.cache')
155154

156-
storage = get_storage_class(settings.RTD_BUILD_ENVIRONMENT_STORAGE)()
157155
with open(tmp_filename, 'rb') as fd:
158156
msg = 'Pushing up cached environment to storage'
159157
log.info(
@@ -164,7 +162,7 @@ def push_cached_environment(self):
164162
'msg': msg,
165163
}
166164
)
167-
storage.save(
165+
build_environment_storage.save(
168166
self.version.get_storage_environment_cache_path(),
169167
fd,
170168
)
@@ -1030,7 +1028,6 @@ def store_build_artifacts(
10301028
:param pdf: whether to save PDF output
10311029
:param epub: whether to save ePub output
10321030
"""
1033-
storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
10341031
log.info(
10351032
LOG_TEMPLATE,
10361033
{
@@ -1086,7 +1083,7 @@ def store_build_artifacts(
10861083
},
10871084
)
10881085
try:
1089-
storage.sync_directory(from_path, to_path)
1086+
build_media_storage.sync_directory(from_path, to_path)
10901087
except Exception:
10911088
# Ideally this should just be an IOError
10921089
# but some storage backends unfortunately throw other errors
@@ -1115,7 +1112,7 @@ def store_build_artifacts(
11151112
},
11161113
)
11171114
try:
1118-
storage.delete_directory(media_path)
1115+
build_media_storage.delete_directory(media_path)
11191116
except Exception:
11201117
# Ideally this should just be an IOError
11211118
# but some storage backends unfortunately throw other errors
@@ -1376,26 +1373,24 @@ def _create_intersphinx_data(version, commit, build):
13761373
if not version.is_sphinx_type:
13771374
return
13781375

1379-
storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
1380-
13811376
html_storage_path = version.project.get_storage_path(
13821377
type_='html', version_slug=version.slug, include_file=False
13831378
)
13841379
json_storage_path = version.project.get_storage_path(
13851380
type_='json', version_slug=version.slug, include_file=False
13861381
)
13871382

1388-
object_file = storage.join(html_storage_path, 'objects.inv')
1389-
if not storage.exists(object_file):
1383+
object_file = build_media_storage.join(html_storage_path, 'objects.inv')
1384+
if not build_media_storage.exists(object_file):
13901385
log.debug('No objects.inv, skipping intersphinx indexing.')
13911386
return
13921387

1393-
type_file = storage.join(json_storage_path, 'readthedocs-sphinx-domain-names.json')
1388+
type_file = build_media_storage.join(json_storage_path, 'readthedocs-sphinx-domain-names.json')
13941389
types = {}
13951390
titles = {}
1396-
if storage.exists(type_file):
1391+
if build_media_storage.exists(type_file):
13971392
try:
1398-
data = json.load(storage.open(type_file))
1393+
data = json.load(build_media_storage.open(type_file))
13991394
types = data['types']
14001395
titles = data['titles']
14011396
except Exception:
@@ -1415,7 +1410,7 @@ def warn(self, msg):
14151410
log.warning('Sphinx MockApp: %s', msg)
14161411

14171412
# Re-create all objects from the new build of the version
1418-
object_file_url = storage.url(object_file)
1413+
object_file_url = build_media_storage.url(object_file)
14191414
if object_file_url.startswith('/'):
14201415
# Filesystem backed storage simply prepends MEDIA_URL to the path to get the URL
14211416
# This can cause an issue if MEDIA_URL is not fully qualified
@@ -1537,15 +1532,13 @@ def _create_imported_files(*, version, commit, build, search_ranking, search_ign
15371532
:returns: paths of changed files
15381533
:rtype: set
15391534
"""
1540-
storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
1541-
15421535
changed_files = set()
15431536

15441537
# Re-create all objects from the new build of the version
15451538
storage_path = version.project.get_storage_path(
15461539
type_='html', version_slug=version.slug, include_file=False
15471540
)
1548-
for root, __, filenames in storage.walk(storage_path):
1541+
for root, __, filenames in build_media_storage.walk(storage_path):
15491542
for filename in filenames:
15501543
if filename.endswith('.html'):
15511544
model_class = HTMLFile
@@ -1556,13 +1549,13 @@ def _create_imported_files(*, version, commit, build, search_ranking, search_ign
15561549
# For projects not behind a CDN, we don't care about non-HTML
15571550
continue
15581551

1559-
full_path = storage.join(root, filename)
1552+
full_path = build_media_storage.join(root, filename)
15601553

15611554
# Generate a relative path for storage similar to os.path.relpath
15621555
relpath = full_path.replace(storage_path, '', 1).lstrip('/')
15631556

15641557
try:
1565-
md5 = hashlib.md5(storage.open(full_path, 'rb').read()).hexdigest()
1558+
md5 = hashlib.md5(build_media_storage.open(full_path, 'rb').read()).hexdigest()
15661559
except Exception:
15671560
log.exception(
15681561
'Error while generating md5 for %s:%s:%s. Don\'t stop.',
@@ -1808,10 +1801,9 @@ def remove_build_storage_paths(paths):
18081801
18091802
:param paths: list of paths in build media storage to delete
18101803
"""
1811-
storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
18121804
for storage_path in paths:
18131805
log.info('Removing %s from media storage', storage_path)
1814-
storage.delete_directory(storage_path)
1806+
build_media_storage.delete_directory(storage_path)
18151807

18161808

18171809
@app.task(queue='web')

readthedocs/projects/views/public.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from django.conf import settings
1414
from django.contrib import messages
1515
from django.core.cache import cache
16-
from django.core.files.storage import get_storage_class
1716
from django.db.models import prefetch_related_objects
1817
from django.http import HttpResponse
1918
from django.shortcuts import get_object_or_404, redirect, render
@@ -37,6 +36,7 @@
3736
from readthedocs.projects.views.mixins import ProjectRelationListMixin
3837
from readthedocs.proxito.views.mixins import ServeDocsMixin
3938
from readthedocs.proxito.views.utils import _get_project_data_from_request
39+
from readthedocs.storage import build_media_storage
4040

4141
from ..constants import PRIVATE
4242
from .base import ProjectOnboardMixin
@@ -365,15 +365,14 @@ def get(
365365
uip=get_client_ip(request),
366366
)
367367

368-
storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
369368
storage_path = version.project.get_storage_path(
370369
type_=type_,
371370
version_slug=version_slug,
372371
version_type=version.type,
373372
)
374373

375374
# URL without scheme and domain to perform an NGINX internal redirect
376-
url = storage.url(storage_path)
375+
url = build_media_storage.url(storage_path)
377376
url = urlparse(url)._replace(scheme='', netloc='').geturl()
378377

379378
return self._serve_docs(

readthedocs/proxito/tests/base.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,17 +3,25 @@
33

44
import pytest
55
import django_dynamic_fixture as fixture
6+
from django.conf import settings
67
from django.contrib.auth.models import User
8+
from django.core.files.storage import get_storage_class
79
from django.test import TestCase
810

911
from readthedocs.projects.constants import PUBLIC
1012
from readthedocs.projects.models import Project, Domain
13+
from readthedocs.proxito.views import serve
1114

1215

1316
@pytest.mark.proxito
1417
class BaseDocServing(TestCase):
1518

1619
def setUp(self):
20+
# Re-initialize storage
21+
# Various tests override either this setting or various aspects of the storage engine
22+
# By resetting it every test case, we avoid this caching (which is a huge benefit in prod)
23+
serve.build_media_storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
24+
1725
self.eric = fixture.get(User, username='eric')
1826
self.eric.set_password('eric')
1927
self.eric.save()

readthedocs/proxito/views/mixins.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
from slugify import slugify as unicode_slugify
66

77
from django.conf import settings
8-
from django.core.files.storage import get_storage_class
98
from django.http import (
109
HttpResponse,
1110
HttpResponseRedirect,
@@ -18,6 +17,7 @@
1817
from readthedocs.builds.constants import EXTERNAL, INTERNAL
1918
from readthedocs.core.resolver import resolve
2019
from readthedocs.redirects.exceptions import InfiniteRedirectException
20+
from readthedocs.storage import build_media_storage
2121

2222
log = logging.getLogger(__name__) # noqa
2323

@@ -72,8 +72,7 @@ def _serve_docs_python(self, request, final_project, path):
7272
"""
7373
log.debug('[Django serve] path=%s, project=%s', path, final_project.slug)
7474

75-
storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
76-
root_path = storage.path('')
75+
root_path = build_media_storage.path('')
7776
# Serve from Python
7877
return serve(request, path, root_path)
7978

0 commit comments

Comments
 (0)