Skip to content

Commit d97217c

Browse files
authored
Merge pull request #5549 from rtfd/davidfischer/build-media-to-storage
Write build artifacts to (cloud) storage from build servers
2 parents 7b93eac + 83165b7 commit d97217c

File tree

8 files changed

+286
-103
lines changed

8 files changed

+286
-103
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ logs/*
2828
media/dash
2929
media/epub
3030
media/export
31+
media/html
3132
media/htmlzip
3233
media/json
3334
media/man

docs/settings.rst

+10
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,16 @@ Default: :djangosetting:`ALLOW_ADMIN`
9494
Whether to include `django.contrib.admin` in the URL's.
9595

9696

97+
RTD_BUILD_MEDIA_STORAGE
98+
-----------------------
99+
100+
Default: ``None``
101+
102+
Use this storage class to upload build artifacts to cloud storage (S3, Azure storage).
103+
This should be a dotted path to the relevant class (eg. ``'path.to.MyBuildMediaStorage'``).
104+
This class should mixin :class:`readthedocs.builds.storage.BuildMediaStorageMixin`.
105+
106+
97107
ELASTICSEARCH_DSL
98108
-----------------
99109

readthedocs/builds/storage.py

+105
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
import logging
2+
from pathlib import Path
3+
4+
from django.core.files.storage import FileSystemStorage
5+
from storages.utils import safe_join, get_available_overwrite_name
6+
7+
8+
log = logging.getLogger(__name__)
9+
10+
11+
class BuildMediaStorageMixin:
12+
13+
"""
14+
A mixin for Storage classes needed to write build artifacts
15+
16+
This adds and modifies some functionality to Django's File Storage API.
17+
By default, classes mixing this in will now overwrite files by default instead
18+
of finding an available name.
19+
This mixin also adds convenience methods to copy and delete entire directories.
20+
21+
See: https://docs.djangoproject.com/en/1.11/ref/files/storage
22+
"""
23+
24+
@staticmethod
25+
def _dirpath(path):
26+
"""It may just be Azure, but for listdir to work correctly, the path must end in '/'"""
27+
path = str(path)
28+
if not path.endswith('/'):
29+
path += '/'
30+
31+
return path
32+
33+
def get_available_name(self, name, max_length=None):
34+
"""
35+
Overrides Django's storage implementation to always return the passed name (overwrite)
36+
37+
By default, Django will not overwrite files even if the same name is specified.
38+
This changes that functionality so that the default is to use the same name and overwrite
39+
rather than modify the path to not clobber files.
40+
"""
41+
return get_available_overwrite_name(name, max_length=max_length)
42+
43+
def delete_directory(self, path):
44+
"""
45+
Delete all files under a certain path from storage
46+
47+
Many storage backends (S3, Azure storage) don't care about "directories".
48+
The directory effectively doesn't exist if there are no files in it.
49+
However, in these backends, there is no "rmdir" operation so you have to recursively
50+
delete all files.
51+
52+
:param path: the path to the directory to remove
53+
"""
54+
log.debug('Deleting directory %s from media storage', path)
55+
folders, files = self.listdir(self._dirpath(path))
56+
for folder_name in folders:
57+
if folder_name:
58+
# Recursively delete the subdirectory
59+
self.delete_directory(safe_join(path, folder_name))
60+
for filename in files:
61+
if filename:
62+
self.delete(safe_join(path, filename))
63+
64+
def copy_directory(self, source, destination):
65+
"""
66+
Copy a directory recursively to storage
67+
68+
:param source: the source path on the local disk
69+
:param destination: the destination path in storage
70+
"""
71+
log.debug('Copying source directory %s to media storage at %s', source, destination)
72+
source = Path(source)
73+
for filepath in source.iterdir():
74+
sub_destination = safe_join(destination, filepath.name)
75+
if filepath.is_dir():
76+
# Recursively copy the subdirectory
77+
self.copy_directory(filepath, sub_destination)
78+
elif filepath.is_file():
79+
with filepath.open('rb') as fd:
80+
self.save(sub_destination, fd)
81+
82+
83+
class BuildMediaFileSystemStorage(BuildMediaStorageMixin, FileSystemStorage):
84+
85+
"""Storage subclass that writes build artifacts under MEDIA_ROOT"""
86+
87+
def get_available_name(self, name, max_length=None):
88+
"""
89+
A hack to overwrite by default with the FileSystemStorage
90+
91+
After upgrading to Django 2.2, this method can be removed
92+
because subclasses can set OS_OPEN_FLAGS such that FileSystemStorage._save
93+
will properly overwrite files.
94+
See: https://github.com/django/django/pull/8476
95+
"""
96+
name = super().get_available_name(name, max_length=max_length)
97+
if self.exists(name):
98+
self.delete(name)
99+
return name
100+
101+
def listdir(self, path):
102+
"""Return empty lists for nonexistent directories (as cloud storages do)"""
103+
if not self.exists(path):
104+
return [], []
105+
return super().listdir(path)

readthedocs/builds/syncers.py

+9-57
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
import shutil
1111

1212
from django.conf import settings
13-
from django.core.exceptions import SuspiciousFileOperation
1413
from django.core.files.storage import get_storage_class
1514

1615
from readthedocs.core.utils import safe_makedirs
@@ -30,6 +29,15 @@ def copy(cls, path, target, is_file=False, **kwargs):
3029
raise NotImplementedError
3130

3231

32+
class NullSyncer:
33+
34+
"""A syncer that doesn't actually do anything"""
35+
36+
@classmethod
37+
def copy(cls, path, target, is_file=False, **kwargs):
38+
pass # noqa
39+
40+
3341
class LocalSyncer(BaseSyncer):
3442

3543
@classmethod
@@ -167,62 +175,6 @@ def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=a
167175
)
168176

169177

170-
class SelectiveStorageRemotePuller(RemotePuller):
171-
172-
"""
173-
Like RemotePuller but certain files are copied via Django's storage system.
174-
175-
If a file with extensions specified by ``extensions`` is copied, it will be copied to storage
176-
and the original is removed.
177-
178-
See: https://docs.djangoproject.com/en/1.11/ref/settings/#std:setting-DEFAULT_FILE_STORAGE
179-
"""
180-
181-
extensions = ('.pdf', '.epub', '.zip')
182-
183-
@classmethod
184-
def get_storage_path(cls, path):
185-
"""
186-
Gets the path to the file within the storage engine.
187-
188-
For example, if the path was $MEDIA_ROOT/pdfs/latest.pdf
189-
the storage_path is 'pdfs/latest.pdf'
190-
191-
:raises: SuspiciousFileOperation if the path isn't under settings.MEDIA_ROOT
192-
"""
193-
path = os.path.normpath(path)
194-
if not path.startswith(settings.MEDIA_ROOT):
195-
raise SuspiciousFileOperation
196-
197-
path = path.replace(settings.MEDIA_ROOT, '').lstrip('/')
198-
return path
199-
200-
@classmethod
201-
def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=arguments-differ
202-
RemotePuller.copy(path, target, host, is_file, **kwargs)
203-
204-
if getattr(storage, 'write_build_media', False):
205-
# This is a sanity check for the case where
206-
# storage is backed by the local filesystem
207-
# In that case, removing the original target file locally
208-
# would remove the file from storage as well
209-
210-
if is_file and os.path.exists(target) and \
211-
any([target.lower().endswith(ext) for ext in cls.extensions]):
212-
log.info('Selective Copy %s to media storage', target)
213-
214-
try:
215-
storage_path = cls.get_storage_path(target)
216-
217-
if storage.exists(storage_path):
218-
storage.delete(storage_path)
219-
220-
with open(target, 'rb') as fd:
221-
storage.save(storage_path, fd)
222-
except Exception:
223-
log.exception('Storage access failed for file. Not failing build.')
224-
225-
226178
class Syncer(SettingsOverrideObject):
227179
_default_class = LocalSyncer
228180
_override_setting = 'FILE_SYNCER'

readthedocs/projects/models.py

+11-5
Original file line numberDiff line numberDiff line change
@@ -504,23 +504,29 @@ def get_subproject_urls(self):
504504
return [(proj.child.slug, proj.child.get_docs_url())
505505
for proj in self.subprojects.all()]
506506

507-
def get_storage_path(self, type_, version_slug=LATEST):
507+
def get_storage_path(self, type_, version_slug=LATEST, include_file=True):
508508
"""
509509
Get a path to a build artifact for use with Django's storage system.
510510
511511
:param type_: Media content type, ie - 'pdf', 'htmlzip'
512512
:param version_slug: Project version slug for lookup
513+
:param include_file: Include file name in return
513514
:return: the path to an item in storage
514515
(can be used with ``storage.url`` to get the URL)
515516
"""
516-
extension = type_.replace('htmlzip', 'zip')
517-
return '{}/{}/{}/{}.{}'.format(
517+
folder_path = '{}/{}/{}'.format(
518518
type_,
519519
self.slug,
520520
version_slug,
521-
self.slug,
522-
extension,
523521
)
522+
if include_file:
523+
extension = type_.replace('htmlzip', 'zip')
524+
return '{}/{}.{}'.format(
525+
folder_path,
526+
self.slug,
527+
extension,
528+
)
529+
return folder_path
524530

525531
def get_production_media_path(self, type_, version_slug, include_file=True):
526532
"""

0 commit comments

Comments
 (0)