Skip to content

Commit cca41b7

Browse files
authored
Merge pull request #4947 from rtfd/davidfischer/storage-epubs-pdfs-zips
Store ePubs and PDFs in media storage
2 parents 559318c + 76456ab commit cca41b7

File tree

7 files changed

+293
-93
lines changed

7 files changed

+293
-93
lines changed

readthedocs/builds/syncers.py

Lines changed: 65 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
31
"""
42
Classes to copy files between build and web servers.
53
@@ -13,12 +11,15 @@
1311
import shutil
1412

1513
from django.conf import settings
14+
from django.core.exceptions import SuspiciousFileOperation
15+
from django.core.files.storage import get_storage_class
1616

1717
from readthedocs.core.utils import safe_makedirs
1818
from readthedocs.core.utils.extend import SettingsOverrideObject
1919

2020

2121
log = logging.getLogger(__name__)
22+
storage = get_storage_class()()
2223

2324

2425
class BaseSyncer:
@@ -42,6 +43,11 @@ def copy(cls, path, target, is_file=False, **kwargs):
4243
return
4344
if os.path.exists(target):
4445
os.remove(target)
46+
47+
# Create containing directory if it doesn't exist
48+
directory = os.path.dirname(target)
49+
safe_makedirs(directory)
50+
4551
shutil.copy2(path, target)
4652
else:
4753
if os.path.exists(target):
@@ -143,6 +149,10 @@ def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=a
143149
log.info('Remote Pull %s to %s', path, target)
144150
if not is_file and not os.path.exists(target):
145151
safe_makedirs(target)
152+
if is_file:
153+
# Create containing directory if it doesn't exist
154+
directory = os.path.dirname(target)
155+
safe_makedirs(directory)
146156
# Add a slash when copying directories
147157
sync_cmd = "rsync -e 'ssh -T' -av --delete {user}@{host}:{path} {target}".format(
148158
host=host,
@@ -159,6 +169,59 @@ def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=a
159169
)
160170

161171

172+
class SelectiveStorageRemotePuller(RemotePuller):

    """
    Like RemotePuller but certain files are also copied via Django's storage system.

    After the regular remote pull, a file whose name ends with one of
    ``extensions`` is additionally saved to the default storage backend.
    The local copy is left in place; this class never deletes it.

    See: https://docs.djangoproject.com/en/1.11/ref/settings/#std:setting-DEFAULT_FILE_STORAGE
    """

    # File name suffixes (compared case-insensitively) that are mirrored to storage.
    extensions = ('.pdf', '.epub', '.zip')

    @classmethod
    def get_storage_path(cls, path):
        """
        Gets the path to the file within the storage engine.

        For example, if the path was $MEDIA_ROOT/pdfs/latest.pdf
        the storage_path is 'pdfs/latest.pdf'

        :param path: filesystem path expected to live under settings.MEDIA_ROOT
        :returns: ``path`` relative to settings.MEDIA_ROOT, without a leading separator
        :raises: SuspiciousFileOperation if the path isn't under settings.MEDIA_ROOT
        """
        # Normalise both sides so a trailing slash or ``..`` segment in either
        # value cannot change the outcome of the comparison.
        media_root = os.path.normpath(settings.MEDIA_ROOT)
        path = os.path.normpath(path)

        # Compare on a separator boundary: a plain ``startswith(media_root)``
        # would also accept sibling directories such as ``<MEDIA_ROOT>-old/``.
        root_prefix = media_root.rstrip(os.sep) + os.sep
        if path != media_root and not path.startswith(root_prefix):
            raise SuspiciousFileOperation(
                'Path {} is not under MEDIA_ROOT'.format(path)
            )

        # Drop only the leading root; ``str.replace`` would also remove any
        # later occurrence of the same text inside the relative part.
        return path[len(media_root):].lstrip(os.sep)

    @classmethod
    def copy(cls, path, target, host, is_file=False, **kwargs):  # pylint: disable=arguments-differ
        """
        Pull ``path`` from ``host`` to ``target``, then mirror selected files to storage.

        The pull itself is delegated to ``RemotePuller.copy``. Afterwards the
        file at ``target`` is saved to storage only when all of these hold:

        * the storage backend has a truthy ``write_build_media`` attribute,
        * ``is_file`` is true and ``target`` exists locally,
        * ``target`` ends (case-insensitively) with one of ``cls.extensions``.

        :raises: SuspiciousFileOperation if ``target`` isn't under settings.MEDIA_ROOT
        """
        RemotePuller.copy(path, target, host, is_file, **kwargs)

        # This is a sanity check for the case where storage is backed by the
        # local filesystem: only backends that explicitly opt in through
        # ``write_build_media`` receive a copy of the build media.
        if not getattr(storage, 'write_build_media', False):
            return

        if not (is_file and os.path.exists(target)):
            return

        # ``str.endswith`` accepts a tuple of suffixes; ``tuple()`` keeps this
        # working if a subclass overrides ``extensions`` with a list.
        if not target.lower().endswith(tuple(cls.extensions)):
            return

        log.info('Selective Copy %s to media storage', target)

        storage_path = cls.get_storage_path(target)

        # Remove any previous artifact first so the save below lands on exactly
        # ``storage_path`` (Django's default ``Storage.save`` otherwise picks
        # an alternative name when the requested one is taken).
        if storage.exists(storage_path):
            storage.delete(storage_path)

        with open(target, 'rb') as fd:
            storage.save(storage_path, fd)
224+
162225
class Syncer(SettingsOverrideObject):
163226
_default_class = LocalSyncer
164227
_override_setting = 'FILE_SYNCER'

readthedocs/projects/models.py

Lines changed: 38 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
# -*- coding: utf-8 -*-
2-
31
"""Project models."""
42

53
import fnmatch
@@ -10,6 +8,7 @@
108

119
from django.conf import settings
1210
from django.contrib.auth.models import User
11+
from django.core.files.storage import get_storage_class
1312
from django.db import models
1413
from django.urls import NoReverseMatch, reverse
1514
from django.utils.functional import cached_property
@@ -44,6 +43,7 @@
4443

4544

4645
log = logging.getLogger(__name__)
46+
storage = get_storage_class()()
4747

4848

4949
class ProjectRelationship(models.Model):
@@ -510,6 +510,24 @@ def get_subproject_urls(self):
510510
return [(proj.child.slug, proj.child.get_docs_url())
511511
for proj in self.subprojects.all()]
512512

513+
def get_storage_path(self, type_, version_slug=LATEST):
    """
    Build the storage-relative path of a build artifact.

    The result has the shape ``<type_>/<slug>/<version_slug>/<slug>.<ext>``
    where ``<ext>`` is ``type_`` with ``htmlzip`` replaced by ``zip``.

    :param type_: Media content type, ie - 'pdf', 'htmlzip'
    :param version_slug: Project version slug for lookup
    :return: the path to an item in storage
        (can be used with ``storage.url`` to get the URL)
    """
    # The htmlzip artifact lives in the ``htmlzip`` folder but is a ``.zip`` file.
    file_extension = type_.replace('htmlzip', 'zip')
    path_template = '{folder}/{project}/{version}/{project}.{ext}'
    return path_template.format(
        folder=type_,
        project=self.slug,
        version=version_slug,
        ext=file_extension,
    )
530+
513531
def get_production_media_path(self, type_, version_slug, include_file=True):
514532
"""
515533
Used to see if these files exist so we can offer them for download.
@@ -728,30 +746,33 @@ def has_aliases(self):
728746
def has_pdf(self, version_slug=LATEST):
729747
if not self.enable_pdf_build:
730748
return False
731-
return os.path.exists(
732-
self.get_production_media_path(
733-
type_='pdf',
734-
version_slug=version_slug,
735-
)
749+
path = self.get_production_media_path(
750+
type_='pdf', version_slug=version_slug
751+
)
752+
storage_path = self.get_storage_path(
753+
type_='pdf', version_slug=version_slug
736754
)
755+
return os.path.exists(path) or storage.exists(storage_path)
737756

738757
def has_epub(self, version_slug=LATEST):
739758
if not self.enable_epub_build:
740759
return False
741-
return os.path.exists(
742-
self.get_production_media_path(
743-
type_='epub',
744-
version_slug=version_slug,
745-
)
760+
path = self.get_production_media_path(
761+
type_='epub', version_slug=version_slug
746762
)
763+
storage_path = self.get_storage_path(
764+
type_='epub', version_slug=version_slug
765+
)
766+
return os.path.exists(path) or storage.exists(storage_path)
747767

748768
def has_htmlzip(self, version_slug=LATEST):
749-
return os.path.exists(
750-
self.get_production_media_path(
751-
type_='htmlzip',
752-
version_slug=version_slug,
753-
)
769+
path = self.get_production_media_path(
770+
type_='htmlzip', version_slug=version_slug
771+
)
772+
storage_path = self.get_storage_path(
773+
type_='htmlzip', version_slug=version_slug
754774
)
775+
return os.path.exists(path) or storage.exists(storage_path)
755776

756777
@property
757778
def sponsored(self):

0 commit comments

Comments
 (0)