Skip to content

Store ePubs and PDFs in media storage #4947

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 19 commits into from
Feb 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
19 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 65 additions & 2 deletions readthedocs/builds/syncers.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-

"""
Classes to copy files between build and web servers.

Expand All @@ -13,12 +11,15 @@
import shutil

from django.conf import settings
from django.core.exceptions import SuspiciousFileOperation
from django.core.files.storage import get_storage_class

from readthedocs.core.utils import safe_makedirs
from readthedocs.core.utils.extend import SettingsOverrideObject


log = logging.getLogger(__name__)
storage = get_storage_class()()


class BaseSyncer:
Expand All @@ -42,6 +43,11 @@ def copy(cls, path, target, is_file=False, **kwargs):
return
if os.path.exists(target):
os.remove(target)

# Create containing directory if it doesn't exist
directory = os.path.dirname(target)
safe_makedirs(directory)

shutil.copy2(path, target)
else:
if os.path.exists(target):
Expand Down Expand Up @@ -143,6 +149,10 @@ def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=a
log.info('Remote Pull %s to %s', path, target)
if not is_file and not os.path.exists(target):
safe_makedirs(target)
if is_file:
# Create containing directory if it doesn't exist
directory = os.path.dirname(target)
safe_makedirs(directory)
# Add a slash when copying directories
sync_cmd = "rsync -e 'ssh -T' -av --delete {user}@{host}:{path} {target}".format(
host=host,
Expand All @@ -159,6 +169,59 @@ def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=a
)


class SelectiveStorageRemotePuller(RemotePuller):

"""
Like RemotePuller but certain files are copied via Django's storage system.

If a file with extensions specified by ``extensions`` is copied, it will be copied to storage
and the original is removed.

See: https://docs.djangoproject.com/en/1.11/ref/settings/#std:setting-DEFAULT_FILE_STORAGE
"""

extensions = ('.pdf', '.epub', '.zip')

@classmethod
def get_storage_path(cls, path):
"""
Gets the path to the file within the storage engine.

For example, if the path was $MEDIA_ROOT/pdfs/latest.pdf
the storage_path is 'pdfs/latest.pdf'

:raises: SuspiciousFileOperation if the path isn't under settings.MEDIA_ROOT
"""
path = os.path.normpath(path)
if not path.startswith(settings.MEDIA_ROOT):
raise SuspiciousFileOperation

path = path.replace(settings.MEDIA_ROOT, '').lstrip('/')
return path

@classmethod
def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=arguments-differ
RemotePuller.copy(path, target, host, is_file, **kwargs)

if getattr(storage, 'write_build_media', False):
# This is a sanity check for the case where
# storage is backed by the local filesystem
# In that case, removing the original target file locally
# would remove the file from storage as well

if is_file and os.path.exists(target) and \
any([target.lower().endswith(ext) for ext in cls.extensions]):
log.info('Selective Copy %s to media storage', target)

storage_path = cls.get_storage_path(target)

if storage.exists(storage_path):
storage.delete(storage_path)

with open(target, 'rb') as fd:
storage.save(storage_path, fd)


class Syncer(SettingsOverrideObject):
_default_class = LocalSyncer
_override_setting = 'FILE_SYNCER'
55 changes: 38 additions & 17 deletions readthedocs/projects/models.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# -*- coding: utf-8 -*-

"""Project models."""

import fnmatch
Expand All @@ -10,6 +8,7 @@

from django.conf import settings
from django.contrib.auth.models import User
from django.core.files.storage import get_storage_class
from django.db import models
from django.urls import NoReverseMatch, reverse
from django.utils.functional import cached_property
Expand Down Expand Up @@ -44,6 +43,7 @@


log = logging.getLogger(__name__)
storage = get_storage_class()()


class ProjectRelationship(models.Model):
Expand Down Expand Up @@ -510,6 +510,24 @@ def get_subproject_urls(self):
return [(proj.child.slug, proj.child.get_docs_url())
for proj in self.subprojects.all()]

def get_storage_path(self, type_, version_slug=LATEST):
"""
Get a path to a build artifact for use with Django's storage system.

:param type_: Media content type, ie - 'pdf', 'htmlzip'
:param version_slug: Project version slug for lookup
:return: the path to an item in storage
(can be used with ``storage.url`` to get the URL)
"""
extension = type_.replace('htmlzip', 'zip')
return '{}/{}/{}/{}.{}'.format(
type_,
self.slug,
version_slug,
self.slug,
extension,
)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we strictly talking filesystem path here, or URL path, or both? If filesystem, this should use os.path.join()

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is a storage check, not a filesystem check.


def get_production_media_path(self, type_, version_slug, include_file=True):
"""
Used to see if these files exist so we can offer them for download.
Expand Down Expand Up @@ -728,30 +746,33 @@ def has_aliases(self):
def has_pdf(self, version_slug=LATEST):
if not self.enable_pdf_build:
return False
return os.path.exists(
self.get_production_media_path(
type_='pdf',
version_slug=version_slug,
)
path = self.get_production_media_path(
type_='pdf', version_slug=version_slug
)
storage_path = self.get_storage_path(
type_='pdf', version_slug=version_slug
)
return os.path.exists(path) or storage.exists(storage_path)

def has_epub(self, version_slug=LATEST):
if not self.enable_epub_build:
return False
return os.path.exists(
self.get_production_media_path(
type_='epub',
version_slug=version_slug,
)
path = self.get_production_media_path(
type_='epub', version_slug=version_slug
)
storage_path = self.get_storage_path(
type_='epub', version_slug=version_slug
)
return os.path.exists(path) or storage.exists(storage_path)

def has_htmlzip(self, version_slug=LATEST):
return os.path.exists(
self.get_production_media_path(
type_='htmlzip',
version_slug=version_slug,
)
path = self.get_production_media_path(
type_='htmlzip', version_slug=version_slug
)
storage_path = self.get_storage_path(
type_='htmlzip', version_slug=version_slug
)
return os.path.exists(path) or storage.exists(storage_path)

@property
def sponsored(self):
Expand Down
Loading