Store ePubs and PDFs in media storage #4947


Merged (19 commits) on Feb 26, 2019
69 changes: 69 additions & 0 deletions readthedocs/builds/syncers.py
@@ -14,12 +14,15 @@

from builtins import object
from django.conf import settings
from django.core.exceptions import SuspiciousFileOperation
from django.core.files.storage import get_storage_class, FileSystemStorage

from readthedocs.core.utils.extend import SettingsOverrideObject
from readthedocs.core.utils import safe_makedirs


log = logging.getLogger(__name__)
storage = get_storage_class()()


class BaseSyncer(object):
@@ -43,6 +46,11 @@ def copy(cls, path, target, is_file=False, **kwargs):
return
if os.path.exists(target):
os.remove(target)

# Create containing directory if it doesn't exist
directory = os.path.dirname(target)
safe_makedirs(directory)

shutil.copy2(path, target)
else:
if os.path.exists(target):
@@ -138,6 +146,10 @@ def copy(cls, path, target, host, is_file=False, **kwargs):  # pylint: disable=arguments-differ
log.info("Remote Pull %s to %s", path, target)
if not is_file and not os.path.exists(target):
safe_makedirs(target)
if is_file:
# Create containing directory if it doesn't exist
directory = os.path.dirname(target)
safe_makedirs(directory)
# Add a slash when copying directories
sync_cmd = "rsync -e 'ssh -T' -av --delete {user}@{host}:{path} {target}".format(
host=host,
@@ -154,6 +166,63 @@ def copy(cls, path, target, host, is_file=False, **kwargs):  # pylint: disable=arguments-differ
)


class SelectiveStorageRemotePuller(RemotePuller):

"""
Exactly like RemotePuller except that certain files are copied via Django's storage system.

If a copied file has an extension listed in ``extensions``, it is uploaded to storage
and the local original is removed.

See: https://docs.djangoproject.com/en/1.11/ref/settings/#std:setting-DEFAULT_FILE_STORAGE
"""

extensions = ('.pdf', '.epub', '.zip')

@classmethod
def get_storage_path(cls, path):
"""
Gets the path to the file within the storage engine

For example, if the path was $MEDIA_ROOT/pdfs/latest.pdf
the storage_path is 'pdfs/latest.pdf'

:raises: SuspiciousFileOperation if the path isn't under settings.MEDIA_ROOT
"""
path = os.path.normpath(path)
if not path.startswith(settings.MEDIA_ROOT):
raise SuspiciousFileOperation

path = path.replace(settings.MEDIA_ROOT, '').lstrip('/')
return path
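As a standalone illustration of the prefix stripping above (a sketch only: `MEDIA_ROOT` here is a hypothetical stand-in for `settings.MEDIA_ROOT`, and `ValueError` stands in for Django's `SuspiciousFileOperation`):

```python
import os

MEDIA_ROOT = '/var/media'  # hypothetical stand-in for settings.MEDIA_ROOT

def storage_path_for(path, media_root=MEDIA_ROOT):
    # Normalize first so '..' segments can't escape the media root
    path = os.path.normpath(path)
    if not path.startswith(media_root):
        # The PR raises SuspiciousFileOperation here
        raise ValueError('path is not under MEDIA_ROOT')
    return path.replace(media_root, '').lstrip('/')

print(storage_path_for('/var/media/pdf/myproject/latest/myproject.pdf'))
# -> pdf/myproject/latest/myproject.pdf
```

Note that the containment check relies on normalization happening first; a raw `startswith` on an unnormalized path would accept `/var/media/../etc/passwd`.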

@classmethod
def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=arguments-differ
RemotePuller.copy(path, target, host, is_file, **kwargs)

if isinstance(storage, FileSystemStorage):
Member:

Is this the best check for whether or not we should execute this logic?

Member:

I used the same logic in 8a602c8 to test for this.

Contributor Author:

It is a bit of weird logic but I'm not sure what the alternative is. If the storage is local, we don't want to copy/delete the file. If it is, we do.

Contributor:

If we're running into this in multiple places, perhaps it would be better to subclass this and be explicit here about what we're actually trying to check with this condition. Having implicit rules based on class type can lead to maintenance headaches. So:

class LocalFileSystemStorage(FileSystemStorage):
    can_sync = True

if getattr(storage, 'can_sync', False):
    pass

Contributor Author:

I like that idea!

Contributor Author:

Actually, thinking a bit more on this, I don't think it'll solve the problem. The issue is that things are very different if storage is backed by cloud blob storage vs. the local filesystem.

If storage is backed by cloud storage, only one web needs to sync any given PDF/ePub/zip to the cloud. Then, once the copy is done, the original local file can be deleted.

If storage is backed by the filesystem, then the files have already been synced to the right place by the superclass. We don't want to delete the local file, as that's the file we're going to serve. We also don't want to do a copy to "storage", as that would copy from and to the same file.

This is all regardless of what the default syncer/puller is.

Contributor Author:

Ok, thinking a bit more, maybe this is possible. We don't want to sync PDFs/ePubs for local filesystem storage at all (they're already synced by the puller/syncer). However, we could add a flag to the storage class (AzureMediaStorage for us) specifically for this. That's probably better than doing an isinstance check.

# This is a sanity check for the case where
# storage is backed by the local filesystem
# In that case, removing the original target file locally
# would remove the file from storage as well
return

if is_file and os.path.exists(target) and \
any([target.lower().endswith(ext) for ext in cls.extensions]):
log.info("Selective Copy %s to media storage", target)

storage_path = cls.get_storage_path(target)

if storage.exists(storage_path):
storage.delete(storage_path)

with open(target, 'rb') as fd:
storage.save(storage_path, fd)

# remove the original after copying
os.remove(target)
Member:

If we remove this for now, we should be able to deploy this, and keep a copy on the webs as well as in Azure? Seems like that might be a good idea for transitioning.

Contributor Author:

Agreed.



class Syncer(SettingsOverrideObject):
_default_class = LocalSyncer
_override_setting = 'FILE_SYNCER'
37 changes: 31 additions & 6 deletions readthedocs/projects/models.py
@@ -12,6 +12,7 @@
from django.conf import settings
from django.contrib.auth.models import User
from django.core.urlresolvers import NoReverseMatch, reverse
from django.core.files.storage import get_storage_class
from django.db import models
from django.utils.encoding import python_2_unicode_compatible
from django.utils.translation import ugettext_lazy as _
@@ -37,6 +38,7 @@
from readthedocs.vcs_support.utils import Lock, NonBlockingLock

log = logging.getLogger(__name__)
storage = get_storage_class()()


@python_2_unicode_compatible
@@ -411,6 +413,24 @@ def get_subproject_urls(self):
return [(proj.child.slug, proj.child.get_docs_url())
for proj in self.subprojects.all()]

def get_storage_path(self, type_, version_slug=LATEST):
"""
Get a path to a build artifact for use with Django's storage system

:param type_: Media content type, ie - 'pdf', 'htmlzip'
:param version_slug: Project version slug for lookup
:return: the path to an item in storage
(can be used with ``storage.url`` to get the URL)
"""
extension = type_.replace('htmlzip', 'zip')
return '{}/{}/{}/{}.{}'.format(
type_,
self.slug,
version_slug,
self.slug,
extension,
)
Contributor:

Are we strictly talking filesystem path here, or URL path, or both? If filesystem, this should use os.path.join()

Contributor Author:

This is a storage check, not a filesystem check.
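The storage-key layout produced by `get_storage_path` can be shown with a standalone sketch (the function below replicates the format string above outside the model; `myproject` and the version slugs are illustrative values):

```python
LATEST = 'latest'  # mirrors the default version slug

def storage_path(type_, project_slug, version_slug=LATEST):
    # 'htmlzip' artifacts are stored with a plain '.zip' file
    # extension, but keep 'htmlzip' as their top-level prefix
    extension = type_.replace('htmlzip', 'zip')
    return '{}/{}/{}/{}.{}'.format(
        type_, project_slug, version_slug, project_slug, extension,
    )

print(storage_path('pdf', 'myproject'))
# -> pdf/myproject/latest/myproject.pdf
print(storage_path('htmlzip', 'myproject', 'stable'))
# -> htmlzip/myproject/stable/myproject.zip
```

Since these are storage keys rather than filesystem paths, the forward-slash format string is deliberate; `os.path.join` would produce backslashes on Windows.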


def get_production_media_path(self, type_, version_slug, include_file=True):
"""
Used to see if these files exist so we can offer them for download.
@@ -612,18 +632,23 @@ def has_aliases(self):
def has_pdf(self, version_slug=LATEST):
if not self.enable_pdf_build:
return False
return os.path.exists(self.get_production_media_path(
type_='pdf', version_slug=version_slug))

path = self.get_production_media_path(type_='pdf', version_slug=version_slug)
storage_path = self.get_storage_path(type_='pdf', version_slug=version_slug)
return os.path.exists(path) or storage.exists(storage_path)

def has_epub(self, version_slug=LATEST):
if not self.enable_epub_build:
return False
return os.path.exists(self.get_production_media_path(
type_='epub', version_slug=version_slug))

path = self.get_production_media_path(type_='epub', version_slug=version_slug)
storage_path = self.get_storage_path(type_='epub', version_slug=version_slug)
return os.path.exists(path) or storage.exists(storage_path)

def has_htmlzip(self, version_slug=LATEST):
return os.path.exists(self.get_production_media_path(
type_='htmlzip', version_slug=version_slug))
path = self.get_production_media_path(type_='htmlzip', version_slug=version_slug)
storage_path = self.get_storage_path(type_='htmlzip', version_slug=version_slug)
return os.path.exists(path) or storage.exists(storage_path)

@property
def sponsored(self):
39 changes: 24 additions & 15 deletions readthedocs/projects/tasks.py
@@ -957,40 +957,49 @@ def move_files(version_pk, hostname, html=False, localmedia=False,
Syncer.copy(from_path, to_path, host=hostname)

if localmedia:
from_path = version.project.artifact_path(
version=version.slug,
type_='sphinx_localmedia',
from_path = os.path.join(
version.project.artifact_path(
version=version.slug,
type_='sphinx_localmedia',
),
'{}.zip'.format(version.project.slug),
)
to_path = version.project.get_production_media_path(
type_='htmlzip',
version_slug=version.slug,
include_file=False,
include_file=True,
)
Syncer.copy(from_path, to_path, host=hostname)
Syncer.copy(from_path, to_path, host=hostname, is_file=True)

# Always move PDF's because the return code lies.
if pdf:
from_path = version.project.artifact_path(
version=version.slug,
type_='sphinx_pdf',
from_path = os.path.join(
version.project.artifact_path(
version=version.slug,
type_='sphinx_pdf',
),
'{}.pdf'.format(version.project.slug),
)
to_path = version.project.get_production_media_path(
type_='pdf',
version_slug=version.slug,
include_file=False,
include_file=True,
)
Syncer.copy(from_path, to_path, host=hostname)
Syncer.copy(from_path, to_path, host=hostname, is_file=True)
if epub:
from_path = version.project.artifact_path(
version=version.slug,
type_='sphinx_epub',
from_path = os.path.join(
version.project.artifact_path(
version=version.slug,
type_='sphinx_epub',
),
'{}.epub'.format(version.project.slug),
)
to_path = version.project.get_production_media_path(
type_='epub',
version_slug=version.slug,
include_file=False,
include_file=True,
)
Syncer.copy(from_path, to_path, host=hostname)
Syncer.copy(from_path, to_path, host=hostname, is_file=True)
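The three branches above make the same change: point `Syncer.copy` at the single built file (with `is_file=True`) instead of its containing directory. A minimal sketch of that path construction, where `ARTIFACT_ROOT` and the directory layout are hypothetical stand-ins for what `Version.project.artifact_path` actually returns:

```python
import os

ARTIFACT_ROOT = '/home/docs/artifacts'  # hypothetical root

def built_file_path(version_slug, artifact_type, project_slug, extension):
    # e.g. a sphinx_pdf build produces <artifact_dir>/<project_slug>.pdf
    artifact_dir = os.path.join(ARTIFACT_ROOT, version_slug, artifact_type)
    return os.path.join(artifact_dir, '{}.{}'.format(project_slug, extension))

print(built_file_path('latest', 'sphinx_pdf', 'myproject', 'pdf'))
# -> /home/docs/artifacts/latest/sphinx_pdf/myproject.pdf
```

Passing the exact file (plus `include_file=True` on the target) is what lets `SelectiveStorageRemotePuller` later test the file's extension against its allowlist.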


@app.task(queue='web')
20 changes: 15 additions & 5 deletions readthedocs/projects/views/public.py
@@ -16,6 +16,7 @@
from django.contrib import messages
from django.contrib.auth.models import User
from django.core.cache import cache
from django.core.files.storage import get_storage_class
from django.core.urlresolvers import reverse
from django.http import Http404, HttpResponse, HttpResponseRedirect
from django.shortcuts import get_object_or_404, render
@@ -35,6 +36,7 @@
log = logging.getLogger(__name__)
search_log = logging.getLogger(__name__ + '.search')
mimetypes.add_type('application/epub+zip', '.epub')
storage = get_storage_class()()


class ProjectIndex(ListView):
@@ -151,7 +153,7 @@ def project_badge(request, project_slug):


def project_downloads(request, project_slug):
"""A detail view for a project with various dataz."""
"""A detail view for a project with various downloads."""
project = get_object_or_404(
Project.objects.protected(request.user), slug=project_slug)
versions = Version.objects.public(user=request.user, project=project)
@@ -190,10 +192,18 @@ def project_download_media(request, project_slug, type_, version_slug):
)
privacy_level = getattr(settings, 'DEFAULT_PRIVACY_LEVEL', 'public')
if privacy_level == 'public' or settings.DEBUG:
path = os.path.join(
settings.MEDIA_URL, type_, project_slug, version_slug,
'%s.%s' % (project_slug, type_.replace('htmlzip', 'zip')))
return HttpResponseRedirect(path)
storage_path = version.project.get_storage_path(type_=type_, version_slug=version_slug)
if storage.exists(storage_path):
Contributor Author:

In the case where the item is not in storage but is on disk, this will result in an extra network call. storage.exists results in an API call to Azure (or wherever storage is backed). However, it should be reasonably fast since it's in the same data center. In the case where media is backed by the local file system, this is just an os.path.isfile so not a big deal.

Member:

That seems fine, and hopefully we will get files into the storage quickly. I'd be 👍 on syncing all the media files after we ship the code, so that we rarely hit this case, as well.

Member:

I did run into an issue locally, where I have a project with ~100 active versions. It required 100 calls to Azure in order to load the project downloads page. This feels less than ideal -- I think the only way to get around it is to denormalize the "has_pdf" state onto the Version model.

Contributor Author:

That seems suboptimal. Perhaps we could check whether the last build for a version built PDFs and was successful?

Contributor:

Would it also make sense to cache this lookup then?

Contributor Author:

> Would it also make sense to cache this lookup then?

My guess is no. Checking whether any specific epub or pdf exists is not very common.

return HttpResponseRedirect(storage.url(storage_path))

media_path = os.path.join(
Member:

Believe this logic is actually what the DefaultFileStorage will do, so I don't think this will ever get hit.

Contributor Author:

Yes it will. In the case where the storage is Azure storage but the PDF isn't yet on Azure (it's only locally on the webs) then this logic is used.

Member:

Good point. 👍

settings.MEDIA_URL,
type_,
project_slug,
version_slug,
'%s.%s' % (project_slug, type_.replace('htmlzip', 'zip')),
)
return HttpResponseRedirect(media_path)

# Get relative media path
path = (
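The resulting lookup order in `project_download_media` can be sketched as a small pure function. This is a sketch under assumptions: the storage check and URL generation are replaced by plain arguments, and `posixpath.join` stands in for the `os.path.join` on `MEDIA_URL` used above (URL paths always use forward slashes):

```python
import posixpath

def download_redirect(storage_has_file, storage_url, media_url,
                      type_, project_slug, version_slug):
    # Prefer the media-storage URL; fall back to the legacy
    # MEDIA_URL path served from the web servers' local disk
    if storage_has_file:
        return storage_url
    filename = '{}.{}'.format(project_slug, type_.replace('htmlzip', 'zip'))
    return posixpath.join(media_url, type_, project_slug,
                          version_slug, filename)

print(download_redirect(False, None, '/media/', 'htmlzip',
                        'myproject', 'latest'))
# -> /media/htmlzip/myproject/latest/myproject.zip
```

This matches the transition plan discussed in the review: files not yet uploaded to storage keep working through the legacy path, so no downloads break during the migration.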