From 8f9576dc439d0217a0c29ee76b2474d1e3a5fa3f Mon Sep 17 00:00:00 2001 From: David Fischer Date: Mon, 3 Dec 2018 13:07:20 -0800 Subject: [PATCH 01/15] Store ePubs and PDFs in media storage --- readthedocs/builds/syncers.py | 69 ++++++++++++++++++++++++++++ readthedocs/projects/models.py | 37 ++++++++++++--- readthedocs/projects/tasks.py | 39 ++++++++++------ readthedocs/projects/views/public.py | 20 ++++++-- 4 files changed, 139 insertions(+), 26 deletions(-) diff --git a/readthedocs/builds/syncers.py b/readthedocs/builds/syncers.py index 0ac0c6dedf6..431cfecf718 100644 --- a/readthedocs/builds/syncers.py +++ b/readthedocs/builds/syncers.py @@ -14,12 +14,15 @@ from builtins import object from django.conf import settings +from django.core.exceptions import SuspiciousFileOperation +from django.core.files.storage import get_storage_class, FileSystemStorage from readthedocs.core.utils.extend import SettingsOverrideObject from readthedocs.core.utils import safe_makedirs log = logging.getLogger(__name__) +storage = get_storage_class()() class BaseSyncer(object): @@ -43,6 +46,11 @@ def copy(cls, path, target, is_file=False, **kwargs): return if os.path.exists(target): os.remove(target) + + # Create containing directory if it doesn't exist + directory = os.path.dirname(target) + safe_makedirs(directory) + shutil.copy2(path, target) else: if os.path.exists(target): @@ -138,6 +146,10 @@ def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=a log.info("Remote Pull %s to %s", path, target) if not is_file and not os.path.exists(target): safe_makedirs(target) + if is_file: + # Create containing directory if it doesn't exist + directory = os.path.dirname(target) + safe_makedirs(directory) # Add a slash when copying directories sync_cmd = "rsync -e 'ssh -T' -av --delete {user}@{host}:{path} {target}".format( host=host, @@ -154,6 +166,63 @@ def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=a ) +class 
SelectiveStorageRemotePuller(RemotePuller): + + """ + Exactly like RemotePuller except that certain files are copied via Django's storage system + + If a file with extensions specified by ``extensions`` is copied, it will be copied to storage + and the original is removed. + + See: https://docs.djangoproject.com/en/1.11/ref/settings/#std:setting-DEFAULT_FILE_STORAGE + """ + + extensions = ('.pdf', '.epub', '.zip') + + @classmethod + def get_storage_path(cls, path): + """ + Gets the path to the file within the storage engine + + For example, if the path was $MEDIA_ROOT/pdfs/latest.pdf + the storage_path is 'pdfs/latest.pdf' + + :raises: SuspiciousFileOperation if the path isn't under settings.MEDIA_ROOT + """ + path = os.path.normpath(path) + if not path.startswith(settings.MEDIA_ROOT): + raise SuspiciousFileOperation + + path = path.replace(settings.MEDIA_ROOT, '').lstrip('/') + return path + + @classmethod + def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=arguments-differ + RemotePuller.copy(path, target, host, is_file, **kwargs) + + if isinstance(storage, FileSystemStorage): + # This is a sanity check for the case where + # storage is backed by the local filesystem + # In that case, removing the original target file locally + # would remove the file from storage as well + return + + if is_file and os.path.exists(target) and \ + any([target.lower().endswith(ext) for ext in cls.extensions]): + log.info("Selective Copy %s to media storage", target) + + storage_path = cls.get_storage_path(target) + + if storage.exists(storage_path): + storage.delete(storage_path) + + with open(target, 'rb') as fd: + storage.save(storage_path, fd) + + # remove the original after copying + os.remove(target) + + class Syncer(SettingsOverrideObject): _default_class = LocalSyncer _override_setting = 'FILE_SYNCER' diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 2fc0dcf9e0f..d1ef073a59d 100644 --- 
a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -12,6 +12,7 @@ from django.conf import settings from django.contrib.auth.models import User from django.core.urlresolvers import NoReverseMatch, reverse +from django.core.files.storage import get_storage_class from django.db import models from django.utils.encoding import python_2_unicode_compatible from django.utils.translation import ugettext_lazy as _ @@ -37,6 +38,7 @@ from readthedocs.vcs_support.utils import Lock, NonBlockingLock log = logging.getLogger(__name__) +storage = get_storage_class()() @python_2_unicode_compatible @@ -411,6 +413,24 @@ def get_subproject_urls(self): return [(proj.child.slug, proj.child.get_docs_url()) for proj in self.subprojects.all()] + def get_storage_path(self, type_, version_slug=LATEST): + """ + Get a path to a build artifact for use with Django's storage system + + :param type_: Media content type, ie - 'pdf', 'htmlzip' + :param version_slug: Project version slug for lookup + :return: the path to an item in storage + (can be used with ``storage.url`` to get the URL) + """ + extension = type_.replace('htmlzip', 'zip') + return '{}/{}/{}/{}.{}'.format( + type_, + self.slug, + version_slug, + self.slug, + extension, + ) + def get_production_media_path(self, type_, version_slug, include_file=True): """ Used to see if these files exist so we can offer them for download. 
@@ -612,18 +632,23 @@ def has_aliases(self): def has_pdf(self, version_slug=LATEST): if not self.enable_pdf_build: return False - return os.path.exists(self.get_production_media_path( - type_='pdf', version_slug=version_slug)) + + path = self.get_production_media_path(type_='pdf', version_slug=version_slug) + storage_path = self.get_storage_path(type_='pdf', version_slug=version_slug) + return os.path.exists(path) or storage.exists(storage_path) def has_epub(self, version_slug=LATEST): if not self.enable_epub_build: return False - return os.path.exists(self.get_production_media_path( - type_='epub', version_slug=version_slug)) + + path = self.get_production_media_path(type_='epub', version_slug=version_slug) + storage_path = self.get_storage_path(type_='epub', version_slug=version_slug) + return os.path.exists(path) or storage.exists(storage_path) def has_htmlzip(self, version_slug=LATEST): - return os.path.exists(self.get_production_media_path( - type_='htmlzip', version_slug=version_slug)) + path = self.get_production_media_path(type_='htmlzip', version_slug=version_slug) + storage_path = self.get_storage_path(type_='htmlzip', version_slug=version_slug) + return os.path.exists(path) or storage.exists(storage_path) @property def sponsored(self): diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index b36df403baf..ff78a8bf485 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -957,40 +957,49 @@ def move_files(version_pk, hostname, html=False, localmedia=False, Syncer.copy(from_path, to_path, host=hostname) if localmedia: - from_path = version.project.artifact_path( - version=version.slug, - type_='sphinx_localmedia', + from_path = os.path.join( + version.project.artifact_path( + version=version.slug, + type_='sphinx_localmedia', + ), + '{}.zip'.format(version.project.slug), ) to_path = version.project.get_production_media_path( type_='htmlzip', version_slug=version.slug, - include_file=False, + 
include_file=True, ) - Syncer.copy(from_path, to_path, host=hostname) + Syncer.copy(from_path, to_path, host=hostname, is_file=True) # Always move PDF's because the return code lies. if pdf: - from_path = version.project.artifact_path( - version=version.slug, - type_='sphinx_pdf', + from_path = os.path.join( + version.project.artifact_path( + version=version.slug, + type_='sphinx_pdf', + ), + '{}.pdf'.format(version.project.slug), ) to_path = version.project.get_production_media_path( type_='pdf', version_slug=version.slug, - include_file=False, + include_file=True, ) - Syncer.copy(from_path, to_path, host=hostname) + Syncer.copy(from_path, to_path, host=hostname, is_file=True) if epub: - from_path = version.project.artifact_path( - version=version.slug, - type_='sphinx_epub', + from_path = os.path.join( + version.project.artifact_path( + version=version.slug, + type_='sphinx_epub', + ), + '{}.epub'.format(version.project.slug), ) to_path = version.project.get_production_media_path( type_='epub', version_slug=version.slug, - include_file=False, + include_file=True, ) - Syncer.copy(from_path, to_path, host=hostname) + Syncer.copy(from_path, to_path, host=hostname, is_file=True) @app.task(queue='web') diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py index e5a3e2bf5ea..c3d69ff36f5 100644 --- a/readthedocs/projects/views/public.py +++ b/readthedocs/projects/views/public.py @@ -16,6 +16,7 @@ from django.contrib import messages from django.contrib.auth.models import User from django.core.cache import cache +from django.core.files.storage import get_storage_class from django.core.urlresolvers import reverse from django.http import Http404, HttpResponse, HttpResponseRedirect from django.shortcuts import get_object_or_404, render @@ -35,6 +36,7 @@ log = logging.getLogger(__name__) search_log = logging.getLogger(__name__ + '.search') mimetypes.add_type('application/epub+zip', '.epub') +storage = get_storage_class()() class 
ProjectIndex(ListView): @@ -151,7 +153,7 @@ def project_badge(request, project_slug): def project_downloads(request, project_slug): - """A detail view for a project with various dataz.""" + """A detail view for a project with various downloads.""" project = get_object_or_404( Project.objects.protected(request.user), slug=project_slug) versions = Version.objects.public(user=request.user, project=project) @@ -190,10 +192,18 @@ def project_download_media(request, project_slug, type_, version_slug): ) privacy_level = getattr(settings, 'DEFAULT_PRIVACY_LEVEL', 'public') if privacy_level == 'public' or settings.DEBUG: - path = os.path.join( - settings.MEDIA_URL, type_, project_slug, version_slug, - '%s.%s' % (project_slug, type_.replace('htmlzip', 'zip'))) - return HttpResponseRedirect(path) + storage_path = version.project.get_storage_path(type_=type_, version_slug=version_slug) + if storage.exists(storage_path): + return HttpResponseRedirect(storage.url(storage_path)) + + media_path = os.path.join( + settings.MEDIA_URL, + type_, + project_slug, + version_slug, + '%s.%s' % (project_slug, type_.replace('htmlzip', 'zip')), + ) + return HttpResponseRedirect(media_path) # Get relative media path path = ( From 7a2216e42090495c84b57c8b1e7628f9848a4709 Mon Sep 17 00:00:00 2001 From: David Fischer Date: Mon, 17 Dec 2018 15:44:02 -0800 Subject: [PATCH 02/15] Add basic tests --- readthedocs/rtd_tests/tests/test_project.py | 14 ++++++++++++++ readthedocs/rtd_tests/tests/test_project_views.py | 11 +++++++++++ 2 files changed, 25 insertions(+) diff --git a/readthedocs/rtd_tests/tests/test_project.py b/readthedocs/rtd_tests/tests/test_project.py index 679b761a25a..dadfa9c7a39 100644 --- a/readthedocs/rtd_tests/tests/test_project.py +++ b/readthedocs/rtd_tests/tests/test_project.py @@ -118,6 +118,20 @@ def test_multiple_conf_files(self, find_method): ProjectConfigurationError.MULTIPLE_CONF_FILES) as cm: self.pip.conf_file() + def test_get_storage_path(self): + self.assertEqual( + 
self.pip.get_storage_path('pdf', LATEST), + 'pdf/pip/latest/pip.pdf', + ) + self.assertEqual( + self.pip.get_storage_path('epub', LATEST), + 'epub/pip/latest/pip.epub', + ) + self.assertEqual( + self.pip.get_storage_path('htmlzip', LATEST), + 'htmlzip/pip/latest/pip.zip', + ) + class TestProjectTranslations(ProjectMixin, TestCase): diff --git a/readthedocs/rtd_tests/tests/test_project_views.py b/readthedocs/rtd_tests/tests/test_project_views.py index c6e4a93f884..259719f85a0 100644 --- a/readthedocs/rtd_tests/tests/test_project_views.py +++ b/readthedocs/rtd_tests/tests/test_project_views.py @@ -15,6 +15,7 @@ import six from readthedocs.builds.models import Build, Version +from readthedocs.builds.constants import LATEST from readthedocs.rtd_tests.base import (WizardTestCase, MockBuildTestCase, RequestFactoryTestMixin) from readthedocs.oauth.models import RemoteRepository @@ -346,6 +347,16 @@ def test_import_demo_imported_duplicate(self): Project.objects.get(slug='eric-demo')) +class TestPublicViews(MockBuildTestCase): + def setUp(self): + self.pip = get(Project, slug='pip') + + def test_project_download_media(self): + url = reverse('project_download_media', args=[self.pip.slug, 'pdf', LATEST]) + response = self.client.get(url) + self.assertEqual(response.status_code, 302) + + class TestPrivateViews(MockBuildTestCase): def setUp(self): self.user = new(User, username='eric') From c3fa90e6947bc88be0b8e5dc1251b9469190d09b Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 5 Feb 2019 10:32:04 -0300 Subject: [PATCH 03/15] Fix merge error --- readthedocs/projects/tasks.py | 40 +++++++++++++++++++++-------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index a8b10eecd09..12c9e38ec03 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -1000,40 +1000,48 @@ def move_files( Syncer.copy(from_path, to_path, host=hostname) if localmedia: - from_path = 
version.project.artifact_path( - version=version.slug, - type_='sphinx_localmedia', + from_path = os.path.join( + version.project.artifact_path( + version=version.slug, + type_='sphinx_localmedia', + ), + '{}.zip'.format(version.project.slug), ) to_path = version.project.get_production_media_path( type_='htmlzip', version_slug=version.slug, - include_file=False, + include_file=True, ) - Syncer.copy(from_path, to_path, host=hostname) - + Syncer.copy(from_path, to_path, host=hostname, is_file=True) # Always move PDF's because the return code lies. if pdf: - from_path = version.project.artifact_path( - version=version.slug, - type_='sphinx_pdf', + from_path = os.path.join( + version.project.artifact_path( + version=version.slug, + type_='sphinx_pdf', + ), + '{}.pdf'.format(version.project.slug), ) to_path = version.project.get_production_media_path( type_='pdf', version_slug=version.slug, - include_file=False, + include_file=True, ) - Syncer.copy(from_path, to_path, host=hostname) + Syncer.copy(from_path, to_path, host=hostname, is_file=True) if epub: - from_path = version.project.artifact_path( - version=version.slug, - type_='sphinx_epub', + from_path = os.path.join( + version.project.artifact_path( + version=version.slug, + type_='sphinx_epub', + ), + '{}.epub'.format(version.project.slug), ) to_path = version.project.get_production_media_path( type_='epub', version_slug=version.slug, - include_file=False, + include_file=True, ) - Syncer.copy(from_path, to_path, host=hostname) + Syncer.copy(from_path, to_path, host=hostname, is_file=True) @app.task(queue='web') From b500528541ccd0cead927c03547487edcfa2c1da Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 5 Feb 2019 10:35:53 -0300 Subject: [PATCH 04/15] Fix another merge error --- readthedocs/projects/views/public.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py index 827849aebf4..3d5398f3c67 100644 --- 
a/readthedocs/projects/views/public.py +++ b/readthedocs/projects/views/public.py @@ -210,28 +210,18 @@ def project_download_media(request, project_slug, type_, version_slug): ) privacy_level = getattr(settings, 'DEFAULT_PRIVACY_LEVEL', 'public') if privacy_level == 'public' or settings.DEBUG: -<<<<<<< HEAD storage_path = version.project.get_storage_path(type_=type_, version_slug=version_slug) if storage.exists(storage_path): return HttpResponseRedirect(storage.url(storage_path)) media_path = os.path.join( -======= - path = os.path.join( ->>>>>>> origin/master settings.MEDIA_URL, type_, project_slug, version_slug, -<<<<<<< HEAD '%s.%s' % (project_slug, type_.replace('htmlzip', 'zip')), ) return HttpResponseRedirect(media_path) -======= - '{}.{}'.format(project_slug, type_.replace('htmlzip', 'zip')), - ) - return HttpResponseRedirect(path) ->>>>>>> origin/master # Get relative media path path = ( From 8a602c827f8028b5ea6965def73d4fa26fd2cf22 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 5 Feb 2019 12:13:45 -0300 Subject: [PATCH 05/15] Send move_files task to only one web when uploading to Azure --- readthedocs/projects/tasks.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 12c9e38ec03..d7162fc2de9 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -20,6 +20,7 @@ import requests from celery.exceptions import SoftTimeLimitExceeded from django.conf import settings +from django.core.files.storage import get_storage_class, FileSystemStorage from django.db.models import Q from django.urls import reverse from django.utils import timezone @@ -79,6 +80,7 @@ log = logging.getLogger(__name__) +storage = get_storage_class()() class SyncRepositoryMixin: @@ -722,6 +724,24 @@ def update_app_instances( 'Updating version failed, skipping file sync: version=%s', self.version, ) + hostname = socket.gethostname() + + if not 
isinstance(storage, FileSystemStorage): + # Handle the case where we want to upload some built assets to our storage + move_files.delay( + self.version.pk, + hostname, + self.config.doctype, + html=False, + search=False, + localmedia=localmedia, + pdf=pdf, + epub=epub, + ) + # Set variables so they don't get synced in the next broadcast step + localmedia = False + pdf = False + epub = False # Broadcast finalization steps to web application instances broadcast( @@ -733,7 +753,7 @@ def update_app_instances( self.config.doctype, ], kwargs=dict( - hostname=socket.gethostname(), + hostname=hostname, html=html, localmedia=localmedia, search=search, @@ -743,7 +763,6 @@ def update_app_instances( callback=sync_callback.s( version_pk=self.version.pk, commit=self.build['commit'], - search=search, ), ) From 6edacad69c303fbc1bc3a8117b0daac5d3926aa2 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 5 Feb 2019 12:29:27 -0300 Subject: [PATCH 06/15] Handle deleting on the webs in the normal case --- readthedocs/projects/tasks.py | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index d7162fc2de9..ec595d1f223 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -705,6 +705,7 @@ def update_app_instances( search=False, pdf=False, epub=False, + delete=True, ): """ Update application instances with build artifacts. @@ -742,6 +743,7 @@ def update_app_instances( localmedia = False pdf = False epub = False + delete = False # Broadcast finalization steps to web application instances broadcast( @@ -759,6 +761,7 @@ def update_app_instances( search=search, pdf=pdf, epub=epub, + delete=delete, ), callback=sync_callback.s( version_pk=self.version.pk, @@ -915,6 +918,7 @@ def sync_files( search=False, pdf=False, epub=False, + delete=False, ): """ Sync build artifacts to application instances. 
@@ -926,20 +930,21 @@ def sync_files( version = Version.objects.get_object_or_log(pk=version_pk) if not version: return - if not pdf: - remove_dirs([ - version.project.get_production_media_path( - type_='pdf', - version_slug=version.slug, - ), - ]) - if not epub: - remove_dirs([ - version.project.get_production_media_path( - type_='epub', - version_slug=version.slug, - ), - ]) + if delete: + if not pdf: + remove_dirs([ + version.project.get_production_media_path( + type_='pdf', + version_slug=version.slug, + ), + ]) + if not epub: + remove_dirs([ + version.project.get_production_media_path( + type_='epub', + version_slug=version.slug, + ), + ]) # Sync files to the web servers move_files( From 03787f4881dd7431c00b80bfccf1b4f9f7c25a28 Mon Sep 17 00:00:00 2001 From: David Fischer Date: Tue, 12 Feb 2019 16:13:47 -0800 Subject: [PATCH 07/15] Improved filesystem storage check - Basically we need to explicitly tell the storage to sync build media --- readthedocs/builds/syncers.py | 24 ++++++++++-------------- readthedocs/projects/tasks.py | 4 ++-- 2 files changed, 12 insertions(+), 16 deletions(-) diff --git a/readthedocs/builds/syncers.py b/readthedocs/builds/syncers.py index 87249ecf973..ca8b5612a2c 100644 --- a/readthedocs/builds/syncers.py +++ b/readthedocs/builds/syncers.py @@ -14,7 +14,7 @@ from django.conf import settings from django.core.exceptions import SuspiciousFileOperation -from django.core.files.storage import get_storage_class, FileSystemStorage +from django.core.files.storage import get_storage_class from readthedocs.core.utils import safe_makedirs from readthedocs.core.utils.extend import SettingsOverrideObject @@ -205,27 +205,23 @@ def get_storage_path(cls, path): def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=arguments-differ RemotePuller.copy(path, target, host, is_file, **kwargs) - if isinstance(storage, FileSystemStorage): + if getattr(storage, 'write_build_media', False): # This is a sanity check for the case where # 
storage is backed by the local filesystem # In that case, removing the original target file locally # would remove the file from storage as well - return - if is_file and os.path.exists(target) and \ - any([target.lower().endswith(ext) for ext in cls.extensions]): - log.info("Selective Copy %s to media storage", target) + if is_file and os.path.exists(target) and \ + any([target.lower().endswith(ext) for ext in cls.extensions]): + log.info("Selective Copy %s to media storage", target) - storage_path = cls.get_storage_path(target) + storage_path = cls.get_storage_path(target) - if storage.exists(storage_path): - storage.delete(storage_path) + if storage.exists(storage_path): + storage.delete(storage_path) - with open(target, 'rb') as fd: - storage.save(storage_path, fd) - - # remove the original after copying - os.remove(target) + with open(target, 'rb') as fd: + storage.save(storage_path, fd) class Syncer(SettingsOverrideObject): diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index ec595d1f223..0c8585fe3a1 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -20,7 +20,7 @@ import requests from celery.exceptions import SoftTimeLimitExceeded from django.conf import settings -from django.core.files.storage import get_storage_class, FileSystemStorage +from django.core.files.storage import get_storage_class from django.db.models import Q from django.urls import reverse from django.utils import timezone @@ -727,7 +727,7 @@ def update_app_instances( ) hostname = socket.gethostname() - if not isinstance(storage, FileSystemStorage): + if getattr(storage, 'write_build_media', False): # Handle the case where we want to upload some built assets to our storage move_files.delay( self.version.pk, From 4fab169e6f7291d525f1d48ee318cd1ae2a38d0d Mon Sep 17 00:00:00 2001 From: David Fischer Date: Wed, 13 Feb 2019 12:09:18 -0800 Subject: [PATCH 08/15] Remove old PDFs/ePubs when they are no longer built --- 
readthedocs/projects/tasks.py | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 0c8585fe3a1..ee479a344ee 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -938,6 +938,16 @@ def sync_files( version_slug=version.slug, ), ]) + + if getattr(storage, 'write_build_media', False): + # Remove PDF from remote storage if it exists + storage_path = version.project.get_storage_path( + type_='pdf', + version_slug=version.slug, + ) + if storage.exists(storage_path): + log.info('Removing %s from media storage', storage_path) + storage.delete(storage_path) if not epub: remove_dirs([ version.project.get_production_media_path( @@ -946,6 +956,16 @@ def sync_files( ), ]) + if getattr(storage, 'write_build_media', False): + # Remove ePub from remote storage if it exists + storage_path = version.project.get_storage_path( + type_='epub', + version_slug=version.slug, + ) + if storage.exists(storage_path): + log.info('Removing %s from media storage', storage_path) + storage.delete(storage_path) + # Sync files to the web servers move_files( version_pk, From 5e9597a37895d5d29b658c6f1c5c8c16b136ac42 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Mon, 25 Feb 2019 17:15:49 -0300 Subject: [PATCH 09/15] Remove delete logic --- readthedocs/projects/tasks.py | 69 ++++++++++++++++------------------- 1 file changed, 32 insertions(+), 37 deletions(-) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index fac62ea1555..276b7831df9 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -720,7 +720,6 @@ def update_app_instances( search=False, pdf=False, epub=False, - delete=True, ): """ Update application instances with build artifacts. 
@@ -758,7 +757,6 @@ def update_app_instances( localmedia = False pdf = False epub = False - delete = False # Broadcast finalization steps to web application instances broadcast( @@ -776,7 +774,6 @@ def update_app_instances( search=search, pdf=pdf, epub=epub, - delete=delete, ), callback=sync_callback.s( version_pk=self.version.pk, @@ -933,7 +930,6 @@ def sync_files( search=False, pdf=False, epub=False, - delete=False, ): """ Sync build artifacts to application instances. @@ -945,41 +941,40 @@ def sync_files( version = Version.objects.get_object_or_log(pk=version_pk) if not version: return - if delete: - if not pdf: - remove_dirs([ - version.project.get_production_media_path( - type_='pdf', - version_slug=version.slug, - ), - ]) + if not pdf: + remove_dirs([ + version.project.get_production_media_path( + type_='pdf', + version_slug=version.slug, + ), + ]) - if getattr(storage, 'write_build_media', False): - # Remove PDF from remote storage if it exists - storage_path = version.project.get_storage_path( - type_='pdf', - version_slug=version.slug, - ) - if storage.exists(storage_path): - log.info('Removing %s from media storage', storage_path) - storage.delete(storage_path) - if not epub: - remove_dirs([ - version.project.get_production_media_path( - type_='epub', - version_slug=version.slug, - ), - ]) + if getattr(storage, 'write_build_media', False): + # Remove PDF from remote storage if it exists + storage_path = version.project.get_storage_path( + type_='pdf', + version_slug=version.slug, + ) + if storage.exists(storage_path): + log.info('Removing %s from media storage', storage_path) + storage.delete(storage_path) + if not epub: + remove_dirs([ + version.project.get_production_media_path( + type_='epub', + version_slug=version.slug, + ), + ]) - if getattr(storage, 'write_build_media', False): - # Remove ePub from remote storage if it exists - storage_path = version.project.get_storage_path( - type_='epub', - version_slug=version.slug, - ) - if 
storage.exists(storage_path): - log.info('Removing %s from media storage', storage_path) - storage.delete(storage_path) + if getattr(storage, 'write_build_media', False): + # Remove ePub from remote storage if it exists + storage_path = version.project.get_storage_path( + type_='epub', + version_slug=version.slug, + ) + if storage.exists(storage_path): + log.info('Removing %s from media storage', storage_path) + storage.delete(storage_path) # Sync files to the web servers move_files( From 440567ae341d06679f0c3a0e411321d17ec849e2 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Mon, 25 Feb 2019 17:53:38 -0300 Subject: [PATCH 10/15] Fix delete logic with a comment --- readthedocs/projects/tasks.py | 102 ++++++++++++++++++++++------------ 1 file changed, 68 insertions(+), 34 deletions(-) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 276b7831df9..31ec810db7b 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -741,6 +741,8 @@ def update_app_instances( ) hostname = socket.gethostname() + delete_unsynced_media = True + if getattr(storage, 'write_build_media', False): # Handle the case where we want to upload some built assets to our storage move_files.delay( @@ -752,11 +754,13 @@ def update_app_instances( localmedia=localmedia, pdf=pdf, epub=epub, + delete_unsynced_media=delete_unsynced_media, ) # Set variables so they don't get synced in the next broadcast step localmedia = False pdf = False epub = False + delete_unsynced_media = False # Broadcast finalization steps to web application instances broadcast( @@ -774,6 +778,7 @@ def update_app_instances( search=search, pdf=pdf, epub=epub, + delete_unsynced_media=delete_unsynced_media, ), callback=sync_callback.s( version_pk=self.version.pk, @@ -930,6 +935,7 @@ def sync_files( search=False, pdf=False, epub=False, + delete_unsynced_media=True ): """ Sync build artifacts to application instances. 
@@ -941,40 +947,6 @@ def sync_files( version = Version.objects.get_object_or_log(pk=version_pk) if not version: return - if not pdf: - remove_dirs([ - version.project.get_production_media_path( - type_='pdf', - version_slug=version.slug, - ), - ]) - - if getattr(storage, 'write_build_media', False): - # Remove PDF from remote storage if it exists - storage_path = version.project.get_storage_path( - type_='pdf', - version_slug=version.slug, - ) - if storage.exists(storage_path): - log.info('Removing %s from media storage', storage_path) - storage.delete(storage_path) - if not epub: - remove_dirs([ - version.project.get_production_media_path( - type_='epub', - version_slug=version.slug, - ), - ]) - - if getattr(storage, 'write_build_media', False): - # Remove ePub from remote storage if it exists - storage_path = version.project.get_storage_path( - type_='epub', - version_slug=version.slug, - ) - if storage.exists(storage_path): - log.info('Removing %s from media storage', storage_path) - storage.delete(storage_path) # Sync files to the web servers move_files( @@ -986,6 +958,7 @@ def sync_files( search=search, pdf=pdf, epub=epub, + delete_unsynced_media=delete_unsynced_media, ) # Symlink project @@ -1005,6 +978,7 @@ def move_files( search=False, pdf=False, epub=False, + delete_unsynced_media=True, ): """ Task to move built documentation to web servers. @@ -1021,10 +995,70 @@ def move_files( :type pdf: bool :param epub: Sync ePub files :type epub: bool + :param delete_unsynced_media: Whether to try and delete files. 
+ :type delete_unsynced_media: bool """ version = Version.objects.get_object_or_log(pk=version_pk) if not version: return + + # This is False if we have already synced media files to blob storage + # We set `epub=False` for example so data doesn't get re-uploaded on each web, + # so we need this to protect against deleting in those cases + if delete_unsynced_media: + if not pdf: + remove_dirs([ + version.project.get_production_media_path( + type_='pdf', + version_slug=version.slug, + ), + ]) + + if getattr(storage, 'write_build_media', False): + # Remove PDF from remote storage if it exists + storage_path = version.project.get_storage_path( + type_='pdf', + version_slug=version.slug, + ) + if storage.exists(storage_path): + log.info('Removing %s from media storage', storage_path) + storage.delete(storage_path) + if not epub: + remove_dirs([ + version.project.get_production_media_path( + type_='epub', + version_slug=version.slug, + ), + ]) + + if getattr(storage, 'write_build_media', False): + # Remove ePub from remote storage if it exists + storage_path = version.project.get_storage_path( + type_='epub', + version_slug=version.slug, + ) + if storage.exists(storage_path): + log.info('Removing %s from media storage', storage_path) + storage.delete(storage_path) + + if not localmedia: + remove_dirs([ + version.project.get_production_media_path( + type_='htmlzip', + version_slug=version.slug, + ), + ]) + + if getattr(storage, 'write_build_media', False): + # Remove ePub from remote storage if it exists + storage_path = version.project.get_storage_path( + type_='htmlzip', + version_slug=version.slug, + ) + if storage.exists(storage_path): + log.info('Removing %s from media storage', storage_path) + storage.delete(storage_path) + log.debug( LOG_TEMPLATE.format( project=version.project.slug, From 9ca62dbee1b61fcbb7dfead6f915e6fe50c758c0 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Mon, 25 Feb 2019 18:08:57 -0300 Subject: [PATCH 11/15] Fix local file delete logic 
with include_file=False --- readthedocs/projects/tasks.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 31ec810db7b..61914bef763 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -1006,11 +1006,14 @@ def move_files( # We set `epub=False` for example so data doesn't get re-uploaded on each web, # so we need this to protect against deleting in those cases if delete_unsynced_media: + if not pdf: + remove_dirs([ version.project.get_production_media_path( type_='pdf', version_slug=version.slug, + include_file=False, ), ]) @@ -1023,11 +1026,14 @@ def move_files( if storage.exists(storage_path): log.info('Removing %s from media storage', storage_path) storage.delete(storage_path) + if not epub: + remove_dirs([ version.project.get_production_media_path( type_='epub', version_slug=version.slug, + include_file=False, ), ]) @@ -1042,10 +1048,12 @@ def move_files( storage.delete(storage_path) if not localmedia: + remove_dirs([ version.project.get_production_media_path( type_='htmlzip', version_slug=version.slug, + include_file=False, ), ]) From f97833bc5e436c666543b8271249b094dac23b2a Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Mon, 25 Feb 2019 18:26:23 -0300 Subject: [PATCH 12/15] Lint --- readthedocs/builds/syncers.py | 9 +++--- readthedocs/projects/models.py | 28 +++++++++++++------ readthedocs/projects/tasks.py | 16 +++-------- readthedocs/projects/views/public.py | 8 +++--- readthedocs/rtd_tests/tests/test_project.py | 7 ++--- .../rtd_tests/tests/test_project_views.py | 5 ++-- 6 files changed, 35 insertions(+), 38 deletions(-) diff --git a/readthedocs/builds/syncers.py b/readthedocs/builds/syncers.py index ca8b5612a2c..db94b418e02 100644 --- a/readthedocs/builds/syncers.py +++ b/readthedocs/builds/syncers.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ Classes to copy files between build and web servers. 
@@ -174,7 +172,8 @@ def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=a class SelectiveStorageRemotePuller(RemotePuller): """ - Exactly like RemotePuller except that certain files are copied via Django's storage system + Exactly like RemotePuller except that certain files are copied via Django's + storage system. If a file with extensions specified by ``extensions`` is copied, it will be copied to storage and the original is removed. @@ -187,7 +186,7 @@ class SelectiveStorageRemotePuller(RemotePuller): @classmethod def get_storage_path(cls, path): """ - Gets the path to the file within the storage engine + Gets the path to the file within the storage engine. For example, if the path was $MEDIA_ROOT/pdfs/latest.pdf the storage_path is 'pdfs/latest.pdf' @@ -213,7 +212,7 @@ def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=a if is_file and os.path.exists(target) and \ any([target.lower().endswith(ext) for ext in cls.extensions]): - log.info("Selective Copy %s to media storage", target) + log.info('Selective Copy %s to media storage', target) storage_path = cls.get_storage_path(target) diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 29d67a3bbe6..c94aadd9d4e 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Project models.""" import fnmatch @@ -514,7 +512,7 @@ def get_subproject_urls(self): def get_storage_path(self, type_, version_slug=LATEST): """ - Get a path to a build artifact for use with Django's storage system + Get a path to a build artifact for use with Django's storage system. 
:param type_: Media content type, ie - 'pdf', 'htmlzip' :param version_slug: Project version slug for lookup @@ -748,20 +746,32 @@ def has_aliases(self): def has_pdf(self, version_slug=LATEST): if not self.enable_pdf_build: return False - path = self.get_production_media_path(type_='pdf', version_slug=version_slug) - storage_path = self.get_storage_path(type_='pdf', version_slug=version_slug) + path = self.get_production_media_path( + type_='pdf', version_slug=version_slug + ) + storage_path = self.get_storage_path( + type_='pdf', version_slug=version_slug + ) return os.path.exists(path) or storage.exists(storage_path) def has_epub(self, version_slug=LATEST): if not self.enable_epub_build: return False - path = self.get_production_media_path(type_='epub', version_slug=version_slug) - storage_path = self.get_storage_path(type_='epub', version_slug=version_slug) + path = self.get_production_media_path( + type_='epub', version_slug=version_slug + ) + storage_path = self.get_storage_path( + type_='epub', version_slug=version_slug + ) return os.path.exists(path) or storage.exists(storage_path) def has_htmlzip(self, version_slug=LATEST): - path = self.get_production_media_path(type_='htmlzip', version_slug=version_slug) - storage_path = self.get_storage_path(type_='htmlzip', version_slug=version_slug) + path = self.get_production_media_path( + type_='htmlzip', version_slug=version_slug + ) + storage_path = self.get_storage_path( + type_='htmlzip', version_slug=version_slug + ) return os.path.exists(path) or storage.exists(storage_path) @property diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 61914bef763..721bfa9d1e4 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """ Tasks related to projects. 
@@ -854,7 +852,8 @@ def build_docs_html(self): type='app', task=move_files, args=[ - self.version.pk, socket.gethostname(), self.config.doctype + self.version.pk, + socket.gethostname(), self.config.doctype ], kwargs=dict(html=True), ) @@ -926,15 +925,8 @@ def is_type_sphinx(self): # Web tasks @app.task(queue='web') def sync_files( - project_pk, - version_pk, - doctype, - hostname=None, - html=False, - localmedia=False, - search=False, - pdf=False, - epub=False, + project_pk, version_pk, doctype, hostname=None, html=False, + localmedia=False, search=False, pdf=False, epub=False, delete_unsynced_media=True ): """ diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py index aa994bcb806..2976cecb693 100644 --- a/readthedocs/projects/views/public.py +++ b/readthedocs/projects/views/public.py @@ -1,5 +1,3 @@ -# -*- coding: utf-8 -*- - """Public project views.""" import json @@ -15,7 +13,7 @@ from django.contrib.auth.models import User from django.core.cache import cache from django.core.files.storage import get_storage_class -from django.http import Http404, HttpResponse, HttpResponseRedirect +from django.http import HttpResponse, HttpResponseRedirect from django.shortcuts import get_object_or_404, render from django.urls import reverse from django.views.decorators.cache import never_cache @@ -208,7 +206,9 @@ def project_download_media(request, project_slug, type_, version_slug): ) privacy_level = getattr(settings, 'DEFAULT_PRIVACY_LEVEL', 'public') if privacy_level == 'public' or settings.DEBUG: - storage_path = version.project.get_storage_path(type_=type_, version_slug=version_slug) + storage_path = version.project.get_storage_path( + type_=type_, version_slug=version_slug + ) if storage.exists(storage_path): return HttpResponseRedirect(storage.url(storage_path)) diff --git a/readthedocs/rtd_tests/tests/test_project.py b/readthedocs/rtd_tests/tests/test_project.py index b21cd858bae..178ec745cd5 100644 --- 
a/readthedocs/rtd_tests/tests/test_project.py +++ b/readthedocs/rtd_tests/tests/test_project.py @@ -1,4 +1,3 @@ -# -*- coding: utf-8 -*- import datetime import json @@ -253,10 +252,8 @@ def test_user_can_not_add_other_user_project_as_translation(self): self.assertIsNone(project_b.main_language_project) def test_previous_users_can_list_and_delete_translations_not_owner(self): - """ - Test to make sure that previous users can list and delete - projects where they aren't owners. - """ + """Test to make sure that previous users can list and delete projects + where they aren't owners.""" user_a = User.objects.get(username='eric') project_a = get( Project, users=[user_a], diff --git a/readthedocs/rtd_tests/tests/test_project_views.py b/readthedocs/rtd_tests/tests/test_project_views.py index 6fc01480fd4..f561f78acd6 100644 --- a/readthedocs/rtd_tests/tests/test_project_views.py +++ b/readthedocs/rtd_tests/tests/test_project_views.py @@ -1,8 +1,7 @@ -# -*- coding: utf-8 -*- from datetime import timedelta -from mock import patch +from allauth.account.models import EmailAddress from django.contrib.auth.models import User from django.contrib.messages import constants as message_const from django.http.response import HttpResponseRedirect @@ -11,7 +10,7 @@ from django.utils import timezone from django.views.generic.base import ContextMixin from django_dynamic_fixture import get, new -from allauth.account.models import EmailAddress +from mock import patch from readthedocs.builds.constants import LATEST from readthedocs.builds.models import Build, Version From 8c14af70bb921eebf19b069c902d3b0970dea104 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Mon, 25 Feb 2019 18:36:02 -0300 Subject: [PATCH 13/15] Fix up lint error on docstring --- readthedocs/builds/syncers.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/readthedocs/builds/syncers.py b/readthedocs/builds/syncers.py index db94b418e02..7291bade16f 100644 --- a/readthedocs/builds/syncers.py +++ 
b/readthedocs/builds/syncers.py @@ -172,8 +172,7 @@ def copy(cls, path, target, host, is_file=False, **kwargs): # pylint: disable=a class SelectiveStorageRemotePuller(RemotePuller): """ - Exactly like RemotePuller except that certain files are copied via Django's - storage system. + Like RemotePuller but certain files are copied via Django's storage system. If a file with extensions specified by ``extensions`` is copied, it will be copied to storage and the original is removed. From 34306adabb97b8f6f1f36601c72a3fc0f6d77065 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Mon, 25 Feb 2019 18:46:03 -0300 Subject: [PATCH 14/15] =?UTF-8?q?Don=E2=80=99t=20delete=20to=20deleting=20?= =?UTF-8?q?unsynced=20media.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only do it when we really mean it. --- readthedocs/projects/tasks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 721bfa9d1e4..e9bd048a18c 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -927,7 +927,7 @@ def is_type_sphinx(self): def sync_files( project_pk, version_pk, doctype, hostname=None, html=False, localmedia=False, search=False, pdf=False, epub=False, - delete_unsynced_media=True + delete_unsynced_media=False, ): """ Sync build artifacts to application instances. @@ -970,7 +970,7 @@ def move_files( search=False, pdf=False, epub=False, - delete_unsynced_media=True, + delete_unsynced_media=False, ): """ Task to move built documentation to web servers. 
From 76456ab1c2efd8a4e135ba9d9decd4e5bec722aa Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 26 Feb 2019 13:34:41 -0300 Subject: [PATCH 15/15] Fix tests for fix around deleting htmlzip --- .../rtd_tests/tests/test_projects_tasks.py | 40 +++++++++++-------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/readthedocs/rtd_tests/tests/test_projects_tasks.py b/readthedocs/rtd_tests/tests/test_projects_tasks.py index 03beaf87106..c91f9b1534c 100644 --- a/readthedocs/rtd_tests/tests/test_projects_tasks.py +++ b/readthedocs/rtd_tests/tests/test_projects_tasks.py @@ -18,8 +18,9 @@ def setUp(self): @patch('readthedocs.builds.syncers.Syncer.copy') @patch('readthedocs.projects.tasks.shutil.rmtree') def test_sync_files_clean_old_artifacts(self, rmtree, copy): - sync_files(self.project.pk, self.version.pk, 'sphinx', html=True) - pdf, epub = rmtree.call_args_list + sync_files(self.project.pk, self.version.pk, 'sphinx', + html=True, delete_unsynced_media=True) + pdf, epub, htmlzip = rmtree.call_args_list # pdf and epub are cleaned args, _ = pdf @@ -28,7 +29,7 @@ def test_sync_files_clean_old_artifacts(self, rmtree, copy): self.assertIn('epub', args[0]) # Artifacts are copied to the rtd-builds directory - copy.assert_called_once() + assert rmtree.call_count == 3 args, _ = copy.call_args self.assertIn('artifacts', args[0]) self.assertIn('sphinx', args[0]) @@ -38,16 +39,18 @@ def test_sync_files_clean_old_artifacts(self, rmtree, copy): @patch('readthedocs.projects.tasks.shutil.rmtree') def test_sync_files_pdf(self, rmtree, copy): sync_files( - self.project.pk, self.version.pk, 'sphinx', pdf=True + self.project.pk, self.version.pk, 'sphinx', pdf=True, delete_unsynced_media=True ) + epub, htmlzip = rmtree.call_args_list # epub is cleaned - rmtree.assert_called_once() - args, _ = rmtree.call_args + args, _ = epub self.assertIn('epub', args[0]) + args, _ = htmlzip + self.assertIn('htmlzip', args[0]) # Artifacts are copied to the media directory - 
copy.assert_called_once() + assert rmtree.call_count == 2 args, _ = copy.call_args self.assertIn('artifacts', args[0]) self.assertIn('sphinx_pdf', args[0]) @@ -57,16 +60,19 @@ def test_sync_files_epub(self, rmtree, copy): sync_files( - self.project.pk, self.version.pk, 'sphinx', epub=True + self.project.pk, self.version.pk, 'sphinx', epub=True, delete_unsynced_media=True ) - # pdf is cleaned - rmtree.assert_called_once() - args, _ = rmtree.call_args + pdf, htmlzip = rmtree.call_args_list + + # pdf and htmlzip are cleaned + args, _ = pdf self.assertIn('pdf', args[0]) + args, _ = htmlzip + self.assertIn('htmlzip', args[0]) # Artifacts are copied to the media directory - copy.assert_called_once() + assert rmtree.call_count == 2 args, _ = copy.call_args self.assertIn('artifacts', args[0]) self.assertIn('sphinx_epub', args[0]) @@ -76,7 +82,7 @@ def test_sync_files_localmedia(self, rmtree, copy): sync_files( - self.project.pk, self.version.pk, 'sphinx', localmedia=True + self.project.pk, self.version.pk, 'sphinx', localmedia=True, delete_unsynced_media=True ) pdf, epub = rmtree.call_args_list @@ -87,7 +93,7 @@ self.assertIn('epub', args[0]) # Artifacts are copied to the media directory - copy.assert_called_once() + assert rmtree.call_count == 2 args, _ = copy.call_args self.assertIn('artifacts', args[0]) self.assertIn('sphinx_localmedia', args[0]) @@ -97,15 +103,17 @@ def test_sync_files_search(self, rmtree, copy): sync_files( - self.project.pk, self.version.pk, 'sphinx', search=True + self.project.pk, self.version.pk, 'sphinx', search=True, delete_unsynced_media=True ) - pdf, epub = rmtree.call_args_list + pdf, epub, htmlzip = 
rmtree.call_args_list # pdf and epub are cleaned args, _ = pdf self.assertIn('pdf', args[0]) args, _ = epub self.assertIn('epub', args[0]) + args, _ = htmlzip + self.assertIn('htmlzip', args[0]) # Artifacts are copied to the media directory copy.assert_called_once()