From e64b0059efbbafd852d3fe82abf12ebcdad4bce6 Mon Sep 17 00:00:00 2001 From: David Fischer Date: Tue, 14 May 2019 14:26:50 -0700 Subject: [PATCH 1/4] Storage updates - Delete from storage when a project or version is deleted - Project delete now handled in a method on the model - Define media type constants (next step: use them everywhere) - A few places still using default storage instead of RTD_BUILD_MEDIA_STORAGE --- readthedocs/builds/models.py | 25 +++++++ readthedocs/builds/storage.py | 4 ++ readthedocs/builds/syncers.py | 2 - readthedocs/projects/constants.py | 13 ++++ readthedocs/projects/models.py | 71 +++++++++++++++---- readthedocs/projects/tasks.py | 14 ++++ readthedocs/projects/views/private.py | 6 +- readthedocs/projects/views/public.py | 14 ++-- .../rtd_tests/tests/test_project_views.py | 2 +- 9 files changed, 124 insertions(+), 27 deletions(-) diff --git a/readthedocs/builds/models.py b/readthedocs/builds/models.py index 493c4997fd2..63da874a39a 100644 --- a/readthedocs/builds/models.py +++ b/readthedocs/builds/models.py @@ -24,6 +24,7 @@ GITLAB_URL, PRIVACY_CHOICES, PRIVATE, + MEDIA_TYPES, ) from readthedocs.projects.models import APIProject, Project from readthedocs.projects.version_handling import determine_stable_version @@ -259,6 +260,11 @@ def delete(self, *args, **kwargs): # pylint: disable=arguments-differ task=tasks.remove_dirs, args=[self.get_artifact_paths()], ) + + # Remove build artifacts from storage + storage_paths = self.get_storage_paths() + tasks.remove_build_storage_paths.delay(storage_paths) + project_pk = self.project.pk super().delete(*args, **kwargs) broadcast( @@ -340,6 +346,25 @@ def get_artifact_paths(self): return paths + def get_storage_paths(self): + """ + Return a list of all build artifact storage paths for this version. + + :rtype: list + """ + paths = [] + + for type_ in MEDIA_TYPES: + paths.append( + self.project.get_storage_path( + type_=type_, + version_slug=self.slug, + include_file=False, + ) + ) + + return paths + def clean_build_path(self): """ Clean build path for project version. diff --git a/readthedocs/builds/storage.py b/readthedocs/builds/storage.py index f79406428a9..06cc795e2c0 100644 --- a/readthedocs/builds/storage.py +++ b/readthedocs/builds/storage.py @@ -1,6 +1,7 @@ import logging from pathlib import Path +from django.core.exceptions import SuspiciousFileOperation from django.core.files.storage import FileSystemStorage from storages.utils import safe_join, get_available_overwrite_name @@ -51,6 +52,9 @@ def delete_directory(self, path): :param path: the path to the directory to remove """ + if path in ('', '/'): + raise SuspiciousFileOperation('Deleting all storage cannot be right') + log.debug('Deleting directory %s from media storage', path) folders, files = self.listdir(self._dirpath(path)) for folder_name in folders: diff --git a/readthedocs/builds/syncers.py b/readthedocs/builds/syncers.py index bee327e88e0..7241288b9cd 100644 --- a/readthedocs/builds/syncers.py +++ b/readthedocs/builds/syncers.py @@ -10,14 +10,12 @@ import shutil from django.conf import settings -from django.core.files.storage import get_storage_class from readthedocs.core.utils import safe_makedirs from readthedocs.core.utils.extend import SettingsOverrideObject log = logging.getLogger(__name__) -storage = get_storage_class()() class BaseSyncer: diff --git a/readthedocs/projects/constants.py b/readthedocs/projects/constants.py index a80e22f19e9..7c83cffd7ea 100644 --- a/readthedocs/projects/constants.py +++ b/readthedocs/projects/constants.py @@ -19,6 +19,19 @@ ('sphinx_singlehtml', _('Sphinx Single Page HTML')), ) +MEDIA_TYPE_HTML = 'html' +MEDIA_TYPE_PDF = 'pdf' +MEDIA_TYPE_EPUB = 'epub' +MEDIA_TYPE_HTMLZIP = 'htmlzip' +MEDIA_TYPE_JSON = 'json' +MEDIA_TYPES = ( + MEDIA_TYPE_HTML, + MEDIA_TYPE_PDF, + MEDIA_TYPE_EPUB, + MEDIA_TYPE_HTMLZIP, + MEDIA_TYPE_JSON, +) + SAMPLE_FILES = ( ('Installation', 'projects/samples/installation.rst.html'), ('Getting started', 'projects/samples/getting_started.rst.html'), diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 04e092e7f10..f048d7646f6 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -42,9 +42,10 @@ from readthedocs.vcs_support.backends import backend_cls from readthedocs.vcs_support.utils import Lock, NonBlockingLock +from .constants import MEDIA_TYPES + log = logging.getLogger(__name__) -storage = get_storage_class()() class ProjectRelationship(models.Model): @@ -463,6 +464,29 @@ def save(self, *args, **kwargs): # pylint: disable=arguments-differ except Exception: log.exception('Error creating default branches') + def delete(self, *args, **kwargs): # pylint: disable=arguments-differ + from readthedocs.projects import tasks + + # Remove local FS build artifacts on the web servers + broadcast( + type='app', + task=tasks.remove_dirs, + args=[(self.doc_path,)], + ) + + # Remove build artifacts from storage + storage_paths = [] + for type_ in MEDIA_TYPES: + storage_paths.append( + '{}/{}'.format( + type_, + self.slug, + ) + ) + tasks.remove_build_storage_paths.delay(storage_paths) + + super().delete(*args, **kwargs) + def get_absolute_url(self): return reverse('projects_detail', args=[self.slug]) @@ -750,28 +774,49 @@ def has_pdf(self, version_slug=LATEST): path = self.get_production_media_path( type_='pdf', version_slug=version_slug ) - storage_path = self.get_storage_path( - type_='pdf', version_slug=version_slug - ) - return os.path.exists(path) or storage.exists(storage_path) + if os.path.exists(path): + return True + + if settings.RTD_BUILD_MEDIA_STORAGE: + storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() + storage_path = self.get_storage_path( + type_='pdf', version_slug=version_slug + ) + return storage.exists(storage_path) + + return False def has_epub(self, version_slug=LATEST): path = self.get_production_media_path( type_='epub', version_slug=version_slug ) - storage_path = self.get_storage_path( - type_='epub', version_slug=version_slug - ) - return os.path.exists(path) or storage.exists(storage_path) + if os.path.exists(path): + return True + + if settings.RTD_BUILD_MEDIA_STORAGE: + storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() + storage_path = self.get_storage_path( + type_='epub', version_slug=version_slug + ) + return storage.exists(storage_path) + + return False def has_htmlzip(self, version_slug=LATEST): path = self.get_production_media_path( type_='htmlzip', version_slug=version_slug ) - storage_path = self.get_storage_path( - type_='htmlzip', version_slug=version_slug - ) - return os.path.exists(path) or storage.exists(storage_path) + if os.path.exists(path): + return True + + if settings.RTD_BUILD_MEDIA_STORAGE: + storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() + storage_path = self.get_storage_path( + type_='htmlzip', version_slug=version_slug + ) + return storage.exists(storage_path) + + return False @property def sponsored(self): diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 4291f1dbc05..67fb5d6bb7f 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -1680,6 +1680,20 @@ def remove_dirs(paths): shutil.rmtree(path, ignore_errors=True) +@app.task() +def remove_build_storage_paths(paths): + """ + Remove artifacts from build media storage (cloud or local storage) + + :param paths: list of paths in build media storage to delete + """ + if settings.RTD_BUILD_MEDIA_STORAGE: + storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() + for storage_path in paths: + log.info('Removing %s from media storage', storage_path) + storage.delete_directory(storage_path) + + @app.task(queue='web') def sync_callback(_, version_pk, commit, *args, **kwargs): """ diff --git a/readthedocs/projects/views/private.py b/readthedocs/projects/views/private.py index d360fecd408..c43eec68fe8 100644 --- a/readthedocs/projects/views/private.py +++ b/readthedocs/projects/views/private.py @@ -210,11 +210,7 @@ def project_delete(request, project_slug): ) if request.method == 'POST': - broadcast( - type='app', - task=tasks.remove_dirs, - args=[(project.doc_path,)], - ) + # Delete the project and all related files project.delete() messages.success(request, _('Project deleted')) project_dashboard = reverse('projects_dashboard') diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py index 8003778da0f..64d82272738 100644 --- a/readthedocs/projects/views/public.py +++ b/readthedocs/projects/views/public.py @@ -33,7 +33,6 @@ log = logging.getLogger(__name__) search_log = logging.getLogger(__name__ + '.search') mimetypes.add_type('application/epub+zip', '.epub') -storage = get_storage_class()() class ProjectIndex(ListView): @@ -217,11 +216,14 @@ def project_download_media(request, project_slug, type_, version_slug): ) if settings.DEFAULT_PRIVACY_LEVEL == 'public' or settings.DEBUG: - storage_path = version.project.get_storage_path( - type_=type_, version_slug=version_slug - ) - if storage.exists(storage_path): - return HttpResponseRedirect(storage.url(storage_path)) + + if settings.RTD_BUILD_MEDIA_STORAGE: + storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() + storage_path = version.project.get_storage_path( + type_=type_, version_slug=version_slug + ) + if storage.exists(storage_path): + return HttpResponseRedirect(storage.url(storage_path)) media_path = os.path.join( settings.MEDIA_URL, diff --git a/readthedocs/rtd_tests/tests/test_project_views.py b/readthedocs/rtd_tests/tests/test_project_views.py index 1396df1add3..e5d151ead7f 100644 --- a/readthedocs/rtd_tests/tests/test_project_views.py +++ b/readthedocs/rtd_tests/tests/test_project_views.py @@ -404,7 +404,7 @@ def test_delete_project(self): response = self.client.get('/dashboard/pip/delete/') self.assertEqual(response.status_code, 200) - with patch('readthedocs.projects.views.private.broadcast') as broadcast: + with patch('readthedocs.projects.models.broadcast') as broadcast: response = self.client.post('/dashboard/pip/delete/') self.assertEqual(response.status_code, 302) self.assertFalse(Project.objects.filter(slug='pip').exists()) From 0094f422f822f4bb1131cbc826740bb9a5043d90 Mon Sep 17 00:00:00 2001 From: David Fischer Date: Wed, 15 May 2019 16:35:24 -0700 Subject: [PATCH 2/4] Refactor the has media functions --- readthedocs/projects/models.py | 39 +++++++--------------------------- 1 file changed, 8 insertions(+), 31 deletions(-) diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index f048d7646f6..139f0c87614 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -770,9 +770,9 @@ def has_versions(self): def has_aliases(self): return self.aliases.exists() - def has_pdf(self, version_slug=LATEST): + def has_media(self, type_, version_slug=LATEST): path = self.get_production_media_path( - type_='pdf', version_slug=version_slug + type_=type_, version_slug=version_slug ) if os.path.exists(path): return True @@ -780,43 +780,20 @@ def has_pdf(self, version_slug=LATEST): if settings.RTD_BUILD_MEDIA_STORAGE: storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() storage_path = self.get_storage_path( - type_='pdf', version_slug=version_slug + type_=type_, version_slug=version_slug ) return storage.exists(storage_path) return False - def has_epub(self, version_slug=LATEST): - path = self.get_production_media_path( - type_='epub', version_slug=version_slug - ) - if os.path.exists(path): - return True - - if settings.RTD_BUILD_MEDIA_STORAGE: - storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() - storage_path = self.get_storage_path( - type_='epub', version_slug=version_slug - ) - return storage.exists(storage_path) + def has_pdf(self, version_slug=LATEST): + return self.has_media('pdf', version_slug=version_slug) - return False + def has_epub(self, version_slug=LATEST): + return self.has_media('epub', version_slug=version_slug) def has_htmlzip(self, version_slug=LATEST): - path = self.get_production_media_path( - type_='htmlzip', version_slug=version_slug - ) - if os.path.exists(path): - return True - - if settings.RTD_BUILD_MEDIA_STORAGE: - storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)() - storage_path = self.get_storage_path( - type_='htmlzip', version_slug=version_slug - ) - return storage.exists(storage_path) - - return False + return self.has_media('htmlzip', version_slug=version_slug) @property def sponsored(self): From b48cdc4d5d4a1f1025358a80ade49be1c09827e2 Mon Sep 17 00:00:00 2001 From: David Fischer Date: Wed, 15 May 2019 17:16:29 -0700 Subject: [PATCH 3/4] Use constants --- readthedocs/projects/models.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 139f0c87614..a5bbb9a132a 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -42,7 +42,12 @@ from readthedocs.vcs_support.backends import backend_cls from readthedocs.vcs_support.utils import Lock, NonBlockingLock -from .constants import MEDIA_TYPES +from .constants import ( + MEDIA_TYPES, + MEDIA_TYPE_PDF, + MEDIA_TYPE_EPUB, + MEDIA_TYPE_HTMLZIP, +) log = logging.getLogger(__name__) @@ -787,13 +792,13 @@ def has_media(self, type_, version_slug=LATEST): return False def has_pdf(self, version_slug=LATEST): - return self.has_media('pdf', version_slug=version_slug) + return self.has_media(MEDIA_TYPE_PDF, version_slug=version_slug) def has_epub(self, version_slug=LATEST): - return self.has_media('epub', version_slug=version_slug) + return self.has_media(MEDIA_TYPE_EPUB, version_slug=version_slug) def has_htmlzip(self, version_slug=LATEST): - return self.has_media('htmlzip', version_slug=version_slug) + return self.has_media(MEDIA_TYPE_HTMLZIP, version_slug=version_slug) @property def sponsored(self): From b9592183d7f96f93f76b11b43ec808948de62bac Mon Sep 17 00:00:00 2001 From: David Fischer Date: Thu, 16 May 2019 12:22:12 -0700 Subject: [PATCH 4/4] Limit task to web queues --- readthedocs/projects/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 67fb5d6bb7f..38182c095b5 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -1680,7 +1680,7 @@ def remove_dirs(paths): shutil.rmtree(path, ignore_errors=True) -@app.task() +@app.task(queue='web') def remove_build_storage_paths(paths): """ Remove artifacts from build media storage (cloud or local storage)