Skip to content

Storage updates #5698

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
May 30, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions readthedocs/builds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
GITLAB_URL,
PRIVACY_CHOICES,
PRIVATE,
MEDIA_TYPES,
)
from readthedocs.projects.models import APIProject, Project
from readthedocs.projects.version_handling import determine_stable_version
Expand Down Expand Up @@ -259,6 +260,11 @@ def delete(self, *args, **kwargs): # pylint: disable=arguments-differ
task=tasks.remove_dirs,
args=[self.get_artifact_paths()],
)

# Remove build artifacts from storage
storage_paths = self.get_storage_paths()
tasks.remove_build_storage_paths.delay(storage_paths)

project_pk = self.project.pk
super().delete(*args, **kwargs)
broadcast(
Expand Down Expand Up @@ -340,6 +346,25 @@ def get_artifact_paths(self):

return paths

def get_storage_paths(self):
"""
Return a list of all build artifact storage paths for this version.

:rtype: list
"""
paths = []

for type_ in MEDIA_TYPES:
paths.append(
self.project.get_storage_path(
type_=type_,
version_slug=self.slug,
include_file=False,
)
)

return paths

def clean_build_path(self):
"""
Clean build path for project version.
Expand Down
4 changes: 4 additions & 0 deletions readthedocs/builds/storage.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
from pathlib import Path

from django.core.exceptions import SuspiciousFileOperation
from django.core.files.storage import FileSystemStorage
from storages.utils import safe_join, get_available_overwrite_name

Expand Down Expand Up @@ -51,6 +52,9 @@ def delete_directory(self, path):

:param path: the path to the directory to remove
"""
if path in ('', '/'):
raise SuspiciousFileOperation('Deleting all storage cannot be right')

log.debug('Deleting directory %s from media storage', path)
folders, files = self.listdir(self._dirpath(path))
for folder_name in folders:
Expand Down
2 changes: 0 additions & 2 deletions readthedocs/builds/syncers.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,12 @@
import shutil

from django.conf import settings
from django.core.files.storage import get_storage_class

from readthedocs.core.utils import safe_makedirs
from readthedocs.core.utils.extend import SettingsOverrideObject


log = logging.getLogger(__name__)
storage = get_storage_class()()


class BaseSyncer:
Expand Down
13 changes: 13 additions & 0 deletions readthedocs/projects/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,19 @@
('sphinx_singlehtml', _('Sphinx Single Page HTML')),
)

MEDIA_TYPE_HTML = 'html'
MEDIA_TYPE_PDF = 'pdf'
MEDIA_TYPE_EPUB = 'epub'
MEDIA_TYPE_HTMLZIP = 'htmlzip'
MEDIA_TYPE_JSON = 'json'
MEDIA_TYPES = (
MEDIA_TYPE_HTML,
MEDIA_TYPE_PDF,
MEDIA_TYPE_EPUB,
MEDIA_TYPE_HTMLZIP,
MEDIA_TYPE_JSON,
)

SAMPLE_FILES = (
('Installation', 'projects/samples/installation.rst.html'),
('Getting started', 'projects/samples/getting_started.rst.html'),
Expand Down
69 changes: 48 additions & 21 deletions readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,15 @@
from readthedocs.vcs_support.backends import backend_cls
from readthedocs.vcs_support.utils import Lock, NonBlockingLock

from .constants import (
MEDIA_TYPES,
MEDIA_TYPE_PDF,
MEDIA_TYPE_EPUB,
MEDIA_TYPE_HTMLZIP,
)


log = logging.getLogger(__name__)
storage = get_storage_class()()


class ProjectRelationship(models.Model):
Expand Down Expand Up @@ -463,6 +469,29 @@ def save(self, *args, **kwargs): # pylint: disable=arguments-differ
except Exception:
log.exception('Error creating default branches')

def delete(self, *args, **kwargs): # pylint: disable=arguments-differ
from readthedocs.projects import tasks

# Remove local FS build artifacts on the web servers
broadcast(
type='app',
task=tasks.remove_dirs,
args=[(self.doc_path,)],
)

# Remove build artifacts from storage
storage_paths = []
for type_ in MEDIA_TYPES:
storage_paths.append(
'{}/{}'.format(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it's better to use either os.path.join or pathlib.Path here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not a path on the local disk. This is a storage path which is different. It always uses forward slashes regardless of platform.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If we want to go fancy, what we need here is a PurePosixPath: it's platform independent and does not access the filesystem when manipulating paths.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The Django storage API does not implement a posix standard either. I believe that using pathlib is not the correct course of action because these are not filesystem paths.

type_,
self.slug,
)
)
tasks.remove_build_storage_paths.delay(storage_paths)

super().delete(*args, **kwargs)

def get_absolute_url(self):
return reverse('projects_detail', args=[self.slug])

Expand Down Expand Up @@ -746,32 +775,30 @@ def has_versions(self):
def has_aliases(self):
return self.aliases.exists()

def has_pdf(self, version_slug=LATEST):
def has_media(self, type_, version_slug=LATEST):
path = self.get_production_media_path(
type_='pdf', version_slug=version_slug
)
storage_path = self.get_storage_path(
type_='pdf', version_slug=version_slug
type_=type_, version_slug=version_slug
)
return os.path.exists(path) or storage.exists(storage_path)
if os.path.exists(path):
return True

if settings.RTD_BUILD_MEDIA_STORAGE:
storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
storage_path = self.get_storage_path(
type_=type_, version_slug=version_slug
)
return storage.exists(storage_path)

return False

def has_pdf(self, version_slug=LATEST):
return self.has_media(MEDIA_TYPE_PDF, version_slug=version_slug)

def has_epub(self, version_slug=LATEST):
path = self.get_production_media_path(
type_='epub', version_slug=version_slug
)
storage_path = self.get_storage_path(
type_='epub', version_slug=version_slug
)
return os.path.exists(path) or storage.exists(storage_path)
return self.has_media(MEDIA_TYPE_EPUB, version_slug=version_slug)

def has_htmlzip(self, version_slug=LATEST):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: these 3 methods are exactly the same; the only change is type_ value. They could be refactored to use the same chunk of code.

Not needed to be done in this PR, though.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That seems like a worthwhile refactor and it seems useful as part of this PR.

path = self.get_production_media_path(
type_='htmlzip', version_slug=version_slug
)
storage_path = self.get_storage_path(
type_='htmlzip', version_slug=version_slug
)
return os.path.exists(path) or storage.exists(storage_path)
return self.has_media(MEDIA_TYPE_HTMLZIP, version_slug=version_slug)

@property
def sponsored(self):
Expand Down
14 changes: 14 additions & 0 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1680,6 +1680,20 @@ def remove_dirs(paths):
shutil.rmtree(path, ignore_errors=True)


@app.task(queue='web')
def remove_build_storage_paths(paths):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not sure what we decided here, but I think that build servers don't have the credentials (or the permissions) to make this operation. I think this task may be only executed by webs. In that case, queue='web' should be passed in the decorator.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should only be executed by webs typically in response to a project or version being deleted completely. I'll update it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think you forgot to push this change.

"""
Remove artifacts from build media storage (cloud or local storage)

:param paths: list of paths in build media storage to delete
"""
if settings.RTD_BUILD_MEDIA_STORAGE:
storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
for storage_path in paths:
log.info('Removing %s from media storage', storage_path)
storage.delete_directory(storage_path)


@app.task(queue='web')
def sync_callback(_, version_pk, commit, *args, **kwargs):
"""
Expand Down
6 changes: 1 addition & 5 deletions readthedocs/projects/views/private.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,11 +210,7 @@ def project_delete(request, project_slug):
)

if request.method == 'POST':
broadcast(
type='app',
task=tasks.remove_dirs,
args=[(project.doc_path,)],
)
# Delete the project and all related files
project.delete()
messages.success(request, _('Project deleted'))
project_dashboard = reverse('projects_dashboard')
Expand Down
14 changes: 8 additions & 6 deletions readthedocs/projects/views/public.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@
log = logging.getLogger(__name__)
search_log = logging.getLogger(__name__ + '.search')
mimetypes.add_type('application/epub+zip', '.epub')
storage = get_storage_class()()


class ProjectIndex(ListView):
Expand Down Expand Up @@ -217,11 +216,14 @@ def project_download_media(request, project_slug, type_, version_slug):
)

if settings.DEFAULT_PRIVACY_LEVEL == 'public' or settings.DEBUG:
storage_path = version.project.get_storage_path(
type_=type_, version_slug=version_slug
)
if storage.exists(storage_path):
return HttpResponseRedirect(storage.url(storage_path))

if settings.RTD_BUILD_MEDIA_STORAGE:
storage = get_storage_class(settings.RTD_BUILD_MEDIA_STORAGE)()
storage_path = version.project.get_storage_path(
type_=type_, version_slug=version_slug
)
if storage.exists(storage_path):
return HttpResponseRedirect(storage.url(storage_path))

media_path = os.path.join(
settings.MEDIA_URL,
Expand Down
2 changes: 1 addition & 1 deletion readthedocs/rtd_tests/tests/test_project_views.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ def test_delete_project(self):
response = self.client.get('/dashboard/pip/delete/')
self.assertEqual(response.status_code, 200)

with patch('readthedocs.projects.views.private.broadcast') as broadcast:
with patch('readthedocs.projects.models.broadcast') as broadcast:
response = self.client.post('/dashboard/pip/delete/')
self.assertEqual(response.status_code, 302)
self.assertFalse(Project.objects.filter(slug='pip').exists())
Expand Down