Skip to content

Commit dde41e8

Browse files
authored
Merge pull request #6323 from stsewd/clean-project-resources
Remove files from storage and delete indexes from ES when no longer needed
2 parents f897447 + 8b08bdc commit dde41e8

File tree

6 files changed

+120
-30
lines changed

6 files changed

+120
-30
lines changed

readthedocs/builds/models.py

+2-3
Original file line numberDiff line numberDiff line change
@@ -333,10 +333,9 @@ def delete(self, *args, **kwargs): # pylint: disable=arguments-differ
333333
args=[self.get_artifact_paths()],
334334
)
335335

336-
# Remove build artifacts from storage if the version is not external
336+
# Remove resources if the version is not external
337337
if self.type != EXTERNAL:
338-
storage_paths = self.get_storage_paths()
339-
tasks.remove_build_storage_paths.delay(storage_paths)
338+
tasks.clean_project_resources(self.project, self)
340339

341340
project_pk = self.project.pk
342341
super().delete(*args, **kwargs)

readthedocs/projects/models.py

+15-10
Original file line numberDiff line numberDiff line change
@@ -480,16 +480,8 @@ def delete(self, *args, **kwargs): # pylint: disable=arguments-differ
480480
args=[(self.doc_path,)],
481481
)
482482

483-
# Remove build artifacts from storage
484-
storage_paths = []
485-
for type_ in MEDIA_TYPES:
486-
storage_paths.append(
487-
'{}/{}'.format(
488-
type_,
489-
self.slug,
490-
)
491-
)
492-
tasks.remove_build_storage_paths.delay(storage_paths)
483+
# Remove extra resources
484+
tasks.clean_project_resources(self)
493485

494486
super().delete(*args, **kwargs)
495487

@@ -534,6 +526,19 @@ def get_subproject_urls(self):
534526
return [(proj.child.slug, proj.child.get_docs_url())
535527
for proj in self.subprojects.all()]
536528

529+
def get_storage_paths(self):
530+
"""
531+
Get the paths of all artifacts used by the project.
532+
533+
:return: the path to an item in storage
534+
(can be used with ``storage.url`` to get the URL).
535+
"""
536+
storage_paths = [
537+
f'{type_}/{self.slug}'
538+
for type_ in MEDIA_TYPES
539+
]
540+
return storage_paths
541+
537542
def get_storage_path(
538543
self,
539544
type_,

readthedocs/projects/tasks.py

+44-2
Original file line numberDiff line numberDiff line change
@@ -1621,8 +1621,9 @@ def _sync_imported_files(version, build, changed_files):
16211621
# Remove old HTMLFiles from ElasticSearch
16221622
remove_indexed_files(
16231623
model=HTMLFile,
1624-
version=version,
1625-
build=build,
1624+
project_slug=version.project.slug,
1625+
version_slug=version.slug,
1626+
build_id=build,
16261627
)
16271628

16281629
# Delete SphinxDomain objects from previous versions
@@ -1844,6 +1845,47 @@ def remove_build_storage_paths(paths):
18441845
storage.delete_directory(storage_path)
18451846

18461847

1848+
@app.task(queue='web')
1849+
def remove_search_indexes(project_slug, version_slug=None):
1850+
"""Wrapper around ``remove_indexed_files`` to make it a task."""
1851+
remove_indexed_files(
1852+
model=HTMLFile,
1853+
project_slug=project_slug,
1854+
version_slug=version_slug,
1855+
)
1856+
1857+
1858+
def clean_project_resources(project, version=None):
1859+
"""
1860+
Delete all extra resources used by `version` of `project`.
1861+
1862+
It removes:
1863+
1864+
- Artifacts from storage.
1865+
- Search indexes from ES.
1866+
1867+
:param version: Version instance. If isn't given,
1868+
all resources of `project` will be deleted.
1869+
1870+
.. note::
1871+
This function is usually called just before deleting project.
1872+
Make sure to not depend on the project object inside the tasks.
1873+
"""
1874+
# Remove storage paths
1875+
storage_paths = []
1876+
if version:
1877+
storage_paths = version.get_storage_paths()
1878+
else:
1879+
storage_paths = project.get_storage_paths()
1880+
remove_build_storage_paths.delay(storage_paths)
1881+
1882+
# Remove indexes
1883+
remove_search_indexes.delay(
1884+
project_slug=project.slug,
1885+
version_slug=version.slug if version else None,
1886+
)
1887+
1888+
18471889
@app.task(queue='web')
18481890
def sync_callback(_, version_pk, commit, build, *args, **kwargs):
18491891
"""

readthedocs/projects/views/private.py

+4
Original file line numberDiff line numberDiff line change
@@ -203,6 +203,10 @@ def form_valid(self, form):
203203
task=tasks.remove_dirs,
204204
args=[version.get_artifact_paths()],
205205
)
206+
tasks.clean_project_resources(
207+
version.project,
208+
version,
209+
)
206210
version.built = False
207211
version.save()
208212
return HttpResponseRedirect(self.get_success_url())

readthedocs/rtd_tests/tests/test_build_forms.py

+37-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
1-
# -*- coding: utf-8 -*-
2-
1+
import mock
2+
from django.contrib.auth.models import User
33
from django.test import TestCase
4+
from django.urls import reverse
45
from django_dynamic_fixture import get
56

67
from readthedocs.builds.forms import VersionForm
@@ -12,7 +13,8 @@
1213
class TestVersionForm(TestCase):
1314

1415
def setUp(self):
15-
self.project = get(Project)
16+
self.user = get(User)
17+
self.project = get(Project, users=(self.user,))
1618

1719
def test_default_version_is_active(self):
1820
version = get(
@@ -50,3 +52,35 @@ def test_default_version_is_inactive(self):
5052
)
5153
self.assertFalse(form.is_valid())
5254
self.assertIn('active', form.errors)
55+
56+
@mock.patch('readthedocs.projects.tasks.clean_project_resources')
57+
def test_resources_are_deleted_when_version_is_inactive(self, clean_project_resources):
58+
version = get(
59+
Version,
60+
project=self.project,
61+
active=True,
62+
)
63+
64+
url = reverse('project_version_detail', args=(version.project.slug, version.slug))
65+
66+
self.client.force_login(self.user)
67+
68+
r = self.client.post(
69+
url,
70+
data={
71+
'active': True,
72+
'privacy_level': PRIVATE,
73+
},
74+
)
75+
self.assertTrue(r.status_code, 200)
76+
clean_project_resources.assert_not_called()
77+
78+
r = self.client.post(
79+
url,
80+
data={
81+
'active': False,
82+
'privacy_level': PRIVATE,
83+
},
84+
)
85+
self.assertTrue(r.status_code, 200)
86+
clean_project_resources.assert_called_once()

readthedocs/search/utils.py

+18-12
Original file line numberDiff line numberDiff line change
@@ -43,35 +43,41 @@ def index_new_files(model, version, build):
4343
log.exception('Unable to index a subset of files. Continuing.')
4444

4545

46-
def remove_indexed_files(model, version, build):
46+
def remove_indexed_files(model, project_slug, version_slug=None, build_id=None):
4747
"""
48-
Remove files from the version from the search index.
48+
Remove files from `version_slug` of `project_slug` from the search index.
4949
50-
This excludes files from the current build.
50+
:param model: Class of the model to be deleted.
51+
:param project_slug: Project slug.
52+
:param version_slug: Version slug. If isn't given,
53+
all index from `project` are deleted.
54+
:param build_id: Build id. If isn't given, all index from `version` are deleted.
5155
"""
5256

5357
if not DEDConfig.autosync_enabled():
5458
log.info(
5559
'Autosync disabled, skipping removal from the search index for: %s:%s',
56-
version.project.slug,
57-
version.slug,
60+
project_slug,
61+
version_slug,
5862
)
5963
return
6064

6165
try:
6266
document = list(registry.get_documents(models=[model]))[0]
6367
log.info(
6468
'Deleting old files from search index for: %s:%s',
65-
version.project.slug,
66-
version.slug,
69+
project_slug,
70+
version_slug,
6771
)
68-
(
72+
documents = (
6973
document().search()
70-
.filter('term', project=version.project.slug)
71-
.filter('term', version=version.slug)
72-
.exclude('term', build=build)
73-
.delete()
74+
.filter('term', project=project_slug)
7475
)
76+
if version_slug:
77+
documents.filter('term', version=version_slug)
78+
if build_id:
79+
documents.exclude('term', build=build_id)
80+
documents.delete()
7581
except Exception:
7682
log.exception('Unable to delete a subset of files. Continuing.')
7783

0 commit comments

Comments
 (0)