Skip to content

Build: don't track changed files #7874

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 17, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions readthedocs/projects/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@

import django.dispatch


before_vcs = django.dispatch.Signal(providing_args=['version', 'environmemt'])
after_vcs = django.dispatch.Signal(providing_args=['version'])

Expand All @@ -12,4 +11,4 @@
project_import = django.dispatch.Signal(providing_args=['project'])

# Used to purge files from the CDN
files_changed = django.dispatch.Signal(providing_args=['project', 'version', 'files'])
files_changed = django.dispatch.Signal(providing_args=['project', 'version'])
56 changes: 7 additions & 49 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1340,15 +1340,14 @@ def fileify(version_pk, commit, build, search_ranking, search_ignore):
},
)
try:
changed_files = _create_imported_files(
_create_imported_files(
version=version,
commit=commit,
build=build,
search_ranking=search_ranking,
search_ignore=search_ignore,
)
except Exception:
changed_files = set()
log.exception('Failed during ImportedFile creation')

try:
Expand All @@ -1357,7 +1356,7 @@ def fileify(version_pk, commit, build, search_ranking, search_ignore):
log.exception('Failed during SphinxDomain creation')

try:
_sync_imported_files(version, build, changed_files)
_sync_imported_files(version, build)
except Exception:
log.exception('Failed during ImportedFile syncing')

Expand Down Expand Up @@ -1530,57 +1529,22 @@ def _create_imported_files(*, version, commit, build, search_ranking, search_ign
:param version: Version instance
:param commit: Commit that updated path
:param build: Build id
:returns: paths of changed files
:rtype: set
"""
changed_files = set()

# Re-create all objects from the new build of the version
storage_path = version.project.get_storage_path(
type_='html', version_slug=version.slug, include_file=False
)
for root, __, filenames in build_media_storage.walk(storage_path):
for filename in filenames:
if filename.endswith('.html'):
model_class = HTMLFile
elif version.project.cdn_enabled:
# We need to track all files for CDN enabled projects so the files can be purged
model_class = ImportedFile
else:
# For projects not behind a CDN, we don't care about non-HTML
# We don't care about non-HTML files
if not filename.endswith('.html'):
continue

full_path = build_media_storage.join(root, filename)

# Generate a relative path for storage similar to os.path.relpath
relpath = full_path.replace(storage_path, '', 1).lstrip('/')

try:
md5 = hashlib.md5(build_media_storage.open(full_path, 'rb').read()).hexdigest()
except Exception:
log.exception(
'Error while generating md5 for %s:%s:%s. Don\'t stop.',
version.project.slug,
version.slug,
relpath,
)
md5 = ''
# Keep track of changed files to be purged in the CDN
obj = (
model_class.objects
.filter(project=version.project, version=version, path=relpath)
.order_by('-modified_date')
.first()
)
if obj and md5 and obj.md5 != md5:
changed_files.add(
resolve_path(
version.project,
filename=relpath,
version_slug=version.slug,
),
)

page_rank = 0
# Last pattern to match takes precedence
# XXX: see if we can implement another type of precedence,
Expand All @@ -1598,37 +1562,31 @@ def _create_imported_files(*, version, commit, build, search_ranking, search_ign
break

# Create imported files from new build
model_class.objects.create(
HTMLFile.objects.create(
project=version.project,
version=version,
path=relpath,
name=filename,
md5=md5,
rank=page_rank,
commit=commit,
build=build,
ignore=ignore,
)

# This signal is used for clearing the CDN,
# so send it as soon as we have the list of changed files
# This signal is used for purging the CDN.
files_changed.send(
sender=Project,
project=version.project,
version=version,
files=changed_files,
)

return changed_files


def _sync_imported_files(version, build, changed_files):
def _sync_imported_files(version, build):
"""
Sync/Update/Delete ImportedFiles objects of this version.

:param version: Version instance
:param build: Build id
:param changed_files: path of changed files
"""

# Index new HTMLFiles to ElasticSearch
Expand Down
12 changes: 3 additions & 9 deletions readthedocs/rtd_tests/tests/test_imported_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def _manage_imported_files(
search_ranking=search_ranking,
search_ignore=search_ignore,
)
_sync_imported_files(version, build, set())
_sync_imported_files(version, build)

def _copy_storage_dir(self):
"""Copy the test directory (rtd_tests/files) to storage"""
Expand All @@ -63,18 +63,14 @@ def _copy_storage_dir(self):
def test_properly_created(self):
# Only 2 files in the directory is HTML (test.html, api/index.html)
self.assertEqual(ImportedFile.objects.count(), 0)
self._manage_imported_files(self.version, 'commit01', 1)
self._manage_imported_files(version=self.version, commit='commit01', build=1)
self.assertEqual(ImportedFile.objects.count(), 2)
self._manage_imported_files(self.version, 'commit01', 2)
self._manage_imported_files(version=self.version, commit='commit01', build=2)
self.assertEqual(ImportedFile.objects.count(), 2)

self.project.cdn_enabled = True
self.project.save()

# CDN enabled projects => save all files
self._manage_imported_files(self.version, 'commit01', 3)
self.assertEqual(ImportedFile.objects.count(), 4)

def test_update_commit(self):
self.assertEqual(ImportedFile.objects.count(), 0)
self._manage_imported_files(self.version, 'commit01', 1)
Expand Down Expand Up @@ -168,7 +164,6 @@ def test_update_content(self):
self._copy_storage_dir()

self._manage_imported_files(self.version, 'commit01', 1)
self.assertEqual(ImportedFile.objects.get(name='test.html').md5, 'c7532f22a052d716f7b2310fb52ad981')
self.assertEqual(ImportedFile.objects.count(), 2)

with open(os.path.join(test_dir, 'test.html'), 'w+') as f:
Expand All @@ -177,7 +172,6 @@ def test_update_content(self):
self._copy_storage_dir()

self._manage_imported_files(self.version, 'commit02', 2)
self.assertNotEqual(ImportedFile.objects.get(name='test.html').md5, 'c7532f22a052d716f7b2310fb52ad981')
self.assertEqual(ImportedFile.objects.count(), 2)

@override_settings(PRODUCTION_DOMAIN='readthedocs.org')
Expand Down