diff --git a/readthedocs/projects/signals.py b/readthedocs/projects/signals.py index d8a44eabfee..cbf48eb5406 100644 --- a/readthedocs/projects/signals.py +++ b/readthedocs/projects/signals.py @@ -2,7 +2,6 @@ import django.dispatch - before_vcs = django.dispatch.Signal(providing_args=['version', 'environmemt']) after_vcs = django.dispatch.Signal(providing_args=['version']) @@ -12,4 +11,4 @@ project_import = django.dispatch.Signal(providing_args=['project']) # Used to purge files from the CDN -files_changed = django.dispatch.Signal(providing_args=['project', 'version', 'files']) +files_changed = django.dispatch.Signal(providing_args=['project', 'version']) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index be2db8cf141..0b5c838aceb 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -1340,7 +1340,7 @@ def fileify(version_pk, commit, build, search_ranking, search_ignore): }, ) try: - changed_files = _create_imported_files( + _create_imported_files( version=version, commit=commit, build=build, @@ -1348,7 +1348,6 @@ def fileify(version_pk, commit, build, search_ranking, search_ignore): search_ignore=search_ignore, ) except Exception: - changed_files = set() log.exception('Failed during ImportedFile creation') try: @@ -1357,7 +1356,7 @@ def fileify(version_pk, commit, build, search_ranking, search_ignore): log.exception('Failed during SphinxDomain creation') try: - _sync_imported_files(version, build, changed_files) + _sync_imported_files(version, build) except Exception: log.exception('Failed during ImportedFile syncing') @@ -1530,24 +1529,15 @@ def _create_imported_files(*, version, commit, build, search_ranking, search_ign :param version: Version instance :param commit: Commit that updated path :param build: Build id - :returns: paths of changed files - :rtype: set """ - changed_files = set() - # Re-create all objects from the new build of the version storage_path = version.project.get_storage_path( type_='html', version_slug=version.slug, include_file=False ) for root, __, filenames in build_media_storage.walk(storage_path): for filename in filenames: - if filename.endswith('.html'): - model_class = HTMLFile - elif version.project.cdn_enabled: - # We need to track all files for CDN enabled projects so the files can be purged - model_class = ImportedFile - else: - # For projects not behind a CDN, we don't care about non-HTML + # We don't care about non-HTML files + if not filename.endswith('.html'): continue full_path = build_media_storage.join(root, filename) @@ -1555,32 +1545,6 @@ def _create_imported_files(*, version, commit, build, search_ranking, search_ign # Generate a relative path for storage similar to os.path.relpath relpath = full_path.replace(storage_path, '', 1).lstrip('/') - try: - md5 = hashlib.md5(build_media_storage.open(full_path, 'rb').read()).hexdigest() - except Exception: - log.exception( - 'Error while generating md5 for %s:%s:%s. Don\'t stop.', - version.project.slug, - version.slug, - relpath, - ) - md5 = '' - # Keep track of changed files to be purged in the CDN - obj = ( - model_class.objects - .filter(project=version.project, version=version, path=relpath) - .order_by('-modified_date') - .first() - ) - if obj and md5 and obj.md5 != md5: - changed_files.add( - resolve_path( - version.project, - filename=relpath, - version_slug=version.slug, - ), - ) - page_rank = 0 # Last pattern to match takes precedence # XXX: see if we can implement another type of precedence, @@ -1598,37 +1562,31 @@ def _create_imported_files(*, version, commit, build, search_ranking, search_ign break # Create imported files from new build - model_class.objects.create( + HTMLFile.objects.create( project=version.project, version=version, path=relpath, name=filename, - md5=md5, rank=page_rank, commit=commit, build=build, ignore=ignore, ) - # This signal is used for clearing the CDN, - # so send it as soon as we have the list of changed files + # This signal is used for purging the CDN. files_changed.send( sender=Project, project=version.project, version=version, - files=changed_files, ) - return changed_files - -def _sync_imported_files(version, build, changed_files): +def _sync_imported_files(version, build): """ Sync/Update/Delete ImportedFiles objects of this version. :param version: Version instance :param build: Build id - :param changed_files: path of changed files """ # Index new HTMLFiles to ElasticSearch diff --git a/readthedocs/rtd_tests/tests/test_imported_file.py b/readthedocs/rtd_tests/tests/test_imported_file.py index 0115250193a..6f530f51e48 100644 --- a/readthedocs/rtd_tests/tests/test_imported_file.py +++ b/readthedocs/rtd_tests/tests/test_imported_file.py @@ -47,7 +47,7 @@ def _manage_imported_files( search_ranking=search_ranking, search_ignore=search_ignore, ) - _sync_imported_files(version, build, set()) + _sync_imported_files(version, build) def _copy_storage_dir(self): """Copy the test directory (rtd_tests/files) to storage""" @@ -63,18 +63,14 @@ def _copy_storage_dir(self): def test_properly_created(self): # Only 2 files in the directory is HTML (test.html, api/index.html) self.assertEqual(ImportedFile.objects.count(), 0) - self._manage_imported_files(self.version, 'commit01', 1) + self._manage_imported_files(version=self.version, commit='commit01', build=1) self.assertEqual(ImportedFile.objects.count(), 2) - self._manage_imported_files(self.version, 'commit01', 2) + self._manage_imported_files(version=self.version, commit='commit01', build=2) self.assertEqual(ImportedFile.objects.count(), 2) self.project.cdn_enabled = True self.project.save() - # CDN enabled projects => save all files - self._manage_imported_files(self.version, 'commit01', 3) - self.assertEqual(ImportedFile.objects.count(), 4) - def test_update_commit(self): self.assertEqual(ImportedFile.objects.count(), 0) self._manage_imported_files(self.version, 'commit01', 1) @@ -168,7 +164,6 @@ def test_update_content(self): self._copy_storage_dir() self._manage_imported_files(self.version, 'commit01', 1) - self.assertEqual(ImportedFile.objects.get(name='test.html').md5, 'c7532f22a052d716f7b2310fb52ad981') self.assertEqual(ImportedFile.objects.count(), 2) with open(os.path.join(test_dir, 'test.html'), 'w+') as f: @@ -177,7 +172,6 @@ def test_update_content(self): self._copy_storage_dir() self._manage_imported_files(self.version, 'commit02', 2) - self.assertNotEqual(ImportedFile.objects.get(name='test.html').md5, 'c7532f22a052d716f7b2310fb52ad981') self.assertEqual(ImportedFile.objects.count(), 2) @override_settings(PRODUCTION_DOMAIN='readthedocs.org')