Skip to content

Refactor: fileify and _manage_imported_files #5333

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
144 changes: 144 additions & 0 deletions readthedocs/builds/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
import fnmatch
import hashlib
import logging
import os

from readthedocs.core.resolver import resolve_path
from readthedocs.projects.constants import LOG_TEMPLATE
from readthedocs.projects.models import HTMLFile, Project, ImportedFile
from readthedocs.projects.signals import (
bulk_post_create,
bulk_post_delete,
files_changed
)
from readthedocs.worker import app

from .models import Version


log = logging.getLogger(__name__)


@app.task(queue='web')
def fileify(version_pk, commit):
    """
    Create ImportedFile objects for all of a version's files.

    This is so we have an idea of what files we have in the database.

    :param version_pk: primary key of the Version whose build output to index
    :param commit: commit hash that produced the build output; when falsy the
        task bails out, since files can't be associated with a commit
    """
    version = Version.objects.get_object_or_log(pk=version_pk)
    if not version:
        # get_object_or_log already emitted a warning for the missing version.
        return
    project = version.project

    if not commit:
        log.info(
            LOG_TEMPLATE.format(
                project=project.slug,
                version=version.slug,
                msg=(
                    'Imported File not being built because no commit '
                    'information'
                ),
            ),
        )
        return

    path = project.rtd_build_path(version.slug)
    if path:
        log.info(
            LOG_TEMPLATE.format(
                # Use the local `project` binding consistently (the original
                # mixed `version.project.slug` and `project.slug`).
                project=project.slug,
                version=version.slug,
                msg='Creating ImportedFiles',
            ),
        )
        _manage_imported_files(version, path, commit)
    else:
        log.info(
            LOG_TEMPLATE.format(
                project=project.slug,
                version=version.slug,
                msg='No ImportedFile files',
            ),
        )


def _manage_imported_files(version, path, commit):
    """
    Update imported files for version.

    Walks ``path`` creating/updating one ``ImportedFile`` row per file
    (``HTMLFile`` for ``*.html``), deletes rows left over from previous
    commits, and fires the Elasticsearch bulk-index/bulk-delete signals
    plus ``files_changed``.

    :param version: Version instance
    :param path: Path to search
    :param commit: Commit that updated path
    """
    changed_files = set()
    created_html_files = []
    for root, __, filenames in os.walk(path):
        for filename in filenames:
            if fnmatch.fnmatch(filename, '*.html'):
                model_class = HTMLFile
            else:
                model_class = ImportedFile

            # Path of the file relative to the build root.
            dirpath = os.path.join(
                root.replace(path, '').lstrip('/'), filename.lstrip('/')
            )
            full_path = os.path.join(root, filename)
            # Use a context manager so the handle is always closed; the
            # previous bare open() leaked a file descriptor per file.
            with open(full_path, 'rb') as fobj:
                md5 = hashlib.md5(fobj.read()).hexdigest()
            try:
                # pylint: disable=unpacking-non-sequence
                obj, __ = model_class.objects.get_or_create(
                    project=version.project,
                    version=version,
                    path=dirpath,
                    name=filename,
                )
            except model_class.MultipleObjectsReturned:
                log.warning('Error creating ImportedFile')
                continue
            if obj.md5 != md5:
                obj.md5 = md5
                # Only files whose content hash changed are reported via
                # the files_changed signal below.
                changed_files.add(dirpath)
            if obj.commit != commit:
                obj.commit = commit
            obj.save()

            if model_class == HTMLFile:
                # the `obj` is HTMLFile, so add it to the list
                created_html_files.append(obj)

    # Send bulk_post_create signal for bulk indexing to Elasticsearch
    bulk_post_create.send(sender=HTMLFile, instance_list=created_html_files)

    # Delete the HTMLFile first from previous commit and
    # send bulk_post_delete signal for bulk removing from Elasticsearch
    delete_queryset = (
        HTMLFile.objects.filter(project=version.project,
                                version=version).exclude(commit=commit)
    )
    # Keep the objects in memory so they can still be sent with the signal
    # after the rows are deleted from the database.
    instance_list = list(delete_queryset)
    # Safely delete from database
    delete_queryset.delete()
    # Always pass the list of instances, not the (now-stale) queryset.
    bulk_post_delete.send(sender=HTMLFile, instance_list=instance_list)

    # Delete ImportedFiles from previous versions
    (
        ImportedFile.objects.filter(project=version.project,
                                    version=version).exclude(commit=commit
        ).delete()
    )
    # Resolve relative paths to their public URLs before notifying listeners.
    changed_files = [
        resolve_path(
            version.project,
            filename=file,
            version_slug=version.slug,
        ) for file in changed_files
    ]
    files_changed.send(
        sender=Project,
        project=version.project,
        files=changed_files,
    )
135 changes: 2 additions & 133 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,6 @@
"""

import datetime
import fnmatch
import hashlib
import json
import logging
import os
Expand Down Expand Up @@ -37,9 +35,9 @@
)
from readthedocs.builds.models import APIVersion, Build, Version
from readthedocs.builds.signals import build_complete
from readthedocs.builds.tasks import fileify
from readthedocs.builds.syncers import Syncer
from readthedocs.config import ConfigError
from readthedocs.core.resolver import resolve_path
from readthedocs.core.symlink import PrivateSymlink, PublicSymlink
from readthedocs.core.utils import broadcast, safe_unlink, send_email
from readthedocs.doc_builder.config import load_yaml_config
Expand All @@ -66,16 +64,13 @@

from .constants import LOG_TEMPLATE
from .exceptions import ProjectConfigurationError, RepositoryError
from .models import Domain, HTMLFile, ImportedFile, Project
from .models import Domain, Project
from .signals import (
after_build,
after_vcs,
before_build,
before_vcs,
bulk_post_create,
bulk_post_delete,
domain_verify,
files_changed,
)


Expand Down Expand Up @@ -655,7 +650,6 @@ def setup_vcs(self):
)
# Re raise the exception to stop the build at this point
raise

commit = self.project.vcs_repo(self.version.slug).commit
if commit:
self.build['commit'] = commit
Expand Down Expand Up @@ -1116,131 +1110,6 @@ def symlink_subproject(project_pk):
sym.symlink_subprojects()


@app.task(queue='web')
def fileify(version_pk, commit):
"""
Create ImportedFile objects for all of a version's files.

This is so we have an idea of what files we have in the database.
"""
version = Version.objects.get_object_or_log(pk=version_pk)
if not version:
return
project = version.project

if not commit:
log.info(
LOG_TEMPLATE.format(
project=project.slug,
version=version.slug,
msg=(
'Imported File not being built because no commit '
'information'
),
),
)
return

path = project.rtd_build_path(version.slug)
if path:
log.info(
LOG_TEMPLATE.format(
project=version.project.slug,
version=version.slug,
msg='Creating ImportedFiles',
),
)
_manage_imported_files(version, path, commit)
else:
log.info(
LOG_TEMPLATE.format(
project=project.slug,
version=version.slug,
msg='No ImportedFile files',
),
)


def _manage_imported_files(version, path, commit):
"""
Update imported files for version.

:param version: Version instance
:param path: Path to search
:param commit: Commit that updated path
"""
changed_files = set()
created_html_files = []
for root, __, filenames in os.walk(path):
for filename in filenames:
if fnmatch.fnmatch(filename, '*.html'):
model_class = HTMLFile
else:
model_class = ImportedFile

dirpath = os.path.join(
root.replace(path, '').lstrip('/'), filename.lstrip('/')
)
full_path = os.path.join(root, filename)
md5 = hashlib.md5(open(full_path, 'rb').read()).hexdigest()
try:
# pylint: disable=unpacking-non-sequence
obj, __ = model_class.objects.get_or_create(
project=version.project,
version=version,
path=dirpath,
name=filename,
)
except model_class.MultipleObjectsReturned:
log.warning('Error creating ImportedFile')
continue
if obj.md5 != md5:
obj.md5 = md5
changed_files.add(dirpath)
if obj.commit != commit:
obj.commit = commit
obj.save()

if model_class == HTMLFile:
# the `obj` is HTMLFile, so add it to the list
created_html_files.append(obj)

# Send bulk_post_create signal for bulk indexing to Elasticsearch
bulk_post_create.send(sender=HTMLFile, instance_list=created_html_files)

# Delete the HTMLFile first from previous commit and
# send bulk_post_delete signal for bulk removing from Elasticsearch
delete_queryset = (
HTMLFile.objects.filter(project=version.project,
version=version).exclude(commit=commit)
)
# Keep the objects into memory to send it to signal
instance_list = list(delete_queryset)
# Safely delete from database
delete_queryset.delete()
# Always pass the list of instance, not queryset.
bulk_post_delete.send(sender=HTMLFile, instance_list=instance_list)

# Delete ImportedFiles from previous versions
(
ImportedFile.objects.filter(project=version.project,
version=version).exclude(commit=commit
).delete()
)
changed_files = [
resolve_path(
version.project,
filename=file,
version_slug=version.slug,
) for file in changed_files
]
files_changed.send(
sender=Project,
project=version.project,
files=changed_files,
)


@app.task(queue='web')
def send_notifications(version_pk, build_pk):
version = Version.objects.get_object_or_log(pk=version_pk)
Expand Down
6 changes: 3 additions & 3 deletions readthedocs/rtd_tests/tests/test_celery.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,10 @@
from mock import MagicMock, patch

from readthedocs.builds.constants import LATEST
from readthedocs.builds.models import Build
from readthedocs.builds.models import Build, Version
from readthedocs.builds.tasks import fileify
from readthedocs.doc_builder.exceptions import VersionLockedError
from readthedocs.projects import tasks
from readthedocs.builds.models import Version
from readthedocs.projects.exceptions import RepositoryError
from readthedocs.projects.models import Project
from readthedocs.rtd_tests.base import RTDTestCase
Expand Down Expand Up @@ -267,5 +267,5 @@ def test_move_files_logging_when_wrong_version_pk(self, mock_logger):
@patch('readthedocs.builds.managers.log')
def test_fileify_logging_when_wrong_version_pk(self, mock_logger):
self.assertFalse(Version.objects.filter(pk=345343).exists())
tasks.fileify(version_pk=345343, commit=None)
fileify(version_pk=345343, commit=None)
mock_logger.warning.assert_called_with("Version not found for given kwargs. {'pk': 345343}")
2 changes: 1 addition & 1 deletion readthedocs/rtd_tests/tests/test_imported_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

from django.test import TestCase

from readthedocs.builds.tasks import _manage_imported_files
from readthedocs.projects.models import ImportedFile, Project
from readthedocs.projects.tasks import _manage_imported_files


base_dir = os.path.dirname(os.path.dirname(__file__))
Expand Down