Skip to content

Commit ce4abaf

Browse files
committed
optimizing indexing
1 parent b9dbb5d commit ce4abaf

File tree

6 files changed

+63
-8
lines changed

6 files changed

+63
-8
lines changed

readthedocs/projects/signals.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,7 @@
1414
project_import = django.dispatch.Signal(providing_args=["project"])
1515

1616
files_changed = django.dispatch.Signal(providing_args=["project", "files"])
17+
18+
bulk_post_create = django.dispatch.Signal(providing_args=["instance_list"])
19+
20+
bulk_post_delete = django.dispatch.Signal(providing_args=["instance_list"])

readthedocs/projects/tasks.py

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@
3232
from .constants import LOG_TEMPLATE
3333
from .exceptions import RepositoryError
3434
from .models import ImportedFile, Project, Domain, Feature, HTMLFile
35-
from .signals import before_vcs, after_vcs, before_build, after_build, files_changed
35+
from .signals import before_vcs, after_vcs, before_build, after_build, files_changed, \
36+
bulk_post_create, bulk_post_delete
3637
from readthedocs.builds.constants import (
3738
BUILD_STATE_BUILDING, BUILD_STATE_CLONING, BUILD_STATE_FINISHED,
3839
BUILD_STATE_INSTALLING, LATEST, LATEST_VERBOSE_NAME, STABLE_VERBOSE_NAME)
@@ -986,6 +987,7 @@ def _manage_imported_files(version, path, commit):
986987
:param commit: Commit that updated path
987988
"""
988989
changed_files = set()
990+
created_html_files = []
989991
for root, __, filenames in os.walk(path):
990992
for filename in filenames:
991993
if fnmatch.fnmatch(filename, '*.html'):
@@ -1015,15 +1017,27 @@ def _manage_imported_files(version, path, commit):
10151017
obj.commit = commit
10161018
obj.save()
10171019

1018-
# Delete the HTMLFile first from previous versions
1019-
HTMLFile.objects.filter(project=version.project,
1020-
version=version
1021-
).exclude(commit=commit).delete()
1020+
if isinstance(obj, HTMLFile):
1021+
# the `obj` is HTMLFile, so add it to the list
1022+
created_html_files.append(obj)
1023+
1024+
# Send bulk_post_create signal for bulk indexing to Elasticsearch
1025+
bulk_post_create.send(sender=HTMLFile, instance_list=created_html_files)
1026+
1027+
# Delete the HTMLFile first from previous commit and
1028+
# send bulk_post_delete signal for bulk removing from Elasticsearch
1029+
delete_queryset = (HTMLFile.objects.filter(project=version.project, version=version)
1030+
.exclude(commit=commit))
1031+
# Keep the objects in memory so they can still be sent to the signal after deletion
1032+
instance_list = list(delete_queryset)
1033+
# Safely delete from database
1034+
delete_queryset.delete()
1035+
# Always pass the list of instances, not the queryset.
1036+
bulk_post_delete.send(sender=HTMLFile, instance_list=instance_list)
10221037

10231038
# Delete ImportedFiles from previous versions
1024-
ImportedFile.objects.filter(project=version.project,
1025-
version=version
1026-
).exclude(commit=commit).delete()
1039+
(ImportedFile.objects.filter(project=version.project, version=version)
1040+
.exclude(commit=commit).delete())
10271041
changed_files = [
10281042
resolve_path(
10291043
version.project, filename=file, version_slug=version.slug,

readthedocs/search/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
default_app_config = 'readthedocs.search.apps.SearchConfig'

readthedocs/search/apps.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
"""Project app config"""
2+
3+
from django.apps import AppConfig
4+
5+
6+
class SearchConfig(AppConfig):
7+
name = 'readthedocs.search'
8+
9+
def ready(self):
10+
from .signals import index_html_file, remove_html_file

readthedocs/search/documents.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ class PageDocument(RTDDocTypeMixin, DocType):
5252
class Meta(object):
5353
model = HTMLFile
5454
fields = ('commit',)
55+
ignore_signals = True
5556

5657
project = fields.KeywordField(attr='project.slug')
5758
version = fields.KeywordField(attr='version.slug')

readthedocs/search/signals.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,32 @@
11
"""We define custom Django signals to trigger before executing searches."""
22
from __future__ import absolute_import
33
import django.dispatch
4+
from django.dispatch import receiver
5+
from django_elasticsearch_dsl.registries import registry
6+
7+
from readthedocs.projects.models import HTMLFile
8+
from readthedocs.projects.signals import bulk_post_create, bulk_post_delete
9+
from readthedocs.search.documents import PageDocument
10+
from readthedocs.search.tasks import index_objects_to_es_task
411

512
before_project_search = django.dispatch.Signal(providing_args=["body"])
613
before_file_search = django.dispatch.Signal(providing_args=["body"])
714
before_section_search = django.dispatch.Signal(providing_args=["body"])
15+
16+
17+
@receiver(bulk_post_create, sender=HTMLFile)
18+
def index_html_file(instance_list, **_):
19+
kwargs = {
20+
'app_label': HTMLFile._meta.app_label,
21+
'model_name': HTMLFile.__name__,
22+
'document_class': str(PageDocument),
23+
'index_name': None, # No need to change the index name
24+
'objects_id': [obj.id for obj in instance_list],
25+
}
26+
27+
index_objects_to_es_task.delay(**kwargs)
28+
29+
30+
@receiver(bulk_post_delete, sender=HTMLFile)
31+
def remove_html_file(instance_list, **_):
32+
registry.delete(instance_list)

0 commit comments

Comments
 (0)