Skip to content

Commit a508020

Browse files
ericholscher authored and safwanrahman committed
Merge pull request #4615 from safwanrahman/search_fix
fixing the indexing
2 parents 9a78698 + ad2d174 commit a508020

File tree

4 files changed: 28 additions (+28) and 22 deletions (-22).

readthedocs/projects/tasks.py

Lines changed: 4 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -29,11 +29,6 @@
2929
from django.utils.translation import ugettext_lazy as _
3030
from slumber.exceptions import HttpClientError
3131

32-
from .constants import LOG_TEMPLATE
33-
from .exceptions import RepositoryError
34-
from .models import ImportedFile, Project, Domain, Feature, HTMLFile
35-
from .signals import before_vcs, after_vcs, before_build, after_build, files_changed, \
36-
bulk_post_create, bulk_post_delete
3732
from readthedocs.builds.constants import (
3833
BUILD_STATE_BUILDING, BUILD_STATE_CLONING, BUILD_STATE_FINISHED,
3934
BUILD_STATE_INSTALLING, LATEST, LATEST_VERBOSE_NAME, STABLE_VERBOSE_NAME)
@@ -59,12 +54,12 @@
5954
from readthedocs.search.parse_json import process_all_json_files
6055
from readthedocs.vcs_support import utils as vcs_support_utils
6156
from readthedocs.worker import app
62-
6357
from .constants import LOG_TEMPLATE
6458
from .exceptions import RepositoryError
65-
from .models import Domain, Feature, ImportedFile, Project
66-
from .signals import (
67-
after_build, after_vcs, before_build, before_vcs, files_changed)
59+
from .models import Domain, ImportedFile, Project
60+
from .models import HTMLFile
61+
from .signals import (after_build, after_vcs, before_build, before_vcs,
62+
bulk_post_create, bulk_post_delete, files_changed)
6863

6964
log = logging.getLogger(__name__)
7065

readthedocs/search/management/commands/reindex_elasticsearch.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@
1010

1111
from ...tasks import (index_objects_to_es, switch_es_index, create_new_es_index,
1212
index_missing_objects)
13-
from ...utils import chunk_queryset
13+
from ...utils import get_chunk
1414

1515
log = logging.getLogger(__name__)
1616

@@ -19,16 +19,16 @@ class Command(BaseCommand):
1919

2020
@staticmethod
2121
def _get_indexing_tasks(app_label, model_name, queryset, document_class, index_name):
22-
queryset = queryset.values_list('id', flat=True)
23-
chunked_queryset = chunk_queryset(queryset, settings.ES_TASK_CHUNK_SIZE)
22+
total = queryset.count()
23+
chunks = get_chunk(total, settings.ES_TASK_CHUNK_SIZE)
2424

25-
for chunk in chunked_queryset:
25+
for chunk in chunks:
2626
data = {
2727
'app_label': app_label,
2828
'model_name': model_name,
2929
'document_class': document_class,
3030
'index_name': index_name,
31-
'objects_id': list(chunk)
31+
'chunk': chunk
3232
}
3333
yield index_objects_to_es.si(**data)
3434

readthedocs/search/tasks.py

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,14 +68,26 @@ def switch_es_index(app_label, model_name, index_name, new_index_name):
6868

6969

7070
@app.task(queue='web')
71-
def index_objects_to_es(app_label, model_name, document_class, index_name, objects_id):
71+
def index_objects_to_es(app_label, model_name, document_class, index_name,
72+
chunk=None, objects_id=None):
73+
74+
assert not (chunk and objects_id), "You can not pass both chunk and objects_id"
75+
7276
model = apps.get_model(app_label, model_name)
7377
document = _get_document(model=model, document_class=document_class)
7478

7579
# Use queryset from model as the ids are specific
76-
queryset = model.objects.all().filter(id__in=objects_id).iterator()
77-
log.info("Indexing model: {}, id:'{}'".format(model.__name__, objects_id))
78-
document().update(queryset, index_name=index_name)
80+
queryset = model.objects.all()
81+
if chunk:
82+
# Chunk is a tuple with start and end index of queryset
83+
start = chunk[0]
84+
end = chunk[1]
85+
queryset = queryset[start:end]
86+
elif objects_id:
87+
queryset = queryset.filter(id__in=objects_id)
88+
89+
log.info("Indexing model: {}, '{}' objects".format(model.__name__, queryset.count()))
90+
document().update(queryset.iterator(), index_name=index_name)
7991

8092

8193
@app.task(queue='web')

readthedocs/search/utils.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -323,10 +323,9 @@ def get_project_list_or_404(project_slug, user):
323323
return project_list
324324

325325

326-
def chunk_queryset(queryset, chunk_size):
327-
"""Yield successive `chunk_size` chunks of queryset."""
326+
def get_chunk(total, chunk_size):
327+
"""Yield successive `chunk_size` chunks"""
328328
# Based on https://stackoverflow.com/a/312464
329329
# licensed under cc by-sa 3.0
330-
total = queryset.count()
331330
for i in range(0, total, chunk_size):
332-
yield queryset[i:i + chunk_size]
331+
yield (i, i + chunk_size)

0 commit comments

Comments
 (0)