Commit a457fc0

Merge pull request #5939 from dojutsu-user/indexing-speedup
Indexing speedup
2 parents: 80f74e8 + 985488d

2 files changed: 26 additions, 20 deletions


readthedocs/search/management/commands/reindex_elasticsearch.py

Lines changed: 26 additions & 12 deletions
@@ -10,7 +10,6 @@
 
 from ...tasks import (index_objects_to_es, switch_es_index, create_new_es_index,
                       index_missing_objects)
-from ...utils import get_chunk
 
 log = logging.getLogger(__name__)
 
@@ -19,17 +18,32 @@ class Command(BaseCommand):
 
     @staticmethod
     def _get_indexing_tasks(app_label, model_name, index_name, queryset, document_class):
-        total = queryset.count()
-        chunks = get_chunk(total, settings.ES_TASK_CHUNK_SIZE)
-
-        for chunk in chunks:
-            data = {
-                'app_label': app_label,
-                'model_name': model_name,
-                'document_class': document_class,
-                'index_name': index_name,
-                'chunk': chunk
-            }
+        chunk_size = settings.ES_TASK_CHUNK_SIZE
+        qs_iterator = queryset.only('pk').iterator()
+        is_iterator_empty = False
+
+        data = {
+            'app_label': app_label,
+            'model_name': model_name,
+            'document_class': document_class,
+            'index_name': index_name,
+        }
+
+        while not is_iterator_empty:
+            objects_id = []
+
+            try:
+                for _ in range(chunk_size):
+                    pk = next(qs_iterator).pk
+                    objects_id.append(pk)
+
+                    if pk % 5000 == 0:
+                        log.info('Total: %s', pk)
+
+            except StopIteration:
+                is_iterator_empty = True
+
+            data['objects_id'] = objects_id
             yield index_objects_to_es.si(**data)
 
     def _run_reindex_tasks(self, models, queue):
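
The rewritten _get_indexing_tasks streams the queryset once with .only('pk').iterator(), collecting primary keys into objects_id batches of ES_TASK_CHUNK_SIZE, instead of running queryset.count() and precomputing offset ranges. A minimal sketch of that batching pattern follows; the generator name, the default chunk size, and the guard against yielding an empty final batch are illustrative assumptions, not code from this commit.

# Sketch of the pk-batching pattern used above. Assumes a Django queryset;
# the helper name and chunk size are hypothetical.
def iter_pk_chunks(queryset, chunk_size=500):
    """Yield lists of primary keys, at most ``chunk_size`` per batch."""
    qs_iterator = queryset.only('pk').iterator()  # one pass, no COUNT or OFFSET queries
    exhausted = False

    while not exhausted:
        objects_id = []
        try:
            for _ in range(chunk_size):
                objects_id.append(next(qs_iterator).pk)
        except StopIteration:
            exhausted = True

        if objects_id:  # unlike the command above, skip a possibly empty last batch
            yield objects_id

Each batch of ids can then be handed to a Celery task, as the command does with index_objects_to_es.si(**data).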

readthedocs/search/utils.py

Lines changed: 0 additions & 8 deletions
@@ -94,14 +94,6 @@ def get_project_list_or_404(project_slug, user, version_slug=None):
     return project_list
 
 
-def get_chunk(total, chunk_size):
-    """Yield successive `chunk_size` chunks."""
-    # Based on https://stackoverflow.com/a/312464
-    # licensed under cc by-sa 3.0
-    for i in range(0, total, chunk_size):
-        yield (i, i + chunk_size)
-
-
 def _get_index(indices, index_name):
     """
     Get Index from all the indices.
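
For context, the deleted get_chunk only produced (start, end) offset pairs, which the old command passed to the indexing task as 'chunk'. Slicing a Django queryset by such offsets translates to LIMIT/OFFSET SQL, so each later batch makes the database skip all preceding rows, and the up-front queryset.count() adds one more full query. A hypothetical illustration of that pattern (the consumer shown here is an assumption; the task that actually used the chunks is not part of this diff):

# Hypothetical consumer of the removed get_chunk() helper, for illustration only.
total = queryset.count()                 # extra COUNT(*) over the whole table
for start, end in get_chunk(total, 500):
    batch = list(queryset[start:end])    # SELECT ... LIMIT 500 OFFSET <start>
    index_batch(batch)                   # placeholder for the per-chunk indexing work

The new code in reindex_elasticsearch.py avoids both the count and the growing OFFSETs by streaming primary keys in a single pass.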
