Skip to content

Commit 6723e59

Browse files
authored
Search: index in small chunks (#10914)
* Search: index in small chunks. The default chunk size is 500 documents; our documents have nested objects, which can make the final document really big and cause ES to time out. Ref #10911 (comment)
* Test with 100
1 parent 05263b8 commit 6723e59

File tree

2 files changed

+5
-2
lines changed

2 files changed

+5
-2
lines changed

readthedocs/projects/tasks/search.py

+3
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,9 @@ def _create_imported_files_and_search_index(
215215
document=PageDocument,
216216
objects=html_files_to_index,
217217
index_name=search_index_name,
218+
# Pages are indexed in small chunks to avoid a
219+
# large payload that will probably timeout ES.
220+
chunk_size=100,
218221
)
219222

220223
# Remove old HTMLFiles from ElasticSearch

readthedocs/search/utils.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
log = structlog.get_logger(__name__)
1111

1212

13-
def index_objects(document, objects, index_name=None):
13+
def index_objects(document, objects, index_name=None, chunk_size=500):
1414
if not DEDConfig.autosync_enabled():
1515
log.info("Autosync disabled, skipping searh indexing.")
1616
return
@@ -21,7 +21,7 @@ def index_objects(document, objects, index_name=None):
2121
if index_name:
2222
document._index._name = index_name
2323

24-
document().update(objects)
24+
document().update(objects, chunk_size=chunk_size)
2525

2626
# Restore the old index name.
2727
if index_name:

0 commit comments

Comments
 (0)