From 4c0f25e6df3f01e721b6c5ab6bef7320fc756a4c Mon Sep 17 00:00:00 2001
From: Santos Gallegos <stsewd@proton.me>
Date: Mon, 20 Nov 2023 15:30:05 -0500
Subject: [PATCH 1/2] Search: index in small chunks

The default chunk size is 500 documents. Our documents have nested objects,
which can make the final document really big and cause ES to time out.

Ref https://github.com/readthedocs/readthedocs.org/issues/10911#issuecomment-1819688230
---
 readthedocs/projects/tasks/search.py | 3 +++
 readthedocs/search/utils.py          | 4 ++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/readthedocs/projects/tasks/search.py b/readthedocs/projects/tasks/search.py
index f065a957d28..f6751bde577 100644
--- a/readthedocs/projects/tasks/search.py
+++ b/readthedocs/projects/tasks/search.py
@@ -215,6 +215,9 @@ def _create_imported_files_and_search_index(
             document=PageDocument,
             objects=html_files_to_index,
             index_name=search_index_name,
+            # Pages are indexed in small chunks to avoid a
+            # large payload that will probably timeout ES.
+            chunk_size=25,
         )
 
     # Remove old HTMLFiles from ElasticSearch
diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py
index 4dd10b49e39..81f50e63f8c 100644
--- a/readthedocs/search/utils.py
+++ b/readthedocs/search/utils.py
@@ -10,7 +10,7 @@
 log = structlog.get_logger(__name__)
 
 
-def index_objects(document, objects, index_name=None):
+def index_objects(document, objects, index_name=None, chunk_size=500):
     if not DEDConfig.autosync_enabled():
         log.info("Autosync disabled, skipping searh indexing.")
         return
@@ -21,7 +21,7 @@ def index_objects(document, objects, index_name=None):
     if index_name:
         document._index._name = index_name
 
-    document().update(objects)
+    document().update(objects, chunk_size=chunk_size)
 
     # Restore the old index name.
     if index_name:

From 8ca5ea0386c72708f5dfd47cc455d8ad91dd3622 Mon Sep 17 00:00:00 2001
From: Santos Gallegos <stsewd@proton.me>
Date: Wed, 22 Nov 2023 10:23:34 -0500
Subject: [PATCH 2/2] Search: test indexing with chunk_size=100

---
 readthedocs/projects/tasks/search.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readthedocs/projects/tasks/search.py b/readthedocs/projects/tasks/search.py
index f6751bde577..0423b2fe403 100644
--- a/readthedocs/projects/tasks/search.py
+++ b/readthedocs/projects/tasks/search.py
@@ -217,7 +217,7 @@ def _create_imported_files_and_search_index(
             index_name=search_index_name,
             # Pages are indexed in small chunks to avoid a
             # large payload that will probably timeout ES.
-            chunk_size=25,
+            chunk_size=100,
         )
 
     # Remove old HTMLFiles from ElasticSearch