From b38423d7c62a844fbfa23e6bd73f9ed569060fbc Mon Sep 17 00:00:00 2001
From: dojutsu-user
Date: Tue, 16 Jul 2019 23:18:06 +0530
Subject: [PATCH 1/4] fix indexing speedup

---
 .../commands/reindex_elasticsearch.py | 36 ++++++++++++-------
 readthedocs/search/utils.py           |  8 -----
 2 files changed, 23 insertions(+), 21 deletions(-)

diff --git a/readthedocs/search/management/commands/reindex_elasticsearch.py b/readthedocs/search/management/commands/reindex_elasticsearch.py
index 32f9a4c8534..bc2c9965f47 100644
--- a/readthedocs/search/management/commands/reindex_elasticsearch.py
+++ b/readthedocs/search/management/commands/reindex_elasticsearch.py
@@ -10,7 +10,6 @@
 from ...tasks import (index_objects_to_es, switch_es_index,
                       create_new_es_index,
                       index_missing_objects)
-from ...utils import get_chunk
 
 log = logging.getLogger(__name__)
 
@@ -19,18 +18,29 @@ class Command(BaseCommand):
 
     @staticmethod
     def _get_indexing_tasks(app_label, model_name, index_name, queryset, document_class):
-        total = queryset.count()
-        chunks = get_chunk(total, settings.ES_TASK_CHUNK_SIZE)
-
-        for chunk in chunks:
-            data = {
-                'app_label': app_label,
-                'model_name': model_name,
-                'document_class': document_class,
-                'index_name': index_name,
-                'chunk': chunk
-            }
-            yield index_objects_to_es.si(**data)
+        chunk_size = settings.ES_TASK_CHUNK_SIZE
+        qs_iterator = queryset.only('pk').iterator()
+        is_iterator_empty = False
+
+        data = {
+            'app_label': app_label,
+            'model_name': model_name,
+            'document_class': document_class,
+            'index_name': index_name,
+        }
+
+        while not is_iterator_empty:
+            objects_id = []
+
+            try:
+                for _ in range(chunk_size):
+                    objects_id.append(qs_iterator.__next__().pk)
+            except StopIteration:
+                is_iterator_empty = True
+
+            if objects_id:
+                data['objects_id']: objects_id
+                yield index_objects_to_es.si(**data)
 
     def _run_reindex_tasks(self, models, queue):
         apply_async_kwargs = {'priority': 0}
diff --git a/readthedocs/search/utils.py b/readthedocs/search/utils.py
index 0ff42ddcdd2..cf1f0fb73aa 100644
--- a/readthedocs/search/utils.py
+++ b/readthedocs/search/utils.py
@@ -94,14 +94,6 @@ def get_project_list_or_404(project_slug, user, version_slug=None):
     return project_list
 
 
-def get_chunk(total, chunk_size):
-    """Yield successive `chunk_size` chunks."""
-    # Based on https://stackoverflow.com/a/312464
-    # licensed under cc by-sa 3.0
-    for i in range(0, total, chunk_size):
-        yield (i, i + chunk_size)
-
-
 def _get_index(indices, index_name):
     """
     Get Index from all the indices.
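Reviewer note (not part of the patch): the change above replaces the
count()-plus-offset chunking from get_chunk() with a loop that pulls up to
ES_TASK_CHUNK_SIZE primary keys at a time from a lazy queryset iterator. A
minimal standalone sketch of that chunking idea, using itertools.islice on a
plain iterator in place of the Django queryset and the explicit next() loop;
all names here are illustrative only:

    import itertools

    def chunked_pks(pks, chunk_size):
        """Yield lists of at most `chunk_size` items from an iterable of pks."""
        iterator = iter(pks)
        while True:
            chunk = list(itertools.islice(iterator, chunk_size))
            if not chunk:
                return
            yield chunk

    # e.g. 10 fake primary keys split into chunks of 3
    print(list(chunked_pks(range(1, 11), 3)))
    # -> [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10]]

The patch expresses the same idea with a for/next() loop and a StopIteration
flag so it can build one Celery signature per chunk as it goes.
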
From cbbbb424c44d07360bdc38cf6e440acf1b7b0732 Mon Sep 17 00:00:00 2001
From: dojutsu-user
Date: Tue, 16 Jul 2019 23:21:44 +0530
Subject: [PATCH 2/4] remove if

---
 .../search/management/commands/reindex_elasticsearch.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/readthedocs/search/management/commands/reindex_elasticsearch.py b/readthedocs/search/management/commands/reindex_elasticsearch.py
index bc2c9965f47..c59a6a5dab9 100644
--- a/readthedocs/search/management/commands/reindex_elasticsearch.py
+++ b/readthedocs/search/management/commands/reindex_elasticsearch.py
@@ -38,9 +38,8 @@ def _get_indexing_tasks(app_label, model_name, index_name, queryset, document_cl
             except StopIteration:
                 is_iterator_empty = True
 
-            if objects_id:
-                data['objects_id']: objects_id
-                yield index_objects_to_es.si(**data)
+            data['objects_id'] = objects_id
+            yield index_objects_to_es.si(**data)
 
     def _run_reindex_tasks(self, models, queue):
         apply_async_kwargs = {'priority': 0}

From a6b8a1a6141b2efcd2c36d15fdc7eb675b8300b5 Mon Sep 17 00:00:00 2001
From: dojutsu-user
Date: Tue, 16 Jul 2019 23:35:42 +0530
Subject: [PATCH 3/4] use next()

---
 readthedocs/search/management/commands/reindex_elasticsearch.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/readthedocs/search/management/commands/reindex_elasticsearch.py b/readthedocs/search/management/commands/reindex_elasticsearch.py
index c59a6a5dab9..fb2662ac2fb 100644
--- a/readthedocs/search/management/commands/reindex_elasticsearch.py
+++ b/readthedocs/search/management/commands/reindex_elasticsearch.py
@@ -34,7 +34,7 @@ def _get_indexing_tasks(app_label, model_name, index_name, queryset, document_cl
 
             try:
                 for _ in range(chunk_size):
-                    objects_id.append(qs_iterator.__next__().pk)
+                    objects_id.append(next(qs_iterator).pk)
             except StopIteration:
                 is_iterator_empty = True
 

From 985488d6afb6109bca4a7d988ec0a2ae73f47d50 Mon Sep 17 00:00:00 2001
From: dojutsu-user
Date: Tue, 16 Jul 2019 23:50:07 +0530
Subject: [PATCH 4/4] add logging

---
 .../search/management/commands/reindex_elasticsearch.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/readthedocs/search/management/commands/reindex_elasticsearch.py b/readthedocs/search/management/commands/reindex_elasticsearch.py
index fb2662ac2fb..7c0ea6982cf 100644
--- a/readthedocs/search/management/commands/reindex_elasticsearch.py
+++ b/readthedocs/search/management/commands/reindex_elasticsearch.py
@@ -34,7 +34,12 @@ def _get_indexing_tasks(app_label, model_name, index_name, queryset, document_cl
 
             try:
                 for _ in range(chunk_size):
-                    objects_id.append(next(qs_iterator).pk)
+                    pk = next(qs_iterator).pk
+                    objects_id.append(pk)
+
+                    if pk % 5000 == 0:
+                        log.info('Total: %s', pk)
+
             except StopIteration:
                 is_iterator_empty = True
 
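Reviewer note (not part of the patches): with all four commits applied, the
indexing loop pulls primary keys with next(), logs progress on every pk that
is a multiple of 5000, and yields one Celery signature per chunk. A standalone
sketch of that final shape, with plain integers standing in for model rows,
print() standing in for log.info(), and the yielded id list standing in for
index_objects_to_es.si(**data); all names below are illustrative only:

    def sketch_indexing_chunks(pks, chunk_size):
        it = iter(pks)
        is_iterator_empty = False

        while not is_iterator_empty:
            objects_id = []
            try:
                for _ in range(chunk_size):
                    pk = next(it)
                    objects_id.append(pk)
                    if pk % 5000 == 0:
                        print('Total: %s' % pk)  # stand-in for log.info()
            except StopIteration:
                is_iterator_empty = True

            # the patched code sets data['objects_id'] = objects_id and yields
            # index_objects_to_es.si(**data); here we just yield the id chunk
            yield objects_id

    for chunk in sketch_indexing_chunks(range(4998, 5007), 4):
        print(chunk)
    # Total: 5000
    # [4998, 4999, 5000, 5001]
    # [5002, 5003, 5004, 5005]
    # [5006]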