From 01f5c42a2fe0cc234ae95f9d239cbb5018d0217e Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Wed, 6 Dec 2017 21:54:21 -0800 Subject: [PATCH 1/9] Upgrade search app to Elastic Search 5.x --- .../commands/reindex_elasticsearch.py | 4 +- readthedocs/restapi/urls.py | 2 +- readthedocs/restapi/utils.py | 3 +- readthedocs/search/indexes.py | 73 +++++++++---------- readthedocs/settings/base.py | 1 + readthedocs/urls.py | 8 +- requirements/pip.txt | 3 +- 7 files changed, 48 insertions(+), 46 deletions(-) diff --git a/readthedocs/core/management/commands/reindex_elasticsearch.py b/readthedocs/core/management/commands/reindex_elasticsearch.py index b736a1cd426..0e85ee7be9f 100644 --- a/readthedocs/core/management/commands/reindex_elasticsearch.py +++ b/readthedocs/core/management/commands/reindex_elasticsearch.py @@ -29,7 +29,7 @@ def handle(self, *args, **options): """Build/index all versions or a single project's version""" project = options['project'] - queryset = Version.objects.all() + queryset = Version.objects.filter(active=True) if project: queryset = queryset.filter(project__slug=project) @@ -37,7 +37,7 @@ def handle(self, *args, **options): raise CommandError( 'No project with slug: {slug}'.format(slug=project)) log.info("Building all versions for %s", project) - elif getattr(settings, 'INDEX_ONLY_LATEST', True): + if getattr(settings, 'INDEX_ONLY_LATEST', True): queryset = queryset.filter(slug=LATEST) for version in queryset: diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py index 5480ce98093..08315000a1c 100644 --- a/readthedocs/restapi/urls.py +++ b/readthedocs/restapi/urls.py @@ -50,7 +50,7 @@ url(r'index_search/', search_views.index_search, name='index_search'), - url(r'search/$', views.search_views.search, name='api_search'), + url(r'^search/$', views.search_views.search, name='api_search'), url(r'search/project/$', search_views.project_search, name='api_project_search'), diff --git a/readthedocs/restapi/utils.py 
b/readthedocs/restapi/utils.py index 00e9ec15937..74eb6d8b413 100644 --- a/readthedocs/restapi/utils.py +++ b/readthedocs/restapi/utils.py @@ -161,12 +161,11 @@ def index_search_request( for route in routes: section_obj.bulk_index( section_index_list, - parent=page_id, routing=route, ) for route in routes: - page_obj.bulk_index(index_list, parent=project.slug, routing=route) + page_obj.bulk_index(index_list, routing=route) if delete: log.info('Deleting files not in commit: %s', commit) diff --git a/readthedocs/search/indexes.py b/readthedocs/search/indexes.py index 1b2ede6aaa9..a3a409e7e1f 100644 --- a/readthedocs/search/indexes.py +++ b/readthedocs/search/indexes.py @@ -19,7 +19,7 @@ import datetime from elasticsearch import Elasticsearch, exceptions -from elasticsearch.helpers import bulk_index +from elasticsearch.helpers import bulk from django.conf import settings @@ -48,8 +48,6 @@ def get_settings(self, settings_override=None): 'number_of_replicas': settings.ES_DEFAULT_NUM_REPLICAS, 'number_of_shards': settings.ES_DEFAULT_NUM_SHARDS, 'refresh_interval': '5s', - 'store.compress.tv': True, - 'store.compress.stored': True, 'analysis': self.get_analysis(), } if settings_override: @@ -139,7 +137,7 @@ def bulk_index(self, data, index=None, chunk_size=500, parent=None, docs.append(doc) # TODO: This doesn't work with the new ES setup. - bulk_index(self.es, docs, chunk_size=chunk_size) + bulk(self.es, docs, chunk_size=chunk_size) def index_document(self, data, index=None, parent=None, routing=None): doc = self.extract_document(data) @@ -220,25 +218,24 @@ def get_mapping(self): # Disable _all field to reduce index size. 
'_all': {'enabled': False}, 'properties': { - 'id': {'type': 'long'}, - 'name': {'type': 'string', 'analyzer': 'default_icu'}, - 'description': {'type': 'string', 'analyzer': 'default_icu'}, - - 'slug': {'type': 'string', 'index': 'not_analyzed'}, - 'lang': {'type': 'string', 'index': 'not_analyzed'}, - 'tags': {'type': 'string', 'index': 'not_analyzed'}, - 'privacy': {'type': 'string', 'index': 'not_analyzed'}, + 'id': {'type': 'keyword'}, + 'name': {'type': 'text', 'analyzer': 'default_icu'}, + 'description': {'type': 'text', 'analyzer': 'default_icu'}, + + 'slug': {'type': 'keyword'}, + 'lang': {'type': 'keyword'}, + 'tags': {'type': 'keyword'}, + 'privacy': {'type': 'keyword'}, 'author': { - 'type': 'string', + 'type': 'text', 'analyzer': 'default_icu', 'fields': { 'raw': { - 'type': 'string', - 'index': 'not_analyzed', + 'type': 'keyword', }, }, }, - 'url': {'type': 'string', 'index': 'not_analyzed'}, + 'url': {'type': 'keyword'}, # Add a weight field to enhance relevancy scoring. 'weight': {'type': 'float'}, } @@ -273,19 +270,19 @@ def get_mapping(self): # Disable _all field to reduce index size. '_all': {'enabled': False}, # Associate a page with a project. 
- '_parent': {'type': self._parent}, + # '_parent': {'type': self._parent}, 'properties': { - 'id': {'type': 'string', 'index': 'not_analyzed'}, - 'sha': {'type': 'string', 'index': 'not_analyzed'}, - 'project': {'type': 'string', 'index': 'not_analyzed'}, - 'version': {'type': 'string', 'index': 'not_analyzed'}, - 'path': {'type': 'string', 'index': 'not_analyzed'}, - 'taxonomy': {'type': 'string', 'index': 'not_analyzed'}, - 'commit': {'type': 'string', 'index': 'not_analyzed'}, - - 'title': {'type': 'string', 'analyzer': 'default_icu'}, - 'headers': {'type': 'string', 'analyzer': 'default_icu'}, - 'content': {'type': 'string', 'analyzer': 'default_icu'}, + 'id': {'type': 'keyword'}, + 'sha': {'type': 'keyword'}, + 'project': {'type': 'keyword'}, + 'version': {'type': 'keyword'}, + 'path': {'type': 'keyword'}, + 'taxonomy': {'type': 'keyword'}, + 'commit': {'type': 'keyword'}, + + 'title': {'type': 'text', 'analyzer': 'default_icu'}, + 'headers': {'type': 'text', 'analyzer': 'default_icu'}, + 'content': {'type': 'text', 'analyzer': 'default_icu'}, # Add a weight field to enhance relevancy scoring. 'weight': {'type': 'float'}, } @@ -321,7 +318,7 @@ def get_mapping(self): # Disable _all field to reduce index size. '_all': {'enabled': False}, # Associate a section with a page. - '_parent': {'type': self._parent}, + # '_parent': {'type': self._parent}, # Commenting this out until we need it. 
# 'suggest': { # "type": "completion", @@ -330,18 +327,18 @@ def get_mapping(self): # "payloads": True, # }, 'properties': { - 'id': {'type': 'string', 'index': 'not_analyzed'}, - 'project': {'type': 'string', 'index': 'not_analyzed'}, - 'version': {'type': 'string', 'index': 'not_analyzed'}, - 'path': {'type': 'string', 'index': 'not_analyzed'}, - 'page_id': {'type': 'string', 'index': 'not_analyzed'}, - 'commit': {'type': 'string', 'index': 'not_analyzed'}, - 'title': {'type': 'string', 'analyzer': 'default_icu'}, - 'content': {'type': 'string', 'analyzer': 'default_icu'}, + 'id': {'type': 'keyword'}, + 'project': {'type': 'keyword'}, + 'version': {'type': 'keyword'}, + 'path': {'type': 'keyword'}, + 'page_id': {'type': 'keyword'}, + 'commit': {'type': 'keyword'}, + 'title': {'type': 'text', 'analyzer': 'default_icu'}, + 'content': {'type': 'text', 'analyzer': 'default_icu'}, 'blocks': { 'type': 'object', 'properties': { - 'code': {'type': 'string', 'analyzer': 'default_icu'} + 'code': {'type': 'text', 'analyzer': 'default_icu'} } }, # Add a weight field to enhance relevancy scoring. 
diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py index 8057901534f..31754880ee0 100644 --- a/readthedocs/settings/base.py +++ b/readthedocs/settings/base.py @@ -120,6 +120,7 @@ def INSTALLED_APPS(self): # noqa apps.append('django_countries') apps.append('readthedocsext.donate') apps.append('readthedocsext.embed') + apps.append('readthedocsext.search') return apps @property diff --git a/readthedocs/urls.py b/readthedocs/urls.py index b6053ab1983..25e04135d84 100644 --- a/readthedocs/urls.py +++ b/readthedocs/urls.py @@ -88,7 +88,7 @@ if settings.USE_PROMOS: # Include donation URL's - groups.append([ + groups.insert(0, [ url(r'^sustainability/', include('readthedocsext.donate.urls')), ]) @@ -98,6 +98,12 @@ url(r'^api/v1/embed/', include('readthedocsext.embed.urls')) ) +if 'readthedocsext.search' in settings.INSTALLED_APPS: + for num, _url in enumerate(rtd_urls): + if _url and hasattr(_url, 'name') and _url.name == 'search': + rtd_urls[num] = \ + url(r'^search/', 'readthedocsext.search.mainsearch.elastic_search', name='search'), + if not getattr(settings, 'USE_SUBDOMAIN', False) or settings.DEBUG: groups.insert(0, docs_urls) if getattr(settings, 'ALLOW_ADMIN', True): diff --git a/requirements/pip.txt b/requirements/pip.txt index a6dc76d9588..9b3fb8072af 100644 --- a/requirements/pip.txt +++ b/requirements/pip.txt @@ -51,8 +51,7 @@ httplib2==0.10.3 GitPython==2.1.8 # Search -elasticsearch==1.5.0 -pyelasticsearch==0.7.1 +elasticsearch==5.5.2 pyquery==1.4.0 # Utils From 953ae34b03c255d5f382d6f32b446d4dcc9e2365 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 12 Dec 2017 15:20:54 -0800 Subject: [PATCH 2/9] Default sections off and fix unicode on index --- .../core/management/commands/reindex_elasticsearch.py | 8 ++++---- readthedocs/restapi/views/search_views.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/readthedocs/core/management/commands/reindex_elasticsearch.py 
b/readthedocs/core/management/commands/reindex_elasticsearch.py index 0e85ee7be9f..686afa77a40 100644 --- a/readthedocs/core/management/commands/reindex_elasticsearch.py +++ b/readthedocs/core/management/commands/reindex_elasticsearch.py @@ -35,13 +35,13 @@ def handle(self, *args, **options): queryset = queryset.filter(project__slug=project) if not queryset.exists(): raise CommandError( - 'No project with slug: {slug}'.format(slug=project)) - log.info("Building all versions for %s", project) + u'No project with slug: {slug}'.format(slug=project)) + log.info(u"Building all versions for %s", project) if getattr(settings, 'INDEX_ONLY_LATEST', True): queryset = queryset.filter(slug=LATEST) for version in queryset: - log.info("Reindexing %s", version) + log.info(u"Reindexing %s", version) try: commit = version.project.vcs_repo(version.slug).commit except: # pylint: disable=bare-except @@ -53,4 +53,4 @@ def handle(self, *args, **options): update_search(version.pk, commit, delete_non_commit_files=False) except Exception: - log.exception('Reindex failed for %s', version) + log.exception(u'Reindex failed for %s', version) diff --git a/readthedocs/restapi/views/search_views.py b/readthedocs/restapi/views/search_views.py index abe36174097..1db28af08e2 100644 --- a/readthedocs/restapi/views/search_views.py +++ b/readthedocs/restapi/views/search_views.py @@ -32,7 +32,7 @@ def index_search(request): utils.index_search_request( version=version, page_list=data['page_list'], commit=commit, - project_scale=project_scale, page_scale=page_scale) + project_scale=project_scale, page_scale=page_scale, section=False) return Response({'indexed': True}) From d8761d8fe321913f756e3daa95afe443bdf1cb00 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 19 Dec 2017 10:29:28 -0800 Subject: [PATCH 3/9] Run task via celery to do concurrency --- .../core/management/commands/reindex_elasticsearch.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git 
a/readthedocs/core/management/commands/reindex_elasticsearch.py b/readthedocs/core/management/commands/reindex_elasticsearch.py index 686afa77a40..02580287451 100644 --- a/readthedocs/core/management/commands/reindex_elasticsearch.py +++ b/readthedocs/core/management/commands/reindex_elasticsearch.py @@ -3,6 +3,7 @@ from __future__ import absolute_import import logging from optparse import make_option +import socket from django.core.management.base import BaseCommand from django.core.management.base import CommandError @@ -38,6 +39,7 @@ def handle(self, *args, **options): u'No project with slug: {slug}'.format(slug=project)) log.info(u"Building all versions for %s", project) if getattr(settings, 'INDEX_ONLY_LATEST', True): + log.warning('Indexing only latest') queryset = queryset.filter(slug=LATEST) for version in queryset: @@ -50,7 +52,9 @@ def handle(self, *args, **options): commit = None try: - update_search(version.pk, commit, - delete_non_commit_files=False) + update_search.apply_async(args=[version.pk, commit], + kwargs=dict(delete_non_commit_files=False), + priority=0, + queue=socket.gethostname()) except Exception: log.exception(u'Reindex failed for %s', version) From 6e1c91e5aae00f5e40ef18de398c65b7c1199ad5 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 19 Dec 2017 10:55:48 -0800 Subject: [PATCH 4/9] Lint reindex_elasticsearch --- .../commands/reindex_elasticsearch.py | 52 +++++++++++-------- 1 file changed, 29 insertions(+), 23 deletions(-) diff --git a/readthedocs/core/management/commands/reindex_elasticsearch.py b/readthedocs/core/management/commands/reindex_elasticsearch.py index 02580287451..24927a02f3e 100644 --- a/readthedocs/core/management/commands/reindex_elasticsearch.py +++ b/readthedocs/core/management/commands/reindex_elasticsearch.py @@ -1,13 +1,14 @@ -"""Reindex Elastic Search indexes""" +# -*- coding: utf-8 -*- +"""Reindex Elastic Search indexes.""" + +from __future__ import ( + absolute_import, division, print_function, 
unicode_literals) -from __future__ import absolute_import import logging -from optparse import make_option import socket +from optparse import make_option -from django.core.management.base import BaseCommand -from django.core.management.base import CommandError -from django.conf import settings +from django.core.management.base import BaseCommand, CommandError from readthedocs.builds.constants import LATEST from readthedocs.builds.models import Version @@ -24,11 +25,17 @@ class Command(BaseCommand): dest='project', default='', help='Project to index'), + make_option('-l', + dest='only_latest', + default=False, + action='store_true', + help='Only index latest'), ) def handle(self, *args, **options): - """Build/index all versions or a single project's version""" + """Build/index all versions or a single project's version.""" project = options['project'] + only_latest = options['only_latest'] queryset = Version.objects.filter(active=True) @@ -37,24 +44,23 @@ def handle(self, *args, **options): if not queryset.exists(): raise CommandError( u'No project with slug: {slug}'.format(slug=project)) - log.info(u"Building all versions for %s", project) - if getattr(settings, 'INDEX_ONLY_LATEST', True): + log.info(u'Building all versions for %s', project) + if only_latest: log.warning('Indexing only latest') queryset = queryset.filter(slug=LATEST) - for version in queryset: - log.info(u"Reindexing %s", version) - try: - commit = version.project.vcs_repo(version.slug).commit - except: # pylint: disable=bare-except - # An exception can be thrown here in production, but it's not - # documented what the exception here is - commit = None - + for version_pk, version_slug, project_slug in queryset.values_list( + 'pk', 'slug', 'project__slug'): + log.info(u'Reindexing %s:%s' % (project_slug, version_slug)) try: - update_search.apply_async(args=[version.pk, commit], - kwargs=dict(delete_non_commit_files=False), - priority=0, - queue=socket.gethostname()) + update_search.apply_async( + 
kwargs=dict( + version_pk=version_pk, + commit='reindex', + delete_non_commit_files=False + ), + priority=0, + queue=socket.gethostname() + ) except Exception: - log.exception(u'Reindex failed for %s', version) + log.exception(u'Reindexing failed for %s:%s' % (project_slug, version_slug)) From 2a2e0a42dbe1f767149ed78c77d5b970d2ff4fae Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Wed, 20 Dec 2017 14:37:47 -0800 Subject: [PATCH 5/9] Make reindexing better --- readthedocs/search/indexes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/search/indexes.py b/readthedocs/search/indexes.py index a3a409e7e1f..378774e3b9e 100644 --- a/readthedocs/search/indexes.py +++ b/readthedocs/search/indexes.py @@ -74,7 +74,7 @@ def get_analysis(self): analyzers['default_icu'] = { 'type': 'custom', 'tokenizer': 'icu_tokenizer', - 'filter': ['word_delimiter', 'icu_folding', 'icu_normalizer'], + 'filter': ['custom_word_delimiter', 'icu_folding', 'icu_normalizer', 'lowercase'], } # Customize the word_delimiter filter to set various options. From cc7008216dd876b7f9e5a1c1747c782fc41bd892 Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Tue, 13 Mar 2018 09:58:26 +0100 Subject: [PATCH 6/9] search: don't index page id As elasticsearch expects a long while we have an hash as string. While at it remove references to _parent and _id as they are gone. 
--- readthedocs/search/indexes.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/readthedocs/search/indexes.py b/readthedocs/search/indexes.py index 378774e3b9e..0184b086292 100644 --- a/readthedocs/search/indexes.py +++ b/readthedocs/search/indexes.py @@ -127,11 +127,8 @@ def bulk_index(self, data, index=None, chunk_size=500, parent=None, doc = { '_index': index, '_type': self._type, - '_id': source['id'], '_source': source, } - if parent: - doc['_parent'] = parent if routing: doc['_routing'] = routing docs.append(doc) @@ -294,7 +291,7 @@ def get_mapping(self): def extract_document(self, data): doc = {} - attrs = ('id', 'project', 'title', 'headers', 'version', 'path', + attrs = ('project', 'title', 'headers', 'version', 'path', 'content', 'taxonomy', 'commit') for attr in attrs: doc[attr] = data.get(attr, '') From 1b90a0117933b98e3d95d29a8dadf9dbf2d7504f Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Tue, 13 Mar 2018 10:45:57 +0100 Subject: [PATCH 7/9] search: fixup elasticsearch query syntax for 5.5 fields is now _source and filter should be inside the query. aggs is the new facets and we can drop filtering on aggs as they should be calculated on the query results so after filtering. 
--- readthedocs/projects/views/public.py | 19 +++--- readthedocs/restapi/views/search_views.py | 7 ++- readthedocs/search/lib.py | 70 +++++++++-------------- readthedocs/search/views.py | 15 +++-- 4 files changed, 52 insertions(+), 59 deletions(-) diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py index 61f6cfeca64..6db89a93ce0 100644 --- a/readthedocs/projects/views/public.py +++ b/readthedocs/projects/views/public.py @@ -305,6 +305,10 @@ def elastic_project_search(request, project_slug): {'match': {'title': {'query': query, 'boost': 10}}}, {'match': {'headers': {'query': query, 'boost': 5}}}, {'match': {'content': {'query': query}}}, + ], + 'filter': [ + {'term': {'project': project_slug}}, + {'term': {'version': version_slug}}, ] } }, @@ -315,13 +319,7 @@ def elastic_project_search(request, project_slug): 'content': {}, } }, - 'fields': ['title', 'project', 'version', 'path'], - 'filter': { - 'and': [ - {'term': {'project': project_slug}}, - {'term': {'version': version_slug}}, - ] - }, + '_source': ['title', 'project', 'version', 'path'], 'size': 50, # TODO: Support pagination. 
} @@ -335,9 +333,12 @@ def elastic_project_search(request, project_slug): if results: # pre and post 1.0 compat for num, hit in enumerate(results['hits']['hits']): - for key, val in list(hit['fields'].items()): + for key, val in list(hit['_source'].items()): if isinstance(val, list): - results['hits']['hits'][num]['fields'][key] = val[0] + results['hits']['hits'][num]['_source'][key] = val[0] + # we cannot render attributes starting with an underscore + hit['fields'] = hit['_source'] + del hit['_source'] return render( request, diff --git a/readthedocs/restapi/views/search_views.py b/readthedocs/restapi/views/search_views.py index 1db28af08e2..a0d64b401bd 100644 --- a/readthedocs/restapi/views/search_views.py +++ b/readthedocs/restapi/views/search_views.py @@ -64,7 +64,7 @@ def search(request): # Supplement result paths with domain information on project hits = results.get('hits', {}).get('hits', []) for (n, hit) in enumerate(hits): - fields = hit.get('fields', {}) + fields = hit.get('_source', {}) search_project = fields.get('project')[0] search_version = fields.get('version')[0] path = fields.get('path')[0] @@ -77,9 +77,12 @@ def search(request): ) except ProjectRelationship.DoesNotExist: pass - results['hits']['hits'][n]['fields']['link'] = ( + results['hits']['hits'][n]['_source']['link'] = ( canonical_url + path ) + # we cannot render attributes starting with an underscore + results['hits']['hits'][n]['fields'] = results['hits']['hits'][n]['_source'] + del results['hits']['hits'][n]['_source'] return Response({'results': results}) diff --git a/readthedocs/search/lib.py b/readthedocs/search/lib.py index 8500a829b03..5ace5d84094 100644 --- a/readthedocs/search/lib.py +++ b/readthedocs/search/lib.py @@ -25,9 +25,9 @@ def search_project(request, query, language=None): ] }, }, - "facets": { + "aggs": { "language": { - "terms": {"field": "lang"}, + "terms": {"field": "lang.keyword"}, }, }, "highlight": { @@ -36,13 +36,12 @@ def search_project(request, query, 
language=None): "description": {}, } }, - "fields": ["name", "slug", "description", "lang", "url"], + "_source": ["name", "slug", "description", "lang", "url"], "size": 50 # TODO: Support pagination. } if language: - body['facets']['language']['facet_filter'] = {"term": {"lang": language}} - body['filter'] = {"term": {"lang": language}} + body['query']['bool']['filter'] = {"term": {"lang": language}} before_project_search.send(request=request, sender=ProjectIndex, body=body) @@ -89,15 +88,15 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy ] } }, - "facets": { + "aggs": { "taxonomy": { - "terms": {"field": "taxonomy"}, + "terms": {"field": "taxonomy.keyword"}, }, "project": { - "terms": {"field": "project"}, + "terms": {"field": "project.keyword"}, }, "version": { - "terms": {"field": "version"}, + "terms": {"field": "version.keyword"}, }, }, "highlight": { @@ -107,12 +106,12 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy "content": {}, } }, - "fields": ["title", "project", "version", "path"], + "_source": ["title", "project", "version", "path"], "size": 50 # TODO: Support pagination. } if project_slug or version_slug or taxonomy: - final_filter = {"and": []} + final_filter = [] if project_slug: try: @@ -126,7 +125,7 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy in Project.objects.public( request.user).filter( superprojects__parent__slug=project.slug)) - final_filter['and'].append({"terms": {"project": project_slugs}}) + final_filter.append({"terms": {"project": project_slugs}}) # Add routing to optimize search by hitting the right shard. 
# This purposely doesn't apply routing if the project has more @@ -141,15 +140,12 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy return None if version_slug: - final_filter['and'].append({'term': {'version': version_slug}}) + final_filter.append({'term': {'version': version_slug}}) if taxonomy: - final_filter['and'].append({'term': {'taxonomy': taxonomy}}) + final_filter.append({'term': {'taxonomy': taxonomy}}) - body['filter'] = final_filter - body['facets']['project']['facet_filter'] = final_filter - body['facets']['version']['facet_filter'] = final_filter - body['facets']['taxonomy']['facet_filter'] = final_filter + body['query']['bool']['filter'] = final_filter if settings.DEBUG: print("Before Signal") @@ -167,9 +163,9 @@ def search_section(request, query, project_slug=None, version_slug=LATEST, """ Search for a section of content. - When you search, you will have a ``project`` facet, which includes the + When you search, you will have a ``project`` facet (aggs), which includes the number of matching sections per project. When you search inside a project, - the ``path`` facet will show the number of matching sections per page. + the ``path`` aggs will show the number of matching sections per page. :param request: Request instance :param query: string to use in query @@ -198,12 +194,9 @@ def search_section(request, query, project_slug=None, version_slug=LATEST, ] } }, - "facets": { + "aggs": { "project": { - "terms": {"field": "project"}, - "facet_filter": { - "term": {"version": version_slug}, - } + "terms": {"field": "project.keyword"}, }, }, "highlight": { @@ -212,36 +205,29 @@ def search_section(request, query, project_slug=None, version_slug=LATEST, "content": {}, } }, - "fields": ["title", "project", "version", "path", "page_id", "content"], + "_source": ["title", "project", "version", "path", "page_id", "content"], "size": 10 # TODO: Support pagination. 
} if project_slug: - body['filter'] = { - "and": [ - {"term": {"project": project_slug}}, - {"term": {"version": version_slug}}, - ] - } - body['facets']['path'] = { + body['query']['bool']['filter'] = [ + {"term": {"project": project_slug}}, + {"term": {"version": version_slug}}, + ] + body['aggs']['path'] = { "terms": {"field": "path"}, - "facet_filter": { - "term": {"project": project_slug}, - } }, # Add routing to optimize search by hitting the right shard. kwargs['routing'] = project_slug if path: - body['filter'] = { - "and": [ - {"term": {"path": path}}, - ] - } + body['query']['bool']['filter'] = [ + {"term": {"path": path}}, + ] if path and not project_slug: # Show facets when we only have a path - body['facets']['path'] = { + body['aggs']['path'] = { "terms": {"field": "path"} } diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 7d3a51d5fc2..2cf7a057218 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -55,16 +55,19 @@ def elastic_search(request): if results: # pre and post 1.0 compat for num, hit in enumerate(results['hits']['hits']): - for key, val in list(hit['fields'].items()): + for key, val in list(hit['_source'].items()): if isinstance(val, list): - results['hits']['hits'][num]['fields'][key] = val[0] + results['hits']['hits'][num]['_source'][key] = val[0] + # we cannot render attributes starting with an underscore + hit['fields'] = hit['_source'] + del hit['_source'] - if 'facets' in results: + if 'aggregations' in results: for facet_type in ['project', 'version', 'taxonomy', 'language']: - if facet_type in results['facets']: + if facet_type in results['aggregations']: facets[facet_type] = collections.OrderedDict() - for term in results['facets'][facet_type]['terms']: - facets[facet_type][term['term']] = term['count'] + for term in results['aggregations'][facet_type]['buckets']: + facets[facet_type][term['key']] = term['doc_count'] if settings.DEBUG: print(pprint(results)) From 
27915aced5bed72c66b66ee516038572f6a77a3e Mon Sep 17 00:00:00 2001 From: Riccardo Magliocchetti Date: Tue, 13 Mar 2018 15:34:18 +0100 Subject: [PATCH 8/9] search: reinstate the relationship mapping in the index --- readthedocs/search/indexes.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/readthedocs/search/indexes.py b/readthedocs/search/indexes.py index 0184b086292..2a0b725c0fe 100644 --- a/readthedocs/search/indexes.py +++ b/readthedocs/search/indexes.py @@ -266,8 +266,6 @@ def get_mapping(self): self._type: { # Disable _all field to reduce index size. '_all': {'enabled': False}, - # Associate a page with a project. - # '_parent': {'type': self._parent}, 'properties': { 'id': {'type': 'keyword'}, 'sha': {'type': 'keyword'}, @@ -282,6 +280,13 @@ def get_mapping(self): 'content': {'type': 'text', 'analyzer': 'default_icu'}, # Add a weight field to enhance relevancy scoring. 'weight': {'type': 'float'}, + # Associate a page with a project. + self._parent: { + 'type': 'join', + 'relations': { + self._parent: self._type + } + }, } } } @@ -314,8 +319,6 @@ def get_mapping(self): self._type: { # Disable _all field to reduce index size. '_all': {'enabled': False}, - # Associate a section with a page. - # '_parent': {'type': self._parent}, # Commenting this out until we need it. # 'suggest': { # "type": "completion", @@ -340,6 +343,13 @@ def get_mapping(self): }, # Add a weight field to enhance relevancy scoring. 'weight': {'type': 'float'}, + # Associate a section with a page. 
+ self._parent: { + 'type': 'join', + 'relations': { + self._parent: self._type + } + }, } } } From 0f5c4c4c1737c5e93d4ccb13cb0209bf8e74739d Mon Sep 17 00:00:00 2001 From: Paolo Romolini Date: Tue, 20 Mar 2018 12:31:45 +0100 Subject: [PATCH 9/9] tests: first stab at unit testing search You got to start from somwhere --- .../rtd_tests/mocks/search_mock_responses.py | 160 ++++++++++++++++++ readthedocs/rtd_tests/tests/test_search.py | 150 ++++++++++++++++ 2 files changed, 310 insertions(+) create mode 100644 readthedocs/rtd_tests/mocks/search_mock_responses.py create mode 100644 readthedocs/rtd_tests/tests/test_search.py diff --git a/readthedocs/rtd_tests/mocks/search_mock_responses.py b/readthedocs/rtd_tests/mocks/search_mock_responses.py new file mode 100644 index 00000000000..2ade8103c52 --- /dev/null +++ b/readthedocs/rtd_tests/mocks/search_mock_responses.py @@ -0,0 +1,160 @@ +search_project_response = """ +{ + "took": 17, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": 1, + "max_score": 1.8232156, + "hits": [ + { + "_index": "readthedocs", + "_type": "project", + "_id": "6", + "_score": 1.8232156, + "_source": { + "name": "Pip", + "description": "", + "lang": "en", + "url": "/projects/pip/", + "slug": "pip" + }, + "highlight": { + "name": [ + "Pip" + ] + } + } + ] + }, + "aggregations": { + "language": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "en", + "doc_count": 1 + } + ] + } + } +} +""" + +search_file_response = """ +{ + "took": 27, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": 3, + "max_score": 6.989019, + "hits": [ + { + "_index": "readthedocs", + "_type": "page", + "_id": "AWKuy4jp-H7vMtbTbHP5", + "_score": 6.989019, + "_routing": "prova", + "_source": { + "path": "_docs/cap2", + "project": "prova", + "title": "Capitolo 2", + "version": "latest" 
+ }, + "highlight": { + "headers": [ + "Capitolo 2" + ], + "title": [ + "Capitolo 2" + ], + "content": [ + "Capitolo 2 In questo capitolo, vengono trattate" + ] + } + }, + { + "_index": "readthedocs", + "_type": "page", + "_id": "AWKuy4jp-H7vMtbTbHP4", + "_score": 6.973402, + "_routing": "prova", + "_source": { + "path": "_docs/cap1", + "project": "prova", + "title": "Capitolo 1", + "version": "latest" + }, + "highlight": { + "headers": [ + "Capitolo 1" + ], + "title": [ + "Capitolo 1" + ], + "content": [ + "Capitolo 1 In questo capitolo, le funzioni principali" + ] + } + }, + { + "_index": "readthedocs", + "_type": "page", + "_id": "AWKuy4jp-H7vMtbTbHP3", + "_score": 0.2017303, + "_routing": "prova", + "_source": { + "path": "index", + "project": "prova", + "title": "Titolo del documento", + "version": "latest" + }, + "highlight": { + "content": [ + "Titolo del documento Nel Capitolo 1 Nel Capitolo 2" + ] + } + } + ] + }, + "aggregations": { + "project": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "prova", + "doc_count": 3 + } + ] + }, + "taxonomy": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [] + }, + "version": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "latest", + "doc_count": 3 + } + ] + } + } +} +""" diff --git a/readthedocs/rtd_tests/tests/test_search.py b/readthedocs/rtd_tests/tests/test_search.py new file mode 100644 index 00000000000..01096a098c7 --- /dev/null +++ b/readthedocs/rtd_tests/tests/test_search.py @@ -0,0 +1,150 @@ +# -*- coding: utf-8 -*- +from __future__ import ( + absolute_import, division, print_function, unicode_literals) + +import json + +from django.core.urlresolvers import reverse +from django.test import TestCase, RequestFactory +from mock import patch +from urllib3._collections import HTTPHeaderDict + +from readthedocs.projects.models import Project +from 
class TestSearch(TestCase):
    """
    Unit tests for the search views.

    Elasticsearch is never contacted: ``Urllib3HttpConnection.perform_request``
    is patched to return canned JSON payloads, which also lets each test
    inspect the exact query body that would have been sent to the cluster.
    """

    fixtures = ['eric', 'test_data']

    def setUp(self):
        # Authenticated client plus a known project from the fixtures.
        # Logging in here covers every test; the per-test logins of the
        # original were redundant and have been removed.
        self.client.login(username='eric', password='test')
        self.pip = Project.objects.get(slug='pip')
        self.factory = RequestFactory()

    def perform_request_file_mock(
            self, method, url, params=None, body=None, timeout=None, ignore=()):
        """
        Elastic Search Urllib3HttpConnection mock for file search.

        NOTE(review): ``patch(..., side_effect=...)`` calls this unbound, so
        ``self`` presumably receives the HTTP method rather than an instance —
        harmless here because every argument is ignored; confirm if the mock
        ever needs to inspect its arguments.
        """
        headers = HTTPHeaderDict({
            'content-length': '893',
            'content-type': 'application/json; charset=UTF-8'
        })
        return 200, headers, search_file_response

    def perform_request_project_mock(
            self, method, url, params=None, body=None, timeout=None, ignore=()):
        """
        Elastic Search Urllib3HttpConnection mock for project search.

        Returns (status, headers, raw JSON body) exactly as the real
        transport would.
        """
        headers = HTTPHeaderDict({
            'content-length': '893',
            'content-type': 'application/json; charset=UTF-8'
        })
        return 200, headers, search_project_response

    @patch(
        'elasticsearch.connection.http_urllib3.Urllib3HttpConnection.perform_request',
        side_effect=perform_request_project_mock
    )
    def test_search_project(self, perform_request_mock):
        """
        Search by project: verify the query string sent to Elasticsearch and
        the rendering of the mocked response.
        """
        r = self.client.get(
            reverse('search'),
            {'q': 'pip', 'type': 'project', 'project': None}
        )
        self.assertEqual(r.status_code, 200)
        # Fourth positional argument of perform_request is the request body.
        request_body = perform_request_mock.call_args_list[0][0][3]
        query_dict = json.loads(request_body)
        self.assertIn('query', query_dict)
        self.assertDictEqual({
            'bool': {
                'should': [
                    {'match': {'name': {'query': 'pip', 'boost': 10}}},
                    {'match': {'description': {'query': 'pip'}}}
                ]
            }
        }, query_dict['query'])
        # Original asserted _type and the status code twice; once is enough.
        main_hit = r.context['results']['hits']['hits'][0]
        self.assertEqual(main_hit['_type'], 'project')
        self.assertEqual(main_hit['fields']['name'], 'Pip')
        self.assertEqual(main_hit['fields']['slug'], 'pip')

    @patch(
        'elasticsearch.connection.http_urllib3.Urllib3HttpConnection.perform_request',
        side_effect=perform_request_file_mock
    )
    def test_search_file(self, perform_request_mock):
        """
        Search by file: verify the query string sent to Elasticsearch and
        the rendering of the mocked response.
        """
        r = self.client.get(
            reverse('search'),
            {'q': 'capitolo', 'type': 'file'}
        )
        # Check the status before dereferencing r.context.
        self.assertEqual(r.status_code, 200)
        request_body = perform_request_mock.call_args_list[0][0][3]
        query_dict = json.loads(request_body)
        self.assertIn('query', query_dict)
        self.assertDictEqual({
            'bool': {
                'filter': [{'term': {'version': 'latest'}}],
                'should': [
                    {'match_phrase': {'title': {'query': 'capitolo', 'boost': 10, 'slop': 2}}},
                    {'match_phrase': {'headers': {'query': 'capitolo', 'boost': 5, 'slop': 3}}},
                    {'match_phrase': {'content': {'query': 'capitolo', 'slop': 5}}}
                ]
            }
        }, query_dict['query'])
        main_hit = r.context['results']['hits']['hits'][0]
        self.assertEqual(main_hit['_type'], 'page')
        self.assertEqual(main_hit['fields']['project'], 'prova')
        self.assertEqual(main_hit['fields']['path'], '_docs/cap2')

    @patch(
        'elasticsearch.connection.http_urllib3.Urllib3HttpConnection.perform_request',
        side_effect=perform_request_file_mock
    )
    def test_search_in_project(self, perform_request_mock):
        """
        In-project file search: verify that the query is filtered by both the
        project slug and the version, and that results render.
        """
        r = self.client.get(
            '/projects/pip/search/',
            {'q': 'capitolo'}
        )
        self.assertEqual(r.status_code, 200)
        request_body = perform_request_mock.call_args_list[0][0][3]
        query_dict = json.loads(request_body)
        self.assertDictEqual({
            'bool': {
                'should': [
                    {'match': {'title': {'boost': 10, 'query': 'capitolo'}}},
                    {'match': {'headers': {'boost': 5, 'query': 'capitolo'}}},
                    {'match': {'content': {'query': 'capitolo'}}}
                ],
                'filter': [
                    {'term': {'project': 'pip'}},
                    {'term': {'version': 'latest'}}
                ]
            }
        }, query_dict['query'])
        main_hit = r.context['results']['hits']['hits'][0]
        self.assertEqual(main_hit['_type'], 'page')
        self.assertEqual(main_hit['fields']['project'], 'prova')
        self.assertEqual(main_hit['fields']['path'], '_docs/cap2')