readthedocs · ericholscher · Jun 19, 2018 · Jun 8, 2018 · Jun 8, 2018 · Jun 9, 2018
diff --git a/.travis.yml b/.travis.yml
@@ -4,7 +4,7 @@ python:
   - 3.6
 sudo: false
 env:
-  - ES_VERSION=1.3.9 ES_DOWNLOAD_URL=https://download.elastic.co/elasticsearch/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz
+  - ES_VERSION=6.2.4 ES_DOWNLOAD_URL=https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-${ES_VERSION}.tar.gz
 matrix:
   include:
     - python: 2.7
@@ -42,3 +42,4 @@ notifications:
 branches:
   only:
   - master
+  - search_upgrade
diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py
@@ -0,0 +1,45 @@
+from django_elasticsearch_dsl import DocType, Index, fields
+
+from readthedocs.projects.models import Project
+
+from readthedocs.search.faceted_search import ProjectSearch
+
+project_index = Index('project')
+
+project_index.settings(
+    number_of_shards=1,
+    number_of_replicas=0
+)
+
+
+@project_index.doc_type
+class ProjectDocument(DocType):
+
+    class Meta(object):
+        model = Project
+        fields = ('name', 'slug', 'description')
+
+    url = fields.TextField()
+    users = fields.NestedField(properties={
+        'username': fields.TextField(),
+        'id': fields.IntegerField(),
+    })
+    language = fields.KeywordField()
+
+    def prepare_url(self, instance):
+        return instance.get_absolute_url()
+
+    @classmethod
+    def faceted_search(cls, query, language=None, using=None, index=None):
+        kwargs = {
+            'using': using or cls._doc_type.using,
+            'index': index or cls._doc_type.index,
+            'doc_types': [cls],
+            'model': cls._doc_type.model,
+            'query': query
+        }
+
+        if language:
+            kwargs['filters'] = {'language': language}
+
+        return ProjectSearch(**kwargs)
diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py
@@ -0,0 +1,15 @@
+from elasticsearch_dsl import FacetedSearch, TermsFacet
+
+
+class ProjectSearch(FacetedSearch):
+    fields = ['name^5', 'description']
+    facets = {
+        'language': TermsFacet(field='language')
+    }
+
+    def __init__(self, using, index, doc_types, model, **kwargs):
+        self.using = using
+        self.index = index
+        self.doc_types = doc_types
+        self._model = model
+        super(ProjectSearch, self).__init__(**kwargs)
diff --git a/readthedocs/search/indexes.py b/readthedocs/search/indexes.py
@@ -19,7 +19,6 @@
 import datetime
 
 from elasticsearch import Elasticsearch, exceptions
-from elasticsearch.helpers import bulk_index
 
 from django.conf import settings
 
@@ -143,7 +142,7 @@ def bulk_index(self, data, index=None, chunk_size=500, parent=None,
             docs.append(doc)
 
         # TODO: This doesn't work with the new ES setup.
-        bulk_index(self.es, docs, chunk_size=chunk_size)
+        # bulk_index(self.es, docs, chunk_size=chunk_size)
 
     def index_document(self, data, index=None, parent=None, routing=None):
         doc = self.extract_document(data)

diff --git a/readthedocs/search/tests/conftest.py b/readthedocs/search/tests/conftest.py
@@ -3,6 +3,7 @@
 from random import shuffle
 
 import pytest
+from django.core.management import call_command
 from django_dynamic_fixture import G
 
 from readthedocs.projects.models import Project
@@ -16,27 +17,17 @@ def mock_elastic_index(mocker):
     mocker.patch.object(Index, '_index', index_name.lower())
 
 
-@pytest.fixture(autouse=True)
-def es_index(mock_elastic_index):
-    # Create the index.
-    index = Index()
-    index_name = index.timestamped_index()
-    index.create_index(index_name)
-    index.update_aliases(index_name)
-    # Update mapping
-    proj = ProjectIndex()
-    proj.put_mapping()
-    page = PageIndex()
-    page.put_mapping()
-    sec = SectionIndex()
-    sec.put_mapping()
-
-    yield index
-    index.delete_index(index_name=index_name)
+@pytest.fixture()
+def es_index():
+    call_command('search_index', '--delete', '-f')
+    call_command('search_index', '--create')
+
+    yield
+    call_command('search_index', '--delete', '-f')
 
 
 @pytest.fixture
-def all_projects():
+def all_projects(es_index):
     projects = [G(Project, slug=project_slug, name=project_slug) for project_slug in ALL_PROJECTS]
     shuffle(projects)
     return projects

diff --git a/readthedocs/search/tests/test_views.py b/readthedocs/search/tests/test_views.py
@@ -1,8 +1,13 @@
+import random
+import string
+
 import pytest
 from django.core.management import call_command
 from django.core.urlresolvers import reverse
 from django_dynamic_fixture import G
+from django_elasticsearch_dsl import Index
 from pyquery import PyQuery as pq
+from pytest_mock import mock
 
 from readthedocs.builds.constants import LATEST
 from readthedocs.builds.models import Version
@@ -12,13 +17,9 @@
 
 @pytest.mark.django_db
 @pytest.mark.search
-class TestElasticSearch(object):
+class TestProjectSearch(object):
     url = reverse('search')
 
-    def _reindex_elasticsearch(self, es_index):
-        call_command('reindex_elasticsearch')
-        es_index.refresh_index()
-
     def _get_search_result(self, url, client, search_params):
         resp = client.get(url, search_params)
         assert resp.status_code == 200
@@ -27,21 +28,26 @@ def _get_search_result(self, url, client, search_params):
         result = page.find('.module-list-wrapper .module-item-title')
         return result, page
 
-    @pytest.fixture(autouse=True)
-    def elastic_index(self, mock_parse_json, all_projects, es_index):
-        self._reindex_elasticsearch(es_index=es_index)
+
+
+    # TODO: Implement a way to generate unique index name in test
+    # @pytest.fixture(autouse=True)
+    # def mock_project_index(self, mocker):
+    #     mocked_document = mocker.patch('readthedocs.search.documents.get_index')
+    #     index_name = ''.join([random.choice(string.ascii_letters) for _ in range(5)])
+    #
+    #     mocked_document.return_value = Index(index_name)
 
     def test_search_by_project_name(self, client, project):
         result, _ = self._get_search_result(url=self.url, client=client,
                                             search_params={'q': project.name})
 
         assert project.name.encode('utf-8') in result.text().encode('utf-8')
 
-    def test_search_project_show_languages(self, client, project, es_index):
+    def test_search_project_show_languages(self, client, project):
         """Test that searching project should show all available languages"""
         # Create a project in bn and add it as a translation
         G(Project, language='bn', name=project.name)
-        self._reindex_elasticsearch(es_index=es_index)
 
         result, page = self._get_search_result(url=self.url, client=client,
                                                search_params={'q': project.name})
@@ -51,11 +57,10 @@ def test_search_project_show_languages(self, client, project, es_index):
         assert len(content) == 2
         assert 'bn' in content.text()
 
-    def test_search_project_filter_language(self, client, project, es_index):
+    def test_search_project_filter_language(self, client, project):
         """Test that searching project filtered according to language"""
         # Create a project in bn and add it as a translation
         translate = G(Project, language='bn', name=project.name)
-        self._reindex_elasticsearch(es_index=es_index)
         search_params = {'q': project.name, 'language': 'bn'}
 
         result, page = self._get_search_result(url=self.url, client=client,
@@ -65,10 +70,33 @@ def test_search_project_filter_language(self, client, project, es_index):
         assert len(result) == 1
 
         content = page.find('.navigable .language-list')
-        # There should be 1 languages
-        assert len(content) == 1
+        # There should be 2 languages because both `en` and `bn` should show there
+        assert len(content) == 2
         assert 'bn' in content.text()
 
+
+@pytest.mark.django_db
+@pytest.mark.search
+@pytest.mark.xfail(reason="File search not work still!")
+class TestElasticSearch(object):
+    url = reverse('search')
+
+    def _reindex_elasticsearch(self, es_index):
+        call_command('reindex_elasticsearch')
+        es_index.refresh_index()
+
+    def _get_search_result(self, url, client, search_params):
+        resp = client.get(url, search_params)
+        assert resp.status_code == 200
+
+        page = pq(resp.content)
+        result = page.find('.module-list-wrapper .module-item-title')
+        return result, page
+
+    @pytest.fixture()
+    def elastic_index(self, mock_parse_json, all_projects, es_index):
+        self._reindex_elasticsearch(es_index=es_index)
+
     @pytest.mark.parametrize('data_type', ['content', 'headers', 'title'])
     @pytest.mark.parametrize('page_num', [0, 1])
     def test_search_by_file_content(self, client, project, data_type, page_num):

diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py
@@ -12,6 +12,7 @@
 
 from readthedocs.builds.constants import LATEST
 from readthedocs.search import lib as search_lib
+from readthedocs.search.documents import ProjectDocument
 
 log = logging.getLogger(__name__)
 LOG_TEMPLATE = u'(Elastic Search) [{user}:{type}] [{project}:{version}:{language}] {msg}'
@@ -45,14 +46,18 @@ def elastic_search(request):
 
     if user_input.query:
         if user_input.type == 'project':
-            results = search_lib.search_project(
-                request, user_input.query, language=user_input.language)
+            project_search = ProjectDocument.faceted_search(query=user_input.query,
+                                                            language=user_input.language)
+            response = project_search.execute()
+            results = response.hits
+            facets = response.facets
         elif user_input.type == 'file':
             results = search_lib.search_file(
                 request, user_input.query, project_slug=user_input.project,
                 version_slug=user_input.version, taxonomy=user_input.taxonomy)
 
-    if results:
+    # TODO: Temporary until finishing search upgrade for files
+    if results and user_input.type == 'file':
         # pre and post 1.0 compat
         for num, hit in enumerate(results['hits']['hits']):
             for key, val in list(hit['fields'].items()):

diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py
@@ -86,6 +86,7 @@ def INSTALLED_APPS(self):  # noqa
             'django_extensions',
             'messages_extends',
             'tastypie',
+            'django_elasticsearch_dsl',
 
             # our apps
             'readthedocs.projects',
@@ -101,6 +102,7 @@ def INSTALLED_APPS(self):  # noqa
             'readthedocs.notifications',
             'readthedocs.integrations',
             'readthedocs.analytics',
+            'readthedocs.search',
 
 
             # allauth
@@ -315,6 +317,11 @@ def USE_PROMOS(self):  # noqa
     ES_HOSTS = ['127.0.0.1:9200']
     ES_DEFAULT_NUM_REPLICAS = 0
     ES_DEFAULT_NUM_SHARDS = 5
+    ELASTICSEARCH_DSL = {
+        'default': {
+            'hosts': '127.0.0.1:9200'
+        },
+    }
 
     ALLOWED_HOSTS = ['*']
 

diff --git a/readthedocs/templates/search/elastic_search.html b/readthedocs/templates/search/elastic_search.html
@@ -26,8 +26,8 @@
 
       {% if facets.language %}
         <h5> Language </h5>
-          {% for name, count in facets.language.items %}
-            <li class="{% if language == name %}active{% endif %} language-list">
+          {% for name, count, selected in facets.language %}
+            <li class="{% if selected %}active{% endif %} language-list">
               {% if language == name %}
                 <a href="?{% url_replace request 'language' ''  %}">{{ name }}
               {% else %}
@@ -141,14 +141,14 @@ <h3>{% blocktrans with query=query|default:"" %}Results for {{ query }}{% endblo
               <div class="module-list-wrapper">
 
                 <ul>
-                  {% for result in results.hits.hits %}
+                  {% for result in results %}
                         <li class="module-item">
                           <p class="module-item-title">
-                            {% if result.fields.name %} 
+                            {% if result.name %}
 
                               {# Project #}
-                              <a href="{{ result.fields.url }}">{{ result.fields.name }}</a>
-                              {% for fragment in result.highlight.description|slice:":3" %}
+                              <a href="{{ result.url }}">{{ result.name }}</a>
+                              {% for fragment in result.description|slice:":3" %}
                                 <p>
                                 ...{{ fragment|safe }}...
                                 </p>

diff --git a/requirements/pip.txt b/requirements/pip.txt
@@ -47,8 +47,9 @@ httplib2==0.11.3
 GitPython==2.1.10
 
 # Search
-elasticsearch==1.5.0
-pyelasticsearch==0.7.1
+elasticsearch==6.2.0
+elasticsearch-dsl==6.1.0
+django-elasticsearch-dsl==0.5.0
 pyquery==1.4.0
 
 # Utils

diff --git a/scripts/travis/install_elasticsearch.sh b/scripts/travis/install_elasticsearch.sh
@@ -2,6 +2,6 @@ if [ $ES_DOWNLOAD_URL ]
 then
     wget ${ES_DOWNLOAD_URL}
     tar -xzf elasticsearch-${ES_VERSION}.tar.gz
-    ./elasticsearch-${ES_VERSION}/bin/plugin -install elasticsearch/elasticsearch-analysis-icu/2.3.0
-    ./elasticsearch-${ES_VERSION}/bin/elasticsearch &
+    ./elasticsearch-${ES_VERSION}/bin/elasticsearch-plugin install analysis-icu
+    ./elasticsearch-${ES_VERSION}/bin/elasticsearch -d
 fi