diff --git a/readthedocs/core/management/commands/reindex_elasticsearch.py b/readthedocs/core/management/commands/reindex_elasticsearch.py
index b736a1cd426..24927a02f3e 100644
--- a/readthedocs/core/management/commands/reindex_elasticsearch.py
+++ b/readthedocs/core/management/commands/reindex_elasticsearch.py
@@ -1,12 +1,14 @@
-"""Reindex Elastic Search indexes"""
+# -*- coding: utf-8 -*-
+"""Reindex Elastic Search indexes."""
+
+from __future__ import (
+ absolute_import, division, print_function, unicode_literals)
-from __future__ import absolute_import
import logging
+import socket
from optparse import make_option
-from django.core.management.base import BaseCommand
-from django.core.management.base import CommandError
-from django.conf import settings
+from django.core.management.base import BaseCommand, CommandError
from readthedocs.builds.constants import LATEST
from readthedocs.builds.models import Version
@@ -23,34 +25,42 @@ class Command(BaseCommand):
dest='project',
default='',
help='Project to index'),
+ make_option('-l',
+ dest='only_latest',
+ default=False,
+ action='store_true',
+ help='Only index latest'),
)
def handle(self, *args, **options):
- """Build/index all versions or a single project's version"""
+ """Build/index all versions or a single project's version."""
project = options['project']
+ only_latest = options['only_latest']
- queryset = Version.objects.all()
+ queryset = Version.objects.filter(active=True)
if project:
queryset = queryset.filter(project__slug=project)
if not queryset.exists():
raise CommandError(
- 'No project with slug: {slug}'.format(slug=project))
- log.info("Building all versions for %s", project)
- elif getattr(settings, 'INDEX_ONLY_LATEST', True):
+ u'No project with slug: {slug}'.format(slug=project))
+ log.info(u'Building all versions for %s', project)
+ if only_latest:
+ log.warning('Indexing only latest')
queryset = queryset.filter(slug=LATEST)
- for version in queryset:
- log.info("Reindexing %s", version)
- try:
- commit = version.project.vcs_repo(version.slug).commit
- except: # pylint: disable=bare-except
- # An exception can be thrown here in production, but it's not
- # documented what the exception here is
- commit = None
-
+ for version_pk, version_slug, project_slug in queryset.values_list(
+ 'pk', 'slug', 'project__slug'):
+ log.info(u'Reindexing %s:%s' % (project_slug, version_slug))
try:
- update_search(version.pk, commit,
- delete_non_commit_files=False)
+ update_search.apply_async(
+ kwargs=dict(
+ version_pk=version_pk,
+ commit='reindex',
+ delete_non_commit_files=False
+ ),
+ priority=0,
+ queue=socket.gethostname()
+ )
except Exception:
- log.exception('Reindex failed for %s', version)
+ log.exception(u'Reindexing failed for %s:%s' % (project_slug, version_slug))
diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py
index 61f6cfeca64..6db89a93ce0 100644
--- a/readthedocs/projects/views/public.py
+++ b/readthedocs/projects/views/public.py
@@ -305,6 +305,10 @@ def elastic_project_search(request, project_slug):
{'match': {'title': {'query': query, 'boost': 10}}},
{'match': {'headers': {'query': query, 'boost': 5}}},
{'match': {'content': {'query': query}}},
+ ],
+ 'filter': [
+ {'term': {'project': project_slug}},
+ {'term': {'version': version_slug}},
]
}
},
@@ -315,13 +319,7 @@ def elastic_project_search(request, project_slug):
'content': {},
}
},
- 'fields': ['title', 'project', 'version', 'path'],
- 'filter': {
- 'and': [
- {'term': {'project': project_slug}},
- {'term': {'version': version_slug}},
- ]
- },
+ '_source': ['title', 'project', 'version', 'path'],
'size': 50, # TODO: Support pagination.
}
@@ -335,9 +333,12 @@ def elastic_project_search(request, project_slug):
if results:
# pre and post 1.0 compat
for num, hit in enumerate(results['hits']['hits']):
- for key, val in list(hit['fields'].items()):
+ for key, val in list(hit['_source'].items()):
if isinstance(val, list):
- results['hits']['hits'][num]['fields'][key] = val[0]
+ results['hits']['hits'][num]['_source'][key] = val[0]
+ # we cannot render attributes starting with an underscore
+ hit['fields'] = hit['_source']
+ del hit['_source']
return render(
request,
diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py
index 5480ce98093..08315000a1c 100644
--- a/readthedocs/restapi/urls.py
+++ b/readthedocs/restapi/urls.py
@@ -50,7 +50,7 @@
url(r'index_search/',
search_views.index_search,
name='index_search'),
- url(r'search/$', views.search_views.search, name='api_search'),
+ url(r'^search/$', views.search_views.search, name='api_search'),
url(r'search/project/$',
search_views.project_search,
name='api_project_search'),
diff --git a/readthedocs/restapi/utils.py b/readthedocs/restapi/utils.py
index 00e9ec15937..74eb6d8b413 100644
--- a/readthedocs/restapi/utils.py
+++ b/readthedocs/restapi/utils.py
@@ -161,12 +161,11 @@ def index_search_request(
for route in routes:
section_obj.bulk_index(
section_index_list,
- parent=page_id,
routing=route,
)
for route in routes:
- page_obj.bulk_index(index_list, parent=project.slug, routing=route)
+ page_obj.bulk_index(index_list, routing=route)
if delete:
log.info('Deleting files not in commit: %s', commit)
diff --git a/readthedocs/restapi/views/search_views.py b/readthedocs/restapi/views/search_views.py
index abe36174097..a0d64b401bd 100644
--- a/readthedocs/restapi/views/search_views.py
+++ b/readthedocs/restapi/views/search_views.py
@@ -32,7 +32,7 @@ def index_search(request):
utils.index_search_request(
version=version, page_list=data['page_list'], commit=commit,
- project_scale=project_scale, page_scale=page_scale)
+ project_scale=project_scale, page_scale=page_scale, section=False)
return Response({'indexed': True})
@@ -64,7 +64,7 @@ def search(request):
# Supplement result paths with domain information on project
hits = results.get('hits', {}).get('hits', [])
for (n, hit) in enumerate(hits):
- fields = hit.get('fields', {})
+ fields = hit.get('_source', {})
search_project = fields.get('project')[0]
search_version = fields.get('version')[0]
path = fields.get('path')[0]
@@ -77,9 +77,12 @@ def search(request):
)
except ProjectRelationship.DoesNotExist:
pass
- results['hits']['hits'][n]['fields']['link'] = (
+ results['hits']['hits'][n]['_source']['link'] = (
canonical_url + path
)
+ # we cannot render attributes starting with an underscore
+ results['hits']['hits'][n]['fields'] = results['hits']['hits'][n]['_source']
+ del results['hits']['hits'][n]['_source']
return Response({'results': results})
diff --git a/readthedocs/rtd_tests/mocks/search_mock_responses.py b/readthedocs/rtd_tests/mocks/search_mock_responses.py
new file mode 100644
index 00000000000..2ade8103c52
--- /dev/null
+++ b/readthedocs/rtd_tests/mocks/search_mock_responses.py
@@ -0,0 +1,160 @@
+search_project_response = """
+{
+ "took": 17,
+ "timed_out": false,
+ "_shards": {
+ "total": 5,
+ "successful": 5,
+ "skipped": 0,
+ "failed": 0
+ },
+ "hits": {
+ "total": 1,
+ "max_score": 1.8232156,
+ "hits": [
+ {
+ "_index": "readthedocs",
+ "_type": "project",
+ "_id": "6",
+ "_score": 1.8232156,
+ "_source": {
+ "name": "Pip",
+ "description": "",
+ "lang": "en",
+ "url": "/projects/pip/",
+ "slug": "pip"
+ },
+ "highlight": {
+ "name": [
+ "Pip"
+ ]
+ }
+ }
+ ]
+ },
+ "aggregations": {
+ "language": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "en",
+ "doc_count": 1
+ }
+ ]
+ }
+ }
+}
+"""
+
+search_file_response = """
+{
+ "took": 27,
+ "timed_out": false,
+ "_shards": {
+ "total": 5,
+ "successful": 5,
+ "skipped": 0,
+ "failed": 0
+ },
+ "hits": {
+ "total": 3,
+ "max_score": 6.989019,
+ "hits": [
+ {
+ "_index": "readthedocs",
+ "_type": "page",
+ "_id": "AWKuy4jp-H7vMtbTbHP5",
+ "_score": 6.989019,
+ "_routing": "prova",
+ "_source": {
+ "path": "_docs/cap2",
+ "project": "prova",
+ "title": "Capitolo 2",
+ "version": "latest"
+ },
+ "highlight": {
+ "headers": [
+ "Capitolo 2"
+ ],
+ "title": [
+ "Capitolo 2"
+ ],
+ "content": [
+ "Capitolo 2 In questo capitolo, vengono trattate"
+ ]
+ }
+ },
+ {
+ "_index": "readthedocs",
+ "_type": "page",
+ "_id": "AWKuy4jp-H7vMtbTbHP4",
+ "_score": 6.973402,
+ "_routing": "prova",
+ "_source": {
+ "path": "_docs/cap1",
+ "project": "prova",
+ "title": "Capitolo 1",
+ "version": "latest"
+ },
+ "highlight": {
+ "headers": [
+ "Capitolo 1"
+ ],
+ "title": [
+ "Capitolo 1"
+ ],
+ "content": [
+ "Capitolo 1 In questo capitolo, le funzioni principali"
+ ]
+ }
+ },
+ {
+ "_index": "readthedocs",
+ "_type": "page",
+ "_id": "AWKuy4jp-H7vMtbTbHP3",
+ "_score": 0.2017303,
+ "_routing": "prova",
+ "_source": {
+ "path": "index",
+ "project": "prova",
+ "title": "Titolo del documento",
+ "version": "latest"
+ },
+ "highlight": {
+ "content": [
+ "Titolo del documento Nel Capitolo 1 Nel Capitolo 2"
+ ]
+ }
+ }
+ ]
+ },
+ "aggregations": {
+ "project": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "prova",
+ "doc_count": 3
+ }
+ ]
+ },
+ "taxonomy": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": []
+ },
+ "version": {
+ "doc_count_error_upper_bound": 0,
+ "sum_other_doc_count": 0,
+ "buckets": [
+ {
+ "key": "latest",
+ "doc_count": 3
+ }
+ ]
+ }
+ }
+}
+"""
diff --git a/readthedocs/rtd_tests/tests/test_search.py b/readthedocs/rtd_tests/tests/test_search.py
new file mode 100644
index 00000000000..01096a098c7
--- /dev/null
+++ b/readthedocs/rtd_tests/tests/test_search.py
@@ -0,0 +1,150 @@
+# -*- coding: utf-8 -*-
+from __future__ import (
+ absolute_import, division, print_function, unicode_literals)
+
+import json
+
+from django.core.urlresolvers import reverse
+from django.test import TestCase, RequestFactory
+from mock import patch
+from urllib3._collections import HTTPHeaderDict
+
+from readthedocs.projects.models import Project
+from readthedocs.rtd_tests.mocks.search_mock_responses import (
+ search_project_response, search_file_response
+)
+
+
+class TestSearch(TestCase):
+ fixtures = ['eric', 'test_data']
+
+ def setUp(self):
+ self.client.login(username='eric', password='test')
+ self.pip = Project.objects.get(slug='pip')
+ self.factory = RequestFactory()
+
+ def perform_request_file_mock(self, method, url, params=None, body=None, timeout=None, ignore=()):
+ """
+ Elastic Search Urllib3HttpConnection mock for file search
+ """
+ headers = HTTPHeaderDict({
+ 'content-length': '893',
+ 'content-type': 'application/json; charset=UTF-8'
+ })
+ raw_data = search_file_response
+ return 200, headers, raw_data
+
+ def perform_request_project_mock(self, method, url, params=None, body=None, timeout=None, ignore=()):
+ """
+ Elastic Search Urllib3HttpConnection mock for project search
+ """
+ headers = HTTPHeaderDict({
+ 'content-length': '893',
+ 'content-type': 'application/json; charset=UTF-8'
+ })
+ raw_data = search_project_response
+ return 200, headers, raw_data
+
+ @patch(
+ 'elasticsearch.connection.http_urllib3.Urllib3HttpConnection.perform_request',
+ side_effect=perform_request_project_mock
+ )
+ def test_search_project(self, perform_request_mock):
+ """
+ Tests the search view (by project) by mocking the perform request method
+ of the elastic search module. Checks the query string provided
+ to elastic search.
+ """
+ self.client.login(username='eric', password='test')
+ r = self.client.get(
+ reverse('search'),
+ {'q': 'pip', 'type': 'project', 'project': None}
+ )
+ self.assertEqual(r.status_code, 200)
+ response = perform_request_mock.call_args_list[0][0][3]
+ query_dict = json.loads(response)
+ self.assertIn('query', query_dict)
+ self.assertDictEqual({
+ 'bool': {
+ 'should': [
+ {'match': {'name': {'query': 'pip', 'boost': 10}}},
+ {'match': {'description': {'query': 'pip'}}}
+ ]
+ }
+ }, query_dict['query'])
+ main_hit = r.context['results']['hits']['hits'][0]
+ self.assertEqual(r.status_code, 200)
+ self.assertEqual(main_hit['_type'], 'project')
+        self.assertEqual(main_hit['_index'], 'readthedocs')
+ self.assertEqual(main_hit['fields']['name'], 'Pip')
+ self.assertEqual(main_hit['fields']['slug'], 'pip')
+
+ @patch(
+ 'elasticsearch.connection.http_urllib3.Urllib3HttpConnection.perform_request',
+ side_effect=perform_request_file_mock
+ )
+ def test_search_file(self, perform_request_mock):
+ """
+ Tests the search view (by file) by mocking the perform request method
+ of the elastic search module. Checks the query string provided
+ to elastic search.
+ """
+ self.client.login(username='eric', password='test')
+ r = self.client.get(
+ reverse('search'),
+ {'q': 'capitolo', 'type': 'file'}
+ )
+ response = perform_request_mock.call_args_list[0][0][3]
+ query_dict = json.loads(response)
+ self.assertIn('query', query_dict)
+ self.assertDictEqual({
+ 'bool': {
+ 'filter': [{'term': {'version': 'latest'}}],
+ 'should': [
+ {'match_phrase': {'title': {'query': 'capitolo', 'boost': 10, 'slop': 2}}},
+ {'match_phrase': {'headers': {'query': 'capitolo', 'boost': 5, 'slop': 3}}},
+ {'match_phrase': {'content': {'query': 'capitolo', 'slop': 5}}}
+ ]
+ }
+ }, query_dict['query'])
+ main_hit = r.context['results']['hits']['hits'][0]
+ self.assertEqual(r.status_code, 200)
+ self.assertEqual(main_hit['_type'], 'page')
+ self.assertEqual(main_hit['fields']['project'], 'prova')
+ self.assertEqual(main_hit['fields']['path'], '_docs/cap2')
+
+ @patch(
+ 'elasticsearch.connection.http_urllib3.Urllib3HttpConnection.perform_request',
+ side_effect=perform_request_file_mock
+ )
+ def test_search_in_project(self, perform_request_mock):
+ """
+ Tests the search view (by file) by mocking the perform request method
+ of the elastic search module. Checks the query string provided
+ to elastic search.
+ """
+ self.client.login(username='eric', password='test')
+ r = self.client.get(
+ '/projects/pip/search/',
+ {'q': 'capitolo'}
+ )
+ response = perform_request_mock.call_args_list[0][0][3]
+ query_dict = json.loads(response)
+ self.assertDictEqual({
+ 'bool': {
+ 'should': [
+ {'match': {'title': {'boost': 10, 'query': 'capitolo'}}},
+ {'match': {'headers': {'boost': 5, 'query': 'capitolo'}}},
+ {'match': {'content': {'query': 'capitolo'}}}
+ ],
+ 'filter': [
+ {'term': {'project': 'pip'}},
+ {'term': {'version': 'latest'}}
+ ]
+ }
+ }, query_dict['query'])
+ main_hit = r.context['results']['hits']['hits'][0]
+ self.assertEqual(r.status_code, 200)
+ self.assertEqual(main_hit['_type'], 'page')
+ self.assertEqual(main_hit['fields']['project'], 'prova')
+ self.assertEqual(main_hit['fields']['path'], '_docs/cap2')
diff --git a/readthedocs/search/indexes.py b/readthedocs/search/indexes.py
index 1b2ede6aaa9..2a0b725c0fe 100644
--- a/readthedocs/search/indexes.py
+++ b/readthedocs/search/indexes.py
@@ -19,7 +19,7 @@
import datetime
from elasticsearch import Elasticsearch, exceptions
-from elasticsearch.helpers import bulk_index
+from elasticsearch.helpers import bulk
from django.conf import settings
@@ -48,8 +48,6 @@ def get_settings(self, settings_override=None):
'number_of_replicas': settings.ES_DEFAULT_NUM_REPLICAS,
'number_of_shards': settings.ES_DEFAULT_NUM_SHARDS,
'refresh_interval': '5s',
- 'store.compress.tv': True,
- 'store.compress.stored': True,
'analysis': self.get_analysis(),
}
if settings_override:
@@ -76,7 +74,7 @@ def get_analysis(self):
analyzers['default_icu'] = {
'type': 'custom',
'tokenizer': 'icu_tokenizer',
- 'filter': ['word_delimiter', 'icu_folding', 'icu_normalizer'],
+ 'filter': ['custom_word_delimiter', 'icu_folding', 'icu_normalizer', 'lowercase'],
}
# Customize the word_delimiter filter to set various options.
@@ -129,17 +127,14 @@ def bulk_index(self, data, index=None, chunk_size=500, parent=None,
doc = {
'_index': index,
'_type': self._type,
- '_id': source['id'],
'_source': source,
}
- if parent:
- doc['_parent'] = parent
if routing:
doc['_routing'] = routing
docs.append(doc)
# TODO: This doesn't work with the new ES setup.
- bulk_index(self.es, docs, chunk_size=chunk_size)
+ bulk(self.es, docs, chunk_size=chunk_size)
def index_document(self, data, index=None, parent=None, routing=None):
doc = self.extract_document(data)
@@ -220,25 +215,24 @@ def get_mapping(self):
# Disable _all field to reduce index size.
'_all': {'enabled': False},
'properties': {
- 'id': {'type': 'long'},
- 'name': {'type': 'string', 'analyzer': 'default_icu'},
- 'description': {'type': 'string', 'analyzer': 'default_icu'},
-
- 'slug': {'type': 'string', 'index': 'not_analyzed'},
- 'lang': {'type': 'string', 'index': 'not_analyzed'},
- 'tags': {'type': 'string', 'index': 'not_analyzed'},
- 'privacy': {'type': 'string', 'index': 'not_analyzed'},
+ 'id': {'type': 'keyword'},
+ 'name': {'type': 'text', 'analyzer': 'default_icu'},
+ 'description': {'type': 'text', 'analyzer': 'default_icu'},
+
+ 'slug': {'type': 'keyword'},
+ 'lang': {'type': 'keyword'},
+ 'tags': {'type': 'keyword'},
+ 'privacy': {'type': 'keyword'},
'author': {
- 'type': 'string',
+ 'type': 'text',
'analyzer': 'default_icu',
'fields': {
'raw': {
- 'type': 'string',
- 'index': 'not_analyzed',
+ 'type': 'keyword',
},
},
},
- 'url': {'type': 'string', 'index': 'not_analyzed'},
+ 'url': {'type': 'keyword'},
# Add a weight field to enhance relevancy scoring.
'weight': {'type': 'float'},
}
@@ -272,22 +266,27 @@ def get_mapping(self):
self._type: {
# Disable _all field to reduce index size.
'_all': {'enabled': False},
- # Associate a page with a project.
- '_parent': {'type': self._parent},
'properties': {
- 'id': {'type': 'string', 'index': 'not_analyzed'},
- 'sha': {'type': 'string', 'index': 'not_analyzed'},
- 'project': {'type': 'string', 'index': 'not_analyzed'},
- 'version': {'type': 'string', 'index': 'not_analyzed'},
- 'path': {'type': 'string', 'index': 'not_analyzed'},
- 'taxonomy': {'type': 'string', 'index': 'not_analyzed'},
- 'commit': {'type': 'string', 'index': 'not_analyzed'},
-
- 'title': {'type': 'string', 'analyzer': 'default_icu'},
- 'headers': {'type': 'string', 'analyzer': 'default_icu'},
- 'content': {'type': 'string', 'analyzer': 'default_icu'},
+ 'id': {'type': 'keyword'},
+ 'sha': {'type': 'keyword'},
+ 'project': {'type': 'keyword'},
+ 'version': {'type': 'keyword'},
+ 'path': {'type': 'keyword'},
+ 'taxonomy': {'type': 'keyword'},
+ 'commit': {'type': 'keyword'},
+
+ 'title': {'type': 'text', 'analyzer': 'default_icu'},
+ 'headers': {'type': 'text', 'analyzer': 'default_icu'},
+ 'content': {'type': 'text', 'analyzer': 'default_icu'},
# Add a weight field to enhance relevancy scoring.
'weight': {'type': 'float'},
+ # Associate a page with a project.
+ self._parent: {
+ 'type': 'join',
+ 'relations': {
+ self._parent: self._type
+ }
+ },
}
}
}
@@ -297,7 +296,7 @@ def get_mapping(self):
def extract_document(self, data):
doc = {}
- attrs = ('id', 'project', 'title', 'headers', 'version', 'path',
+ attrs = ('project', 'title', 'headers', 'version', 'path',
'content', 'taxonomy', 'commit')
for attr in attrs:
doc[attr] = data.get(attr, '')
@@ -320,8 +319,6 @@ def get_mapping(self):
self._type: {
# Disable _all field to reduce index size.
'_all': {'enabled': False},
- # Associate a section with a page.
- '_parent': {'type': self._parent},
# Commenting this out until we need it.
# 'suggest': {
# "type": "completion",
@@ -330,22 +327,29 @@ def get_mapping(self):
# "payloads": True,
# },
'properties': {
- 'id': {'type': 'string', 'index': 'not_analyzed'},
- 'project': {'type': 'string', 'index': 'not_analyzed'},
- 'version': {'type': 'string', 'index': 'not_analyzed'},
- 'path': {'type': 'string', 'index': 'not_analyzed'},
- 'page_id': {'type': 'string', 'index': 'not_analyzed'},
- 'commit': {'type': 'string', 'index': 'not_analyzed'},
- 'title': {'type': 'string', 'analyzer': 'default_icu'},
- 'content': {'type': 'string', 'analyzer': 'default_icu'},
+ 'id': {'type': 'keyword'},
+ 'project': {'type': 'keyword'},
+ 'version': {'type': 'keyword'},
+ 'path': {'type': 'keyword'},
+ 'page_id': {'type': 'keyword'},
+ 'commit': {'type': 'keyword'},
+ 'title': {'type': 'text', 'analyzer': 'default_icu'},
+ 'content': {'type': 'text', 'analyzer': 'default_icu'},
'blocks': {
'type': 'object',
'properties': {
- 'code': {'type': 'string', 'analyzer': 'default_icu'}
+ 'code': {'type': 'text', 'analyzer': 'default_icu'}
}
},
# Add a weight field to enhance relevancy scoring.
'weight': {'type': 'float'},
+ # Associate a section with a page.
+ self._parent: {
+ 'type': 'join',
+ 'relations': {
+ self._parent: self._type
+ }
+ },
}
}
}
diff --git a/readthedocs/search/lib.py b/readthedocs/search/lib.py
index 8500a829b03..5ace5d84094 100644
--- a/readthedocs/search/lib.py
+++ b/readthedocs/search/lib.py
@@ -25,9 +25,9 @@ def search_project(request, query, language=None):
]
},
},
- "facets": {
+ "aggs": {
"language": {
- "terms": {"field": "lang"},
+ "terms": {"field": "lang.keyword"},
},
},
"highlight": {
@@ -36,13 +36,12 @@ def search_project(request, query, language=None):
"description": {},
}
},
- "fields": ["name", "slug", "description", "lang", "url"],
+ "_source": ["name", "slug", "description", "lang", "url"],
"size": 50 # TODO: Support pagination.
}
if language:
- body['facets']['language']['facet_filter'] = {"term": {"lang": language}}
- body['filter'] = {"term": {"lang": language}}
+ body['query']['bool']['filter'] = {"term": {"lang": language}}
before_project_search.send(request=request, sender=ProjectIndex, body=body)
@@ -89,15 +88,15 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy
]
}
},
- "facets": {
+ "aggs": {
"taxonomy": {
- "terms": {"field": "taxonomy"},
+ "terms": {"field": "taxonomy.keyword"},
},
"project": {
- "terms": {"field": "project"},
+ "terms": {"field": "project.keyword"},
},
"version": {
- "terms": {"field": "version"},
+ "terms": {"field": "version.keyword"},
},
},
"highlight": {
@@ -107,12 +106,12 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy
"content": {},
}
},
- "fields": ["title", "project", "version", "path"],
+ "_source": ["title", "project", "version", "path"],
"size": 50 # TODO: Support pagination.
}
if project_slug or version_slug or taxonomy:
- final_filter = {"and": []}
+ final_filter = []
if project_slug:
try:
@@ -126,7 +125,7 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy
in Project.objects.public(
request.user).filter(
superprojects__parent__slug=project.slug))
- final_filter['and'].append({"terms": {"project": project_slugs}})
+ final_filter.append({"terms": {"project": project_slugs}})
# Add routing to optimize search by hitting the right shard.
# This purposely doesn't apply routing if the project has more
@@ -141,15 +140,12 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy
return None
if version_slug:
- final_filter['and'].append({'term': {'version': version_slug}})
+ final_filter.append({'term': {'version': version_slug}})
if taxonomy:
- final_filter['and'].append({'term': {'taxonomy': taxonomy}})
+ final_filter.append({'term': {'taxonomy': taxonomy}})
- body['filter'] = final_filter
- body['facets']['project']['facet_filter'] = final_filter
- body['facets']['version']['facet_filter'] = final_filter
- body['facets']['taxonomy']['facet_filter'] = final_filter
+ body['query']['bool']['filter'] = final_filter
if settings.DEBUG:
print("Before Signal")
@@ -167,9 +163,9 @@ def search_section(request, query, project_slug=None, version_slug=LATEST,
"""
Search for a section of content.
- When you search, you will have a ``project`` facet, which includes the
+ When you search, you will have a ``project`` facet (aggs), which includes the
number of matching sections per project. When you search inside a project,
- the ``path`` facet will show the number of matching sections per page.
+ the ``path`` aggs will show the number of matching sections per page.
:param request: Request instance
:param query: string to use in query
@@ -198,12 +194,9 @@ def search_section(request, query, project_slug=None, version_slug=LATEST,
]
}
},
- "facets": {
+ "aggs": {
"project": {
- "terms": {"field": "project"},
- "facet_filter": {
- "term": {"version": version_slug},
- }
+ "terms": {"field": "project.keyword"},
},
},
"highlight": {
@@ -212,36 +205,29 @@ def search_section(request, query, project_slug=None, version_slug=LATEST,
"content": {},
}
},
- "fields": ["title", "project", "version", "path", "page_id", "content"],
+ "_source": ["title", "project", "version", "path", "page_id", "content"],
"size": 10 # TODO: Support pagination.
}
if project_slug:
- body['filter'] = {
- "and": [
- {"term": {"project": project_slug}},
- {"term": {"version": version_slug}},
- ]
- }
- body['facets']['path'] = {
+ body['query']['bool']['filter'] = [
+ {"term": {"project": project_slug}},
+ {"term": {"version": version_slug}},
+ ]
+ body['aggs']['path'] = {
"terms": {"field": "path"},
- "facet_filter": {
- "term": {"project": project_slug},
- }
},
# Add routing to optimize search by hitting the right shard.
kwargs['routing'] = project_slug
if path:
- body['filter'] = {
- "and": [
- {"term": {"path": path}},
- ]
- }
+ body['query']['bool']['filter'] = [
+ {"term": {"path": path}},
+ ]
if path and not project_slug:
# Show facets when we only have a path
- body['facets']['path'] = {
+ body['aggs']['path'] = {
"terms": {"field": "path"}
}
diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py
index 7d3a51d5fc2..2cf7a057218 100644
--- a/readthedocs/search/views.py
+++ b/readthedocs/search/views.py
@@ -55,16 +55,19 @@ def elastic_search(request):
if results:
# pre and post 1.0 compat
for num, hit in enumerate(results['hits']['hits']):
- for key, val in list(hit['fields'].items()):
+ for key, val in list(hit['_source'].items()):
if isinstance(val, list):
- results['hits']['hits'][num]['fields'][key] = val[0]
+ results['hits']['hits'][num]['_source'][key] = val[0]
+ # we cannot render attributes starting with an underscore
+ hit['fields'] = hit['_source']
+ del hit['_source']
- if 'facets' in results:
+ if 'aggregations' in results:
for facet_type in ['project', 'version', 'taxonomy', 'language']:
- if facet_type in results['facets']:
+ if facet_type in results['aggregations']:
facets[facet_type] = collections.OrderedDict()
- for term in results['facets'][facet_type]['terms']:
- facets[facet_type][term['term']] = term['count']
+ for term in results['aggregations'][facet_type]['buckets']:
+ facets[facet_type][term['key']] = term['doc_count']
if settings.DEBUG:
print(pprint(results))
diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py
index 8057901534f..31754880ee0 100644
--- a/readthedocs/settings/base.py
+++ b/readthedocs/settings/base.py
@@ -120,6 +120,7 @@ def INSTALLED_APPS(self): # noqa
apps.append('django_countries')
apps.append('readthedocsext.donate')
apps.append('readthedocsext.embed')
+ apps.append('readthedocsext.search')
return apps
@property
diff --git a/readthedocs/urls.py b/readthedocs/urls.py
index b6053ab1983..25e04135d84 100644
--- a/readthedocs/urls.py
+++ b/readthedocs/urls.py
@@ -88,7 +88,7 @@
if settings.USE_PROMOS:
# Include donation URL's
- groups.append([
+ groups.insert(0, [
url(r'^sustainability/', include('readthedocsext.donate.urls')),
])
@@ -98,6 +98,12 @@
url(r'^api/v1/embed/', include('readthedocsext.embed.urls'))
)
+if 'readthedocsext.search' in settings.INSTALLED_APPS:
+ for num, _url in enumerate(rtd_urls):
+ if _url and hasattr(_url, 'name') and _url.name == 'search':
+ rtd_urls[num] = \
+                url(r'^search/', 'readthedocsext.search.mainsearch.elastic_search', name='search')
+
if not getattr(settings, 'USE_SUBDOMAIN', False) or settings.DEBUG:
groups.insert(0, docs_urls)
if getattr(settings, 'ALLOW_ADMIN', True):
diff --git a/requirements/pip.txt b/requirements/pip.txt
index a6dc76d9588..9b3fb8072af 100644
--- a/requirements/pip.txt
+++ b/requirements/pip.txt
@@ -51,8 +51,7 @@ httplib2==0.10.3
GitPython==2.1.8
# Search
-elasticsearch==1.5.0
-pyelasticsearch==0.7.1
+elasticsearch==5.5.2
pyquery==1.4.0
# Utils