diff --git a/readthedocs/core/management/commands/reindex_elasticsearch.py b/readthedocs/core/management/commands/reindex_elasticsearch.py index b736a1cd426..24927a02f3e 100644 --- a/readthedocs/core/management/commands/reindex_elasticsearch.py +++ b/readthedocs/core/management/commands/reindex_elasticsearch.py @@ -1,12 +1,14 @@ -"""Reindex Elastic Search indexes""" +# -*- coding: utf-8 -*- +"""Reindex Elastic Search indexes.""" + +from __future__ import ( + absolute_import, division, print_function, unicode_literals) -from __future__ import absolute_import import logging +import socket from optparse import make_option -from django.core.management.base import BaseCommand -from django.core.management.base import CommandError -from django.conf import settings +from django.core.management.base import BaseCommand, CommandError from readthedocs.builds.constants import LATEST from readthedocs.builds.models import Version @@ -23,34 +25,42 @@ class Command(BaseCommand): dest='project', default='', help='Project to index'), + make_option('-l', + dest='only_latest', + default=False, + action='store_true', + help='Only index latest'), ) def handle(self, *args, **options): - """Build/index all versions or a single project's version""" + """Build/index all versions or a single project's version.""" project = options['project'] + only_latest = options['only_latest'] - queryset = Version.objects.all() + queryset = Version.objects.filter(active=True) if project: queryset = queryset.filter(project__slug=project) if not queryset.exists(): raise CommandError( - 'No project with slug: {slug}'.format(slug=project)) - log.info("Building all versions for %s", project) - elif getattr(settings, 'INDEX_ONLY_LATEST', True): + u'No project with slug: {slug}'.format(slug=project)) + log.info(u'Building all versions for %s', project) + if only_latest: + log.warning('Indexing only latest') queryset = queryset.filter(slug=LATEST) - for version in queryset: - log.info("Reindexing %s", version) - 
try: - commit = version.project.vcs_repo(version.slug).commit - except: # pylint: disable=bare-except - # An exception can be thrown here in production, but it's not - # documented what the exception here is - commit = None - + for version_pk, version_slug, project_slug in queryset.values_list( + 'pk', 'slug', 'project__slug'): + log.info(u'Reindexing %s:%s' % (project_slug, version_slug)) try: - update_search(version.pk, commit, - delete_non_commit_files=False) + update_search.apply_async( + kwargs=dict( + version_pk=version_pk, + commit='reindex', + delete_non_commit_files=False + ), + priority=0, + queue=socket.gethostname() + ) except Exception: - log.exception('Reindex failed for %s', version) + log.exception(u'Reindexing failed for %s:%s' % (project_slug, version_slug)) diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py index 61f6cfeca64..6db89a93ce0 100644 --- a/readthedocs/projects/views/public.py +++ b/readthedocs/projects/views/public.py @@ -305,6 +305,10 @@ def elastic_project_search(request, project_slug): {'match': {'title': {'query': query, 'boost': 10}}}, {'match': {'headers': {'query': query, 'boost': 5}}}, {'match': {'content': {'query': query}}}, + ], + 'filter': [ + {'term': {'project': project_slug}}, + {'term': {'version': version_slug}}, ] } }, @@ -315,13 +319,7 @@ def elastic_project_search(request, project_slug): 'content': {}, } }, - 'fields': ['title', 'project', 'version', 'path'], - 'filter': { - 'and': [ - {'term': {'project': project_slug}}, - {'term': {'version': version_slug}}, - ] - }, + '_source': ['title', 'project', 'version', 'path'], 'size': 50, # TODO: Support pagination. 
} @@ -335,9 +333,12 @@ def elastic_project_search(request, project_slug): if results: # pre and post 1.0 compat for num, hit in enumerate(results['hits']['hits']): - for key, val in list(hit['fields'].items()): + for key, val in list(hit['_source'].items()): if isinstance(val, list): - results['hits']['hits'][num]['fields'][key] = val[0] + results['hits']['hits'][num]['_source'][key] = val[0] + # we cannot render attributes starting with an underscore + hit['fields'] = hit['_source'] + del hit['_source'] return render( request, diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py index 5480ce98093..08315000a1c 100644 --- a/readthedocs/restapi/urls.py +++ b/readthedocs/restapi/urls.py @@ -50,7 +50,7 @@ url(r'index_search/', search_views.index_search, name='index_search'), - url(r'search/$', views.search_views.search, name='api_search'), + url(r'^search/$', views.search_views.search, name='api_search'), url(r'search/project/$', search_views.project_search, name='api_project_search'), diff --git a/readthedocs/restapi/utils.py b/readthedocs/restapi/utils.py index 00e9ec15937..74eb6d8b413 100644 --- a/readthedocs/restapi/utils.py +++ b/readthedocs/restapi/utils.py @@ -161,12 +161,11 @@ def index_search_request( for route in routes: section_obj.bulk_index( section_index_list, - parent=page_id, routing=route, ) for route in routes: - page_obj.bulk_index(index_list, parent=project.slug, routing=route) + page_obj.bulk_index(index_list, routing=route) if delete: log.info('Deleting files not in commit: %s', commit) diff --git a/readthedocs/restapi/views/search_views.py b/readthedocs/restapi/views/search_views.py index abe36174097..a0d64b401bd 100644 --- a/readthedocs/restapi/views/search_views.py +++ b/readthedocs/restapi/views/search_views.py @@ -32,7 +32,7 @@ def index_search(request): utils.index_search_request( version=version, page_list=data['page_list'], commit=commit, - project_scale=project_scale, page_scale=page_scale) + project_scale=project_scale, 
page_scale=page_scale, section=False) return Response({'indexed': True}) @@ -64,7 +64,7 @@ def search(request): # Supplement result paths with domain information on project hits = results.get('hits', {}).get('hits', []) for (n, hit) in enumerate(hits): - fields = hit.get('fields', {}) + fields = hit.get('_source', {}) search_project = fields.get('project')[0] search_version = fields.get('version')[0] path = fields.get('path')[0] @@ -77,9 +77,12 @@ def search(request): ) except ProjectRelationship.DoesNotExist: pass - results['hits']['hits'][n]['fields']['link'] = ( + results['hits']['hits'][n]['_source']['link'] = ( canonical_url + path ) + # we cannot render attributes starting with an underscore + results['hits']['hits'][n]['fields'] = results['hits']['hits'][n]['_source'] + del results['hits']['hits'][n]['_source'] return Response({'results': results}) diff --git a/readthedocs/rtd_tests/mocks/search_mock_responses.py b/readthedocs/rtd_tests/mocks/search_mock_responses.py new file mode 100644 index 00000000000..2ade8103c52 --- /dev/null +++ b/readthedocs/rtd_tests/mocks/search_mock_responses.py @@ -0,0 +1,160 @@ +search_project_response = """ +{ + "took": 17, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": 1, + "max_score": 1.8232156, + "hits": [ + { + "_index": "readthedocs", + "_type": "project", + "_id": "6", + "_score": 1.8232156, + "_source": { + "name": "Pip", + "description": "", + "lang": "en", + "url": "/projects/pip/", + "slug": "pip" + }, + "highlight": { + "name": [ + "Pip" + ] + } + } + ] + }, + "aggregations": { + "language": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "en", + "doc_count": 1 + } + ] + } + } +} +""" + +search_file_response = """ +{ + "took": 27, + "timed_out": false, + "_shards": { + "total": 5, + "successful": 5, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": 3, + "max_score": 6.989019, + 
"hits": [ + { + "_index": "readthedocs", + "_type": "page", + "_id": "AWKuy4jp-H7vMtbTbHP5", + "_score": 6.989019, + "_routing": "prova", + "_source": { + "path": "_docs/cap2", + "project": "prova", + "title": "Capitolo 2", + "version": "latest" + }, + "highlight": { + "headers": [ + "Capitolo 2" + ], + "title": [ + "Capitolo 2" + ], + "content": [ + "Capitolo 2 In questo capitolo, vengono trattate" + ] + } + }, + { + "_index": "readthedocs", + "_type": "page", + "_id": "AWKuy4jp-H7vMtbTbHP4", + "_score": 6.973402, + "_routing": "prova", + "_source": { + "path": "_docs/cap1", + "project": "prova", + "title": "Capitolo 1", + "version": "latest" + }, + "highlight": { + "headers": [ + "Capitolo 1" + ], + "title": [ + "Capitolo 1" + ], + "content": [ + "Capitolo 1 In questo capitolo, le funzioni principali" + ] + } + }, + { + "_index": "readthedocs", + "_type": "page", + "_id": "AWKuy4jp-H7vMtbTbHP3", + "_score": 0.2017303, + "_routing": "prova", + "_source": { + "path": "index", + "project": "prova", + "title": "Titolo del documento", + "version": "latest" + }, + "highlight": { + "content": [ + "Titolo del documento Nel Capitolo 1 Nel Capitolo 2" + ] + } + } + ] + }, + "aggregations": { + "project": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "prova", + "doc_count": 3 + } + ] + }, + "taxonomy": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [] + }, + "version": { + "doc_count_error_upper_bound": 0, + "sum_other_doc_count": 0, + "buckets": [ + { + "key": "latest", + "doc_count": 3 + } + ] + } + } +} +""" diff --git a/readthedocs/rtd_tests/tests/test_search.py b/readthedocs/rtd_tests/tests/test_search.py new file mode 100644 index 00000000000..01096a098c7 --- /dev/null +++ b/readthedocs/rtd_tests/tests/test_search.py @@ -0,0 +1,150 @@ +# -*- coding: utf-8 -*- +from __future__ import ( + absolute_import, division, print_function, unicode_literals) + +import json + +from 
django.core.urlresolvers import reverse +from django.test import TestCase, RequestFactory +from mock import patch +from urllib3._collections import HTTPHeaderDict + +from readthedocs.projects.models import Project +from readthedocs.rtd_tests.mocks.search_mock_responses import ( + search_project_response, search_file_response +) + + +class TestSearch(TestCase): + fixtures = ['eric', 'test_data'] + + def setUp(self): + self.client.login(username='eric', password='test') + self.pip = Project.objects.get(slug='pip') + self.factory = RequestFactory() + + def perform_request_file_mock(self, method, url, params=None, body=None, timeout=None, ignore=()): + """ + Elastic Search Urllib3HttpConnection mock for file search + """ + headers = HTTPHeaderDict({ + 'content-length': '893', + 'content-type': 'application/json; charset=UTF-8' + }) + raw_data = search_file_response + return 200, headers, raw_data + + def perform_request_project_mock(self, method, url, params=None, body=None, timeout=None, ignore=()): + """ + Elastic Search Urllib3HttpConnection mock for project search + """ + headers = HTTPHeaderDict({ + 'content-length': '893', + 'content-type': 'application/json; charset=UTF-8' + }) + raw_data = search_project_response + return 200, headers, raw_data + + @patch( + 'elasticsearch.connection.http_urllib3.Urllib3HttpConnection.perform_request', + side_effect=perform_request_project_mock + ) + def test_search_project(self, perform_request_mock): + """ + Tests the search view (by project) by mocking the perform request method + of the elastic search module. Checks the query string provided + to elastic search. 
+ """ + self.client.login(username='eric', password='test') + r = self.client.get( + reverse('search'), + {'q': 'pip', 'type': 'project', 'project': None} + ) + self.assertEqual(r.status_code, 200) + response = perform_request_mock.call_args_list[0][0][3] + query_dict = json.loads(response) + self.assertIn('query', query_dict) + self.assertDictEqual({ + 'bool': { + 'should': [ + {'match': {'name': {'query': 'pip', 'boost': 10}}}, + {'match': {'description': {'query': 'pip'}}} + ] + } + }, query_dict['query']) + main_hit = r.context['results']['hits']['hits'][0] + self.assertEqual(r.status_code, 200) + self.assertEqual(main_hit['_type'], 'project') + self.assertEqual(main_hit['_type'], 'project') + self.assertEqual(main_hit['fields']['name'], 'Pip') + self.assertEqual(main_hit['fields']['slug'], 'pip') + + @patch( + 'elasticsearch.connection.http_urllib3.Urllib3HttpConnection.perform_request', + side_effect=perform_request_file_mock + ) + def test_search_file(self, perform_request_mock): + """ + Tests the search view (by file) by mocking the perform request method + of the elastic search module. Checks the query string provided + to elastic search. 
+ """ + self.client.login(username='eric', password='test') + r = self.client.get( + reverse('search'), + {'q': 'capitolo', 'type': 'file'} + ) + response = perform_request_mock.call_args_list[0][0][3] + query_dict = json.loads(response) + self.assertIn('query', query_dict) + self.assertDictEqual({ + 'bool': { + 'filter': [{'term': {'version': 'latest'}}], + 'should': [ + {'match_phrase': {'title': {'query': 'capitolo', 'boost': 10, 'slop': 2}}}, + {'match_phrase': {'headers': {'query': 'capitolo', 'boost': 5, 'slop': 3}}}, + {'match_phrase': {'content': {'query': 'capitolo', 'slop': 5}}} + ] + } + }, query_dict['query']) + main_hit = r.context['results']['hits']['hits'][0] + self.assertEqual(r.status_code, 200) + self.assertEqual(main_hit['_type'], 'page') + self.assertEqual(main_hit['fields']['project'], 'prova') + self.assertEqual(main_hit['fields']['path'], '_docs/cap2') + + @patch( + 'elasticsearch.connection.http_urllib3.Urllib3HttpConnection.perform_request', + side_effect=perform_request_file_mock + ) + def test_search_in_project(self, perform_request_mock): + """ + Tests the search view (by file) by mocking the perform request method + of the elastic search module. Checks the query string provided + to elastic search. 
+ """ + self.client.login(username='eric', password='test') + r = self.client.get( + '/projects/pip/search/', + {'q': 'capitolo'} + ) + response = perform_request_mock.call_args_list[0][0][3] + query_dict = json.loads(response) + self.assertDictEqual({ + 'bool': { + 'should': [ + {'match': {'title': {'boost': 10, 'query': 'capitolo'}}}, + {'match': {'headers': {'boost': 5, 'query': 'capitolo'}}}, + {'match': {'content': {'query': 'capitolo'}}} + ], + 'filter': [ + {'term': {'project': 'pip'}}, + {'term': {'version': 'latest'}} + ] + } + }, query_dict['query']) + main_hit = r.context['results']['hits']['hits'][0] + self.assertEqual(r.status_code, 200) + self.assertEqual(main_hit['_type'], 'page') + self.assertEqual(main_hit['fields']['project'], 'prova') + self.assertEqual(main_hit['fields']['path'], '_docs/cap2') diff --git a/readthedocs/search/indexes.py b/readthedocs/search/indexes.py index 1b2ede6aaa9..2a0b725c0fe 100644 --- a/readthedocs/search/indexes.py +++ b/readthedocs/search/indexes.py @@ -19,7 +19,7 @@ import datetime from elasticsearch import Elasticsearch, exceptions -from elasticsearch.helpers import bulk_index +from elasticsearch.helpers import bulk from django.conf import settings @@ -48,8 +48,6 @@ def get_settings(self, settings_override=None): 'number_of_replicas': settings.ES_DEFAULT_NUM_REPLICAS, 'number_of_shards': settings.ES_DEFAULT_NUM_SHARDS, 'refresh_interval': '5s', - 'store.compress.tv': True, - 'store.compress.stored': True, 'analysis': self.get_analysis(), } if settings_override: @@ -76,7 +74,7 @@ def get_analysis(self): analyzers['default_icu'] = { 'type': 'custom', 'tokenizer': 'icu_tokenizer', - 'filter': ['word_delimiter', 'icu_folding', 'icu_normalizer'], + 'filter': ['custom_word_delimiter', 'icu_folding', 'icu_normalizer', 'lowercase'], } # Customize the word_delimiter filter to set various options. 
@@ -129,17 +127,14 @@ def bulk_index(self, data, index=None, chunk_size=500, parent=None, doc = { '_index': index, '_type': self._type, - '_id': source['id'], '_source': source, } - if parent: - doc['_parent'] = parent if routing: doc['_routing'] = routing docs.append(doc) # TODO: This doesn't work with the new ES setup. - bulk_index(self.es, docs, chunk_size=chunk_size) + bulk(self.es, docs, chunk_size=chunk_size) def index_document(self, data, index=None, parent=None, routing=None): doc = self.extract_document(data) @@ -220,25 +215,24 @@ def get_mapping(self): # Disable _all field to reduce index size. '_all': {'enabled': False}, 'properties': { - 'id': {'type': 'long'}, - 'name': {'type': 'string', 'analyzer': 'default_icu'}, - 'description': {'type': 'string', 'analyzer': 'default_icu'}, - - 'slug': {'type': 'string', 'index': 'not_analyzed'}, - 'lang': {'type': 'string', 'index': 'not_analyzed'}, - 'tags': {'type': 'string', 'index': 'not_analyzed'}, - 'privacy': {'type': 'string', 'index': 'not_analyzed'}, + 'id': {'type': 'keyword'}, + 'name': {'type': 'text', 'analyzer': 'default_icu'}, + 'description': {'type': 'text', 'analyzer': 'default_icu'}, + + 'slug': {'type': 'keyword'}, + 'lang': {'type': 'keyword'}, + 'tags': {'type': 'keyword'}, + 'privacy': {'type': 'keyword'}, 'author': { - 'type': 'string', + 'type': 'text', 'analyzer': 'default_icu', 'fields': { 'raw': { - 'type': 'string', - 'index': 'not_analyzed', + 'type': 'keyword', }, }, }, - 'url': {'type': 'string', 'index': 'not_analyzed'}, + 'url': {'type': 'keyword'}, # Add a weight field to enhance relevancy scoring. 'weight': {'type': 'float'}, } @@ -272,22 +266,27 @@ def get_mapping(self): self._type: { # Disable _all field to reduce index size. '_all': {'enabled': False}, - # Associate a page with a project. 
- '_parent': {'type': self._parent}, 'properties': { - 'id': {'type': 'string', 'index': 'not_analyzed'}, - 'sha': {'type': 'string', 'index': 'not_analyzed'}, - 'project': {'type': 'string', 'index': 'not_analyzed'}, - 'version': {'type': 'string', 'index': 'not_analyzed'}, - 'path': {'type': 'string', 'index': 'not_analyzed'}, - 'taxonomy': {'type': 'string', 'index': 'not_analyzed'}, - 'commit': {'type': 'string', 'index': 'not_analyzed'}, - - 'title': {'type': 'string', 'analyzer': 'default_icu'}, - 'headers': {'type': 'string', 'analyzer': 'default_icu'}, - 'content': {'type': 'string', 'analyzer': 'default_icu'}, + 'id': {'type': 'keyword'}, + 'sha': {'type': 'keyword'}, + 'project': {'type': 'keyword'}, + 'version': {'type': 'keyword'}, + 'path': {'type': 'keyword'}, + 'taxonomy': {'type': 'keyword'}, + 'commit': {'type': 'keyword'}, + + 'title': {'type': 'text', 'analyzer': 'default_icu'}, + 'headers': {'type': 'text', 'analyzer': 'default_icu'}, + 'content': {'type': 'text', 'analyzer': 'default_icu'}, # Add a weight field to enhance relevancy scoring. 'weight': {'type': 'float'}, + # Associate a page with a project. + self._parent: { + 'type': 'join', + 'relations': { + self._parent: self._type + } + }, } } } @@ -297,7 +296,7 @@ def get_mapping(self): def extract_document(self, data): doc = {} - attrs = ('id', 'project', 'title', 'headers', 'version', 'path', + attrs = ('project', 'title', 'headers', 'version', 'path', 'content', 'taxonomy', 'commit') for attr in attrs: doc[attr] = data.get(attr, '') @@ -320,8 +319,6 @@ def get_mapping(self): self._type: { # Disable _all field to reduce index size. '_all': {'enabled': False}, - # Associate a section with a page. - '_parent': {'type': self._parent}, # Commenting this out until we need it. 
# 'suggest': { # "type": "completion", @@ -330,22 +327,29 @@ def get_mapping(self): # "payloads": True, # }, 'properties': { - 'id': {'type': 'string', 'index': 'not_analyzed'}, - 'project': {'type': 'string', 'index': 'not_analyzed'}, - 'version': {'type': 'string', 'index': 'not_analyzed'}, - 'path': {'type': 'string', 'index': 'not_analyzed'}, - 'page_id': {'type': 'string', 'index': 'not_analyzed'}, - 'commit': {'type': 'string', 'index': 'not_analyzed'}, - 'title': {'type': 'string', 'analyzer': 'default_icu'}, - 'content': {'type': 'string', 'analyzer': 'default_icu'}, + 'id': {'type': 'keyword'}, + 'project': {'type': 'keyword'}, + 'version': {'type': 'keyword'}, + 'path': {'type': 'keyword'}, + 'page_id': {'type': 'keyword'}, + 'commit': {'type': 'keyword'}, + 'title': {'type': 'text', 'analyzer': 'default_icu'}, + 'content': {'type': 'text', 'analyzer': 'default_icu'}, 'blocks': { 'type': 'object', 'properties': { - 'code': {'type': 'string', 'analyzer': 'default_icu'} + 'code': {'type': 'text', 'analyzer': 'default_icu'} } }, # Add a weight field to enhance relevancy scoring. 'weight': {'type': 'float'}, + # Associate a section with a page. + self._parent: { + 'type': 'join', + 'relations': { + self._parent: self._type + } + }, } } } diff --git a/readthedocs/search/lib.py b/readthedocs/search/lib.py index 8500a829b03..5ace5d84094 100644 --- a/readthedocs/search/lib.py +++ b/readthedocs/search/lib.py @@ -25,9 +25,9 @@ def search_project(request, query, language=None): ] }, }, - "facets": { + "aggs": { "language": { - "terms": {"field": "lang"}, + "terms": {"field": "lang.keyword"}, }, }, "highlight": { @@ -36,13 +36,12 @@ def search_project(request, query, language=None): "description": {}, } }, - "fields": ["name", "slug", "description", "lang", "url"], + "_source": ["name", "slug", "description", "lang", "url"], "size": 50 # TODO: Support pagination. 
} if language: - body['facets']['language']['facet_filter'] = {"term": {"lang": language}} - body['filter'] = {"term": {"lang": language}} + body['query']['bool']['filter'] = {"term": {"lang": language}} before_project_search.send(request=request, sender=ProjectIndex, body=body) @@ -89,15 +88,15 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy ] } }, - "facets": { + "aggs": { "taxonomy": { - "terms": {"field": "taxonomy"}, + "terms": {"field": "taxonomy.keyword"}, }, "project": { - "terms": {"field": "project"}, + "terms": {"field": "project.keyword"}, }, "version": { - "terms": {"field": "version"}, + "terms": {"field": "version.keyword"}, }, }, "highlight": { @@ -107,12 +106,12 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy "content": {}, } }, - "fields": ["title", "project", "version", "path"], + "_source": ["title", "project", "version", "path"], "size": 50 # TODO: Support pagination. } if project_slug or version_slug or taxonomy: - final_filter = {"and": []} + final_filter = [] if project_slug: try: @@ -126,7 +125,7 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy in Project.objects.public( request.user).filter( superprojects__parent__slug=project.slug)) - final_filter['and'].append({"terms": {"project": project_slugs}}) + final_filter.append({"terms": {"project": project_slugs}}) # Add routing to optimize search by hitting the right shard. 
# This purposely doesn't apply routing if the project has more @@ -141,15 +140,12 @@ def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy return None if version_slug: - final_filter['and'].append({'term': {'version': version_slug}}) + final_filter.append({'term': {'version': version_slug}}) if taxonomy: - final_filter['and'].append({'term': {'taxonomy': taxonomy}}) + final_filter.append({'term': {'taxonomy': taxonomy}}) - body['filter'] = final_filter - body['facets']['project']['facet_filter'] = final_filter - body['facets']['version']['facet_filter'] = final_filter - body['facets']['taxonomy']['facet_filter'] = final_filter + body['query']['bool']['filter'] = final_filter if settings.DEBUG: print("Before Signal") @@ -167,9 +163,9 @@ def search_section(request, query, project_slug=None, version_slug=LATEST, """ Search for a section of content. - When you search, you will have a ``project`` facet, which includes the + When you search, you will have a ``project`` facet (aggs), which includes the number of matching sections per project. When you search inside a project, - the ``path`` facet will show the number of matching sections per page. + the ``path`` aggs will show the number of matching sections per page. :param request: Request instance :param query: string to use in query @@ -198,12 +194,9 @@ def search_section(request, query, project_slug=None, version_slug=LATEST, ] } }, - "facets": { + "aggs": { "project": { - "terms": {"field": "project"}, - "facet_filter": { - "term": {"version": version_slug}, - } + "terms": {"field": "project.keyword"}, }, }, "highlight": { @@ -212,36 +205,29 @@ def search_section(request, query, project_slug=None, version_slug=LATEST, "content": {}, } }, - "fields": ["title", "project", "version", "path", "page_id", "content"], + "_source": ["title", "project", "version", "path", "page_id", "content"], "size": 10 # TODO: Support pagination. 
} if project_slug: - body['filter'] = { - "and": [ - {"term": {"project": project_slug}}, - {"term": {"version": version_slug}}, - ] - } - body['facets']['path'] = { + body['query']['bool']['filter'] = [ + {"term": {"project": project_slug}}, + {"term": {"version": version_slug}}, + ] + body['aggs']['path'] = { "terms": {"field": "path"}, - "facet_filter": { - "term": {"project": project_slug}, - } } # Add routing to optimize search by hitting the right shard. kwargs['routing'] = project_slug if path: - body['filter'] = { - "and": [ - {"term": {"path": path}}, - ] - } + body['query']['bool']['filter'] = [ + {"term": {"path": path}}, + ] if path and not project_slug: # Show facets when we only have a path - body['facets']['path'] = { + body['aggs']['path'] = { "terms": {"field": "path"} } diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 7d3a51d5fc2..2cf7a057218 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -55,16 +55,19 @@ def elastic_search(request): if results: # pre and post 1.0 compat for num, hit in enumerate(results['hits']['hits']): - for key, val in list(hit['fields'].items()): + for key, val in list(hit['_source'].items()): if isinstance(val, list): - results['hits']['hits'][num]['fields'][key] = val[0] + results['hits']['hits'][num]['_source'][key] = val[0] + # we cannot render attributes starting with an underscore + hit['fields'] = hit['_source'] + del hit['_source'] - if 'facets' in results: + if 'aggregations' in results: for facet_type in ['project', 'version', 'taxonomy', 'language']: - if facet_type in results['facets']: + if facet_type in results['aggregations']: facets[facet_type] = collections.OrderedDict() - for term in results['facets'][facet_type]['terms']: - facets[facet_type][term['term']] = term['count'] + for term in results['aggregations'][facet_type]['buckets']: + facets[facet_type][term['key']] = term['doc_count'] if settings.DEBUG: print(pprint(results)) diff --git 
a/readthedocs/settings/base.py b/readthedocs/settings/base.py index 8057901534f..31754880ee0 100644 --- a/readthedocs/settings/base.py +++ b/readthedocs/settings/base.py @@ -120,6 +120,7 @@ def INSTALLED_APPS(self): # noqa apps.append('django_countries') apps.append('readthedocsext.donate') apps.append('readthedocsext.embed') + apps.append('readthedocsext.search') return apps @property diff --git a/readthedocs/urls.py b/readthedocs/urls.py index b6053ab1983..25e04135d84 100644 --- a/readthedocs/urls.py +++ b/readthedocs/urls.py @@ -88,7 +88,7 @@ if settings.USE_PROMOS: # Include donation URL's - groups.append([ + groups.insert(0, [ url(r'^sustainability/', include('readthedocsext.donate.urls')), ]) @@ -98,6 +98,12 @@ url(r'^api/v1/embed/', include('readthedocsext.embed.urls')) ) +if 'readthedocsext.search' in settings.INSTALLED_APPS: + for num, _url in enumerate(rtd_urls): + if _url and hasattr(_url, 'name') and _url.name == 'search': + rtd_urls[num] = \ + url(r'^search/', 'readthedocsext.search.mainsearch.elastic_search', name='search') + if not getattr(settings, 'USE_SUBDOMAIN', False) or settings.DEBUG: groups.insert(0, docs_urls) if getattr(settings, 'ALLOW_ADMIN', True): diff --git a/requirements/pip.txt b/requirements/pip.txt index a6dc76d9588..9b3fb8072af 100644 --- a/requirements/pip.txt +++ b/requirements/pip.txt @@ -51,8 +51,7 @@ httplib2==0.10.3 GitPython==2.1.8 # Search -elasticsearch==1.5.0 -pyelasticsearch==0.7.1 +elasticsearch==5.5.2 pyquery==1.4.0 # Utils