diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py
index d28d71ba12d..774e36a0657 100644
--- a/readthedocs/projects/tasks.py
+++ b/readthedocs/projects/tasks.py
@@ -50,8 +50,6 @@
 from readthedocs.doc_builder.python_environments import Conda, Virtualenv
 from readthedocs.projects.models import APIProject
 from readthedocs.restapi.client import api as api_v2
-from readthedocs.restapi.utils import index_search_request
-from readthedocs.search.parse_json import process_all_json_files
 from readthedocs.vcs_support import utils as vcs_support_utils
 from readthedocs.worker import app
 from .constants import LOG_TEMPLATE
@@ -902,40 +900,6 @@ def move_files(version_pk, hostname, html=False, localmedia=False, search=False,
     Syncer.copy(from_path, to_path, host=hostname)


-@app.task(queue='web')
-def update_search(version_pk, commit, delete_non_commit_files=True):
-    """
-    Task to update search indexes.
-
-    :param version_pk: Version id to update
-    :param commit: Commit that updated index
-    :param delete_non_commit_files: Delete files not in commit from index
-    """
-    version = Version.objects.get(pk=version_pk)
-
-    if version.project.is_type_sphinx:
-        page_list = process_all_json_files(version, build_dir=False)
-    else:
-        log.debug('Unknown documentation type: %s',
-                  version.project.documentation_type)
-        return
-
-    log_msg = ' '.join([page['path'] for page in page_list])
-    log.info("(Search Index) Sending Data: %s [%s]", version.project.slug,
-             log_msg)
-    index_search_request(
-        version=version,
-        page_list=page_list,
-        commit=commit,
-        project_scale=0,
-        page_scale=0,
-        # Don't index sections to speed up indexing.
-        # They aren't currently exposed anywhere.
-        section=False,
-        delete=delete_non_commit_files,
-    )
-
-
 @app.task(queue='web')
 def symlink_project(project_pk):
     project = Project.objects.get(pk=project_pk)
diff --git a/readthedocs/projects/views/public.py b/readthedocs/projects/views/public.py
index 9b25cb82bef..f03e8edeb7b 100644
--- a/readthedocs/projects/views/public.py
+++ b/readthedocs/projects/views/public.py
@@ -28,7 +28,6 @@
 from readthedocs.builds.views import BuildTriggerMixin
 from readthedocs.projects.models import ImportedFile, Project
 from readthedocs.search.documents import PageDocument
-from readthedocs.search.indexes import PageIndex
 from readthedocs.search.views import LOG_TEMPLATE

 from .base import ProjectOnboardMixin
diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py
index 02b5ffeb064..288442eb9dd 100644
--- a/readthedocs/restapi/urls.py
+++ b/readthedocs/restapi/urls.py
@@ -14,15 +14,7 @@
 from rest_framework import routers

 from readthedocs.constants import pattern_opts
-from readthedocs.restapi import views
-from readthedocs.restapi.views import (
-    core_views,
-    footer_views,
-    integrations,
-    search_views,
-    task_views,
-)
-
+from readthedocs.restapi.views import (core_views, footer_views, task_views, integrations)
 from .views.model_views import (
     BuildCommandViewSet,
     BuildViewSet,
@@ -69,24 +61,6 @@
     url(r'footer_html/', footer_views.footer_html, name='footer_html'),
 ]

-search_urls = [
-    url(
-        r'index_search/',
-        search_views.index_search,
-        name='index_search',
-    ),
-    url(r'^search/$', views.search_views.search, name='api_search'),
-    url(r'search/project/$',
-        search_views.project_search,
-        name='api_project_search',
-    ),
-    url(
-        r'search/section/$',
-        search_views.section_search,
-        name='api_section_search',
-    ),
-]
-
 task_urls = [
     url(
         r'jobs/status/(?P<task_id>[^/]+)/',
@@ -138,7 +112,6 @@
 urlpatterns += function_urls
 urlpatterns += task_urls
-urlpatterns += search_urls
 urlpatterns += integration_urls
diff --git a/readthedocs/restapi/utils.py b/readthedocs/restapi/utils.py
index 9e7f73bfa43..82005f695f7 100644
--- a/readthedocs/restapi/utils.py
+++ b/readthedocs/restapi/utils.py
@@ -4,7 +4,6 @@
 from __future__ import (
     absolute_import, division, print_function, unicode_literals)

-import hashlib
 import logging

 from rest_framework.pagination import PageNumberPagination
@@ -13,7 +12,6 @@
     NON_REPOSITORY_VERSIONS, STABLE, STABLE_VERBOSE_NAME)
 from readthedocs.builds.models import Version
-from readthedocs.search.indexes import PageIndex, ProjectIndex, SectionIndex

 log = logging.getLogger(__name__)
@@ -154,119 +152,6 @@
     return set()


-def index_search_request(
-        version, page_list, commit, project_scale, page_scale, section=True,
-        delete=True):
-    """
-    Update search indexes with build output JSON.
-
-    In order to keep sub-projects all indexed on the same shard, indexes will be
-    updated using the parent project's slug as the routing value.
-    """
-    # TODO refactor this function
-    # pylint: disable=too-many-locals
-    project = version.project
-
-    log_msg = ' '.join([page['path'] for page in page_list])
-    log.info(
-        'Updating search index: project=%s pages=[%s]',
-        project.slug,
-        log_msg,
-    )
-
-    project_obj = ProjectIndex()
-    project_obj.index_document(
-        data={
-            'id': project.pk,
-            'name': project.name,
-            'slug': project.slug,
-            'description': project.description,
-            'lang': project.language,
-            'author': [user.username for user in project.users.all()],
-            'url': project.get_absolute_url(),
-            'tags': None,
-            'weight': project_scale,
-        })
-
-    page_obj = PageIndex()
-    section_obj = SectionIndex()
-    index_list = []
-    section_index_list = []
-    routes = [project.slug]
-    routes.extend([p.parent.slug for p in project.superprojects.all()])
-    for page in page_list:
-        log.debug('Indexing page: %s:%s', project.slug, page['path'])
-        to_hash = '-'.join([project.slug, version.slug, page['path']])
-        page_id = hashlib.md5(to_hash.encode('utf-8')).hexdigest()
-        index_list.append({
-            'id': page_id,
-            'project': project.slug,
-            'version': version.slug,
-            'path': page['path'],
-            'title': page['title'],
-            'headers': page['headers'],
-            'content': page['content'],
-            'taxonomy': None,
-            'commit': commit,
-            'weight': page_scale + project_scale,
-        })
-        if section:
-            for sect in page['sections']:
-                id_to_hash = '-'.join([
-                    project.slug,
-                    version.slug,
-                    page['path'],
-                    sect['id'],
-                ])
-                section_index_list.append({
-                    'id': (hashlib.md5(id_to_hash.encode('utf-8')).hexdigest()),
-                    'project': project.slug,
-                    'version': version.slug,
-                    'path': page['path'],
-                    'page_id': sect['id'],
-                    'title': sect['title'],
-                    'content': sect['content'],
-                    'weight': page_scale,
-                })
-            for route in routes:
-                section_obj.bulk_index(
-                    section_index_list,
-                    parent=page_id,
-                    routing=route,
-                )
-
-    for route in routes:
-        page_obj.bulk_index(index_list, parent=project.slug, routing=route)
-
-    if delete:
-        log.info('Deleting files not in commit: %s', commit)
-
-        # TODO: AK Make sure this works
-        delete_query = {
-            'query': {
-                'bool': {
-                    'must': [
-                        {
-                            'term': {
-                                'project': project.slug,
-                            },
-                        },
-                        {
-                            'term': {
-                                'version': version.slug,
-                            },
-                        },
-                    ],
-                    'must_not': {
-                        'term': {
-                            'commit': commit,
-                        },
-                    },
-                },
-            },
-        }
-        page_obj.delete_document(body=delete_query)
-
-
 class RemoteOrganizationPagination(PageNumberPagination):
     page_size = 25
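The `index_search_request` helper deleted above kept a project and all of its
subprojects on a single Elasticsearch shard by routing every write with the
parent project's slug. A minimal sketch of that routing idea against the raw
elasticsearch-py client follows; the index and type names mirror the removed
`Index` defaults, while the ids, slugs, and query are illustrative only, not
the deleted code:

    from elasticsearch import Elasticsearch

    es = Elasticsearch(['localhost:9200'])

    # Write and read with the same routing value so parent and subproject
    # pages land on, and are queried from, a single shard.
    es.index(
        index='readthedocs',
        doc_type='page',
        id='example-page-id',  # hypothetical document id
        body={'project': 'sub-project', 'title': 'Install'},
        routing='parent-project',  # the parent project's slug
    )
    es.search(
        index='readthedocs',
        doc_type='page',
        routing='parent-project',
        body={'query': {'match': {'title': 'install'}}},
    )
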
diff --git a/readthedocs/restapi/views/search_views.py b/readthedocs/restapi/views/search_views.py
deleted file mode 100644
index abe36174097..00000000000
--- a/readthedocs/restapi/views/search_views.py
+++ /dev/null
@@ -1,151 +0,0 @@
-"""Endpoints related to searching through projects, sections, etc."""
-
-from __future__ import absolute_import
-import logging
-
-from rest_framework import decorators, permissions, status
-from rest_framework.renderers import JSONRenderer
-from rest_framework.response import Response
-
-from readthedocs.builds.constants import LATEST
-from readthedocs.builds.models import Version
-from readthedocs.projects.models import Project, ProjectRelationship
-from readthedocs.search.lib import search_file, search_project, search_section
-from readthedocs.restapi import utils
-
-
-log = logging.getLogger(__name__)
-
-
-@decorators.api_view(['POST'])
-@decorators.permission_classes((permissions.IsAdminUser,))
-@decorators.renderer_classes((JSONRenderer,))
-def index_search(request):
-    """Add things to the search index."""
-    data = request.data['data']
-    version_pk = data['version_pk']
-    commit = data.get('commit')
-    version = Version.objects.get(pk=version_pk)
-
-    project_scale = 1
-    page_scale = 1
-
-    utils.index_search_request(
-        version=version, page_list=data['page_list'], commit=commit,
-        project_scale=project_scale, page_scale=page_scale)
-
-    return Response({'indexed': True})
-
-
-@decorators.api_view(['GET'])
-@decorators.permission_classes((permissions.AllowAny,))
-@decorators.renderer_classes((JSONRenderer,))
-def search(request):
-    """Perform search, supplement links by resolving project domains."""
-    project_slug = request.GET.get('project', None)
-    version_slug = request.GET.get('version', LATEST)
-    query = request.GET.get('q', None)
-    if project_slug is None or query is None:
-        return Response({'error': 'Need project and q'},
-                        status=status.HTTP_400_BAD_REQUEST)
-    try:
-        project = Project.objects.get(slug=project_slug)
-    except Project.DoesNotExist:
-        return Response({'error': 'Project not found'},
-                        status=status.HTTP_404_NOT_FOUND)
-    log.debug("(API Search) %s", query)
-    results = search_file(request=request, project_slug=project_slug,
-                          version_slug=version_slug, query=query)
-
-    if results is None:
-        return Response({'error': 'Project not found'},
-                        status=status.HTTP_404_NOT_FOUND)
-
-    # Supplement result paths with domain information on project
-    hits = results.get('hits', {}).get('hits', [])
-    for (n, hit) in enumerate(hits):
-        fields = hit.get('fields', {})
-        search_project = fields.get('project')[0]
-        search_version = fields.get('version')[0]
-        path = fields.get('path')[0]
-        canonical_url = project.get_docs_url(version_slug=version_slug)
-        if search_project != project_slug:
-            try:
-                subproject = project.subprojects.get(child__slug=search_project)
-                canonical_url = subproject.child.get_docs_url(
-                    version_slug=search_version
-                )
-            except ProjectRelationship.DoesNotExist:
-                pass
-        results['hits']['hits'][n]['fields']['link'] = (
-            canonical_url + path
-        )
-
-    return Response({'results': results})
-
-
-@decorators.api_view(['GET'])
-@decorators.permission_classes((permissions.AllowAny,))
-@decorators.renderer_classes((JSONRenderer,))
-def project_search(request):
-    query = request.GET.get('q', None)
-    if query is None:
-        return Response({'error': 'Need project and q'}, status=status.HTTP_400_BAD_REQUEST)
-    log.debug("(API Project Search) %s", (query))
-    results = search_project(request=request, query=query)
-    return Response({'results': results})
-
-
-@decorators.api_view(['GET'])
-@decorators.permission_classes((permissions.AllowAny,))
-@decorators.renderer_classes((JSONRenderer,))
-def section_search(request):
-    """
-    Section search.
-
-    Queries with query ``q`` across all documents and projects. Queries can be
-    limited to a single project or version by using the ``project`` and
-    ``version`` GET arguments in your request.
-
-    When you search, you will have a ``project`` facet, which includes the
-    number of matching sections per project. When you search inside a project,
-    the ``path`` facet will show the number of matching sections per page.
-
-    Possible GET args
-    -----------------
-
-    q **(required)**
-        The query string **Required**
-
-    project
-        A project slug
-
-    version
-        A version slug
-
-    path
-        A file path slug
-
-
-    Example::
-
-        GET /api/v2/search/section/?q=virtualenv&project=django
-    """
-    query = request.GET.get('q', None)
-    if not query:
-        return Response(
-            {'error': 'Search term required. Use the "q" GET arg to search. '},
-            status=status.HTTP_400_BAD_REQUEST)
-    project_slug = request.GET.get('project', None)
-    version_slug = request.GET.get('version', LATEST)
-    path = request.GET.get('path', None)
-    log.debug("(API Section Search) [%s:%s] %s", project_slug, version_slug,
-              query)
-    results = search_section(
-        request=request,
-        query=query,
-        project_slug=project_slug,
-        version_slug=version_slug,
-        path=path,
-    )
-    return Response({'results': results})
diff --git a/readthedocs/search/indexes.py b/readthedocs/search/indexes.py
deleted file mode 100644
index f3401851cfc..00000000000
--- a/readthedocs/search/indexes.py
+++ /dev/null
@@ -1,372 +0,0 @@
-"""
-Search indexing classes to index into Elasticsearch.
-
-Django settings that should be defined:
-
-    `ES_HOSTS`: A list of hosts where Elasticsearch lives. E.g.
-                ['192.168.1.1:9200', '192.168.2.1:9200']
-
-    `ES_DEFAULT_NUM_REPLICAS`: An integer of the number of replicas.
-
-    `ES_DEFAULT_NUM_SHARDS`: An integer of the number of shards.
-
-
-TODO: Handle page removal case in Page.
-
-"""
-from __future__ import absolute_import
-from builtins import object
-import datetime
-
-from elasticsearch import Elasticsearch, exceptions
-
-from django.conf import settings
-
-
-class Index(object):
-
-    """Base class to define some common methods across indexes."""
-
-    # The _index and _type define the URL path to Elasticsearch, e.g.:
-    #   http://localhost:9200/{_index}/{_type}/_search
-    _index = 'readthedocs'
-    _type = None
-
-    def __init__(self):
-        self.es = Elasticsearch(settings.ES_HOSTS)
-
-    def get_settings(self, settings_override=None):
-        """
-        Returns settings to be passed to ES create_index.
-
-        If `settings_override` is provided, this will use `settings_override`
-        to override the defaults defined here.
-
-        """
-        default_settings = {
-            'number_of_replicas': settings.ES_DEFAULT_NUM_REPLICAS,
-            'number_of_shards': settings.ES_DEFAULT_NUM_SHARDS,
-            'refresh_interval': '5s',
-            'store.compress.tv': True,
-            'store.compress.stored': True,
-            'analysis': self.get_analysis(),
-        }
-        if settings_override:
-            default_settings.update(settings_override)
-
-        return default_settings
-
-    def get_analysis(self):
-        """
-        Returns the analysis dict to be used in settings for create_index.
-
-        For languages that ES supports we define either the minimal or light
-        stemming, which isn't as aggressive as the snowball stemmer. We also
-        define the stopwords for that language.
-
-        For all languages we've customized we're using the ICU plugin.
-
-        """
-        analyzers = {}
-        filters = {}
-
-        # The default is used for fields that need ICU but are composed of
-        # many languages.
-        analyzers['default_icu'] = {
-            'type': 'custom',
-            'tokenizer': 'icu_tokenizer',
-            'filter': ['word_delimiter', 'icu_folding', 'icu_normalizer'],
-        }
-
-        # Customize the word_delimiter filter to set various options.
-        filters['custom_word_delimiter'] = {
-            'type': 'word_delimiter',
-            'preserve_original': True,
-        }
-
-        return {
-            'analyzer': analyzers,
-            'filter': filters,
-        }
-
-    def timestamped_index(self):
-        return '{0}-{1}'.format(
-            self._index, datetime.datetime.now().strftime('%Y%m%d%H%M%S'))
-
-    def create_index(self, index=None):
-        """
-        Creates index.
-
-        This uses `get_settings` and `get_mappings` to define the index.
-
-        """
-        index = index or self._index
-        body = {
-            'settings': self.get_settings(),
-        }
-        self.es.indices.create(index=index, body=body)
-
-    def refresh_index(self, index=None):
-        index = index or self._index
-        self.es.indices.refresh(index=index)
-
-    def put_mapping(self, index=None):
-        index = index or self._index
-        self.es.indices.put_mapping(self._type, self.get_mapping(), index)
-
-    def bulk_index(self, data, index=None, chunk_size=500, parent=None,
-                   routing=None):
-        """
-        Given a list of documents, uses Elasticsearch bulk indexing.
-
-        For each doc this calls `extract_document`, then indexes.
-
-        `chunk_size` defaults to the elasticsearch lib's default. Override per
-        your document size as needed.
-
-        """
-        index = index or self._index
-        docs = []
-        for d in data:
-            source = self.extract_document(d)
-            doc = {
-                '_index': index,
-                '_type': self._type,
-                '_id': source['id'],
-                '_source': source,
-            }
-            if parent:
-                doc['_parent'] = parent
-            if routing:
-                doc['_routing'] = routing
-            docs.append(doc)
-
-        # TODO: This doesn't work with the new ES setup.
-        # bulk_index(self.es, docs, chunk_size=chunk_size)
-
-    def index_document(self, data, index=None, parent=None, routing=None):
-        doc = self.extract_document(data)
-        kwargs = {
-            'index': index or self._index,
-            'doc_type': self._type,
-            'body': doc,
-            'id': doc['id']
-        }
-        if parent:
-            kwargs['parent'] = parent
-        if routing:
-            kwargs['routing'] = routing
-        self.es.index(**kwargs)
-
-    def delete_index(self, index_name):
-
-        self.es.indices.delete(index=index_name)
-
-    def delete_document(self, body, index=None, parent=None, routing=None):
-        kwargs = {
-            'index': index or self._index,
-            'doc_type': self._type,
-            'body': body,
-        }
-        if parent:
-            kwargs['parent'] = parent
-        if routing:
-            kwargs['routing'] = routing
-        return self.es.delete_by_query(**kwargs)
-
-    def get_mapping(self):
-        """Returns the mapping for this _index and _type."""
-        raise NotImplementedError()
-
-    def extract_document(self, data):
-        """Extracts the Elasticsearch document for this object instance."""
-        raise NotImplementedError()
-
-    def update_aliases(self, new_index, delete=True):
-        """
-        Points `_index` to `new_index` and deletes `_index` if delete=True.
-
-        The ES `update_aliases` is atomic.
-        """
-        old_index = None
-
-        # Get current alias, if any.
-        try:
-            aliases = self.es.indices.get_alias(name=self._index)
-            if aliases and list(aliases.keys()):
-                old_index = list(aliases.keys())[0]
-        except exceptions.NotFoundError:
-            pass
-
-        actions = []
-        if old_index:
-            actions.append({'remove': {'index': old_index,
-                                       'alias': self._index}})
-        actions.append({'add': {'index': new_index, 'alias': self._index}})
-
-        self.es.indices.update_aliases(body={'actions': actions})
-
-        # Delete old index if any and if specified.
-        if delete and old_index:
-            self.es.indices.delete(index=old_index)
-
-    def search(self, body, **kwargs):
-        return self.es.search(index=self._index, doc_type=self._type,
-                              body=body, **kwargs)
-
-
-class ProjectIndex(Index):
-
-    """Search index configuration for Projects"""
-
-    _type = 'project'
-
-    def get_mapping(self):
-        mapping = {
-            self._type: {
-                # Disable _all field to reduce index size.
-                '_all': {'enabled': False},
-                'properties': {
-                    'id': {'type': 'long'},
-                    'name': {'type': 'string', 'analyzer': 'default_icu'},
-                    'description': {'type': 'string', 'analyzer': 'default_icu'},
-
-                    'slug': {'type': 'string', 'index': 'not_analyzed'},
-                    'lang': {'type': 'string', 'index': 'not_analyzed'},
-                    'tags': {'type': 'string', 'index': 'not_analyzed'},
-                    'privacy': {'type': 'string', 'index': 'not_analyzed'},
-                    'author': {
-                        'type': 'string',
-                        'analyzer': 'default_icu',
-                        'fields': {
-                            'raw': {
-                                'type': 'string',
-                                'index': 'not_analyzed',
-                            },
-                        },
-                    },
-                    'url': {'type': 'string', 'index': 'not_analyzed'},
-                    # Add a weight field to enhance relevancy scoring.
-                    'weight': {'type': 'float'},
-                }
-            }
-        }
-
-        return mapping
-
-    def extract_document(self, data):
-        doc = {}
-
-        attrs = ('id', 'name', 'slug', 'description', 'lang', 'tags', 'author', 'url')
-        for attr in attrs:
-            doc[attr] = data.get(attr, '')
-
-        # Add project boost.
-        doc['weight'] = data.get('weight', 1.0)
-
-        return doc
-
-
-class PageIndex(Index):
-
-    """Search index configuration for Pages"""
-
-    _type = 'page'
-    _parent = 'project'
-
-    def get_mapping(self):
-        mapping = {
-            self._type: {
-                # Disable _all field to reduce index size.
-                '_all': {'enabled': False},
-                # Associate a page with a project.
-                '_parent': {'type': self._parent},
-                'properties': {
-                    'id': {'type': 'string', 'index': 'not_analyzed'},
-                    'sha': {'type': 'string', 'index': 'not_analyzed'},
-                    'project': {'type': 'string', 'index': 'not_analyzed'},
-                    'version': {'type': 'string', 'index': 'not_analyzed'},
-                    'path': {'type': 'string', 'index': 'not_analyzed'},
-                    'taxonomy': {'type': 'string', 'index': 'not_analyzed'},
-                    'commit': {'type': 'string', 'index': 'not_analyzed'},
-
-                    'title': {'type': 'string', 'analyzer': 'default_icu'},
-                    'headers': {'type': 'string', 'analyzer': 'default_icu'},
-                    'content': {'type': 'string', 'analyzer': 'default_icu'},
-                    # Add a weight field to enhance relevancy scoring.
-                    'weight': {'type': 'float'},
-                }
-            }
-        }
-
-        return mapping
-
-    def extract_document(self, data):
-        doc = {}
-
-        attrs = ('id', 'project', 'title', 'headers', 'version', 'path',
-                 'content', 'taxonomy', 'commit')
-        for attr in attrs:
-            doc[attr] = data.get(attr, '')
-
-        # Add page boost.
-        doc['weight'] = data.get('weight', 1.0)
-
-        return doc
-
-
-class SectionIndex(Index):
-
-    """Search index configuration for Sections"""
-
-    _type = 'section'
-    _parent = 'page'
-
-    def get_mapping(self):
-        mapping = {
-            self._type: {
-                # Disable _all field to reduce index size.
-                '_all': {'enabled': False},
-                # Associate a section with a page.
-                '_parent': {'type': self._parent},
-                # Commenting this out until we need it.
-                # 'suggest': {
-                #     "type": "completion",
-                #     "index_analyzer": "simple",
-                #     "search_analyzer": "simple",
-                #     "payloads": True,
-                # },
-                'properties': {
-                    'id': {'type': 'string', 'index': 'not_analyzed'},
-                    'project': {'type': 'string', 'index': 'not_analyzed'},
-                    'version': {'type': 'string', 'index': 'not_analyzed'},
-                    'path': {'type': 'string', 'index': 'not_analyzed'},
-                    'page_id': {'type': 'string', 'index': 'not_analyzed'},
-                    'commit': {'type': 'string', 'index': 'not_analyzed'},
-                    'title': {'type': 'string', 'analyzer': 'default_icu'},
-                    'content': {'type': 'string', 'analyzer': 'default_icu'},
-                    'blocks': {
-                        'type': 'object',
-                        'properties': {
-                            'code': {'type': 'string', 'analyzer': 'default_icu'}
-                        }
-                    },
-                    # Add a weight field to enhance relevancy scoring.
-                    'weight': {'type': 'float'},
-                }
-            }
-        }
-
-        return mapping
-
-    def extract_document(self, data):
-        doc = {}
-
-        attrs = ('id', 'project', 'title', 'page_id', 'version', 'path', 'content', 'commit')
-        for attr in attrs:
-            doc[attr] = data.get(attr, '')
-
-        # Add page boost.
-        doc['weight'] = data.get('weight', 1.0)
-
-        return doc
diff --git a/readthedocs/search/lib.py b/readthedocs/search/lib.py
deleted file mode 100644
index 8500a829b03..00000000000
--- a/readthedocs/search/lib.py
+++ /dev/null
@@ -1,250 +0,0 @@
-"""Utilities related to searching Elastic."""
-from __future__ import absolute_import
-from __future__ import print_function
-from pprint import pprint
-
-from django.conf import settings
-
-from .indexes import PageIndex, ProjectIndex, SectionIndex
-
-from readthedocs.builds.constants import LATEST
-from readthedocs.projects.models import Project
-from readthedocs.search.signals import (before_project_search,
-                                        before_file_search,
-                                        before_section_search)
-
-
-def search_project(request, query, language=None):
-    """Search index for projects matching query."""
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"match": {"name": {"query": query, "boost": 10}}},
-                    {"match": {"description": {"query": query}}},
-                ]
-            },
-        },
-        "facets": {
-            "language": {
-                "terms": {"field": "lang"},
-            },
-        },
-        "highlight": {
-            "fields": {
-                "name": {},
-                "description": {},
-            }
-        },
-        "fields": ["name", "slug", "description", "lang", "url"],
-        "size": 50  # TODO: Support pagination.
-    }
-
-    if language:
-        body['facets']['language']['facet_filter'] = {"term": {"lang": language}}
-        body['filter'] = {"term": {"lang": language}}
-
-    before_project_search.send(request=request, sender=ProjectIndex, body=body)
-
-    return ProjectIndex().search(body)
-
-
-def search_file(request, query, project_slug=None, version_slug=LATEST, taxonomy=None):
-    """
-    Search index for files matching query.
-
-    Raises a 404 error on missing project
-
-    :param request: request instance
-    :param query: string to query for
-    :param project_slug: :py:class:`Project` slug
-    :param version_slug: slug for :py:class:`Project` version slug
-    :param taxonomy: taxonomy for search
-    """
-    kwargs = {}
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"match_phrase": {
-                        "title": {
-                            "query": query,
-                            "boost": 10,
-                            "slop": 2,
-                        },
-                    }},
-                    {"match_phrase": {
-                        "headers": {
-                            "query": query,
-                            "boost": 5,
-                            "slop": 3,
-                        },
-                    }},
-                    {"match_phrase": {
-                        "content": {
-                            "query": query,
-                            "slop": 5,
-                        },
-                    }},
-                ]
-            }
-        },
-        "facets": {
-            "taxonomy": {
-                "terms": {"field": "taxonomy"},
-            },
-            "project": {
-                "terms": {"field": "project"},
-            },
-            "version": {
-                "terms": {"field": "version"},
-            },
-        },
-        "highlight": {
-            "fields": {
-                "title": {},
-                "headers": {},
-                "content": {},
-            }
-        },
-        "fields": ["title", "project", "version", "path"],
-        "size": 50  # TODO: Support pagination.
-    }
-
-    if project_slug or version_slug or taxonomy:
-        final_filter = {"and": []}
-
-        if project_slug:
-            try:
-                project = (Project.objects
-                           .api(request.user)
-                           .get(slug=project_slug))
-                project_slugs = [project.slug]
-                # We need to use the obtuse syntax here because the manager
-                # doesn't pass along to ProjectRelationships
-                project_slugs.extend(s.slug for s
-                                     in Project.objects.public(
-                                         request.user).filter(
-                                             superprojects__parent__slug=project.slug))
-                final_filter['and'].append({"terms": {"project": project_slugs}})
-
-                # Add routing to optimize search by hitting the right shard.
-                # This purposely doesn't apply routing if the project has more
-                # than one parent project.
-                if project.superprojects.exists():
-                    if project.superprojects.count() == 1:
-                        kwargs['routing'] = (project.superprojects.first()
-                                             .parent.slug)
-                else:
-                    kwargs['routing'] = project_slug
-            except Project.DoesNotExist:
-                return None
-
-        if version_slug:
-            final_filter['and'].append({'term': {'version': version_slug}})
-
-        if taxonomy:
-            final_filter['and'].append({'term': {'taxonomy': taxonomy}})
-
-        body['filter'] = final_filter
-        body['facets']['project']['facet_filter'] = final_filter
-        body['facets']['version']['facet_filter'] = final_filter
-        body['facets']['taxonomy']['facet_filter'] = final_filter
-
-    if settings.DEBUG:
-        print("Before Signal")
-        pprint(body)
-    before_file_search.send(request=request, sender=PageIndex, body=body)
-    if settings.DEBUG:
-        print("After Signal")
-        pprint(body)
-
-    return PageIndex().search(body, **kwargs)
-
-
-def search_section(request, query, project_slug=None, version_slug=LATEST,
-                   path=None):
-    """
-    Search for a section of content.
-
-    When you search, you will have a ``project`` facet, which includes the
-    number of matching sections per project. When you search inside a project,
-    the ``path`` facet will show the number of matching sections per page.
-
-    :param request: Request instance
-    :param query: string to use in query
-    :param project_slug: :py:class:`Project` instance
-    :param version_slug: :py:class:`Project` version instance
-    :param taxonomy: search taxonomy
-    """
-    kwargs = {}
-    body = {
-        "query": {
-            "bool": {
-                "should": [
-                    {"match_phrase": {
-                        "title": {
-                            "query": query,
-                            "boost": 10,
-                            "slop": 2,
-                        },
-                    }},
-                    {"match_phrase": {
-                        "content": {
-                            "query": query,
-                            "slop": 5,
-                        },
-                    }},
-                ]
-            }
-        },
-        "facets": {
-            "project": {
-                "terms": {"field": "project"},
-                "facet_filter": {
-                    "term": {"version": version_slug},
-                }
-            },
-        },
-        "highlight": {
-            "fields": {
-                "title": {},
-                "content": {},
-            }
-        },
-        "fields": ["title", "project", "version", "path", "page_id", "content"],
-        "size": 10  # TODO: Support pagination.
-    }
-
-    if project_slug:
-        body['filter'] = {
-            "and": [
-                {"term": {"project": project_slug}},
-                {"term": {"version": version_slug}},
-            ]
-        }
-        body['facets']['path'] = {
-            "terms": {"field": "path"},
-            "facet_filter": {
-                "term": {"project": project_slug},
-            }
-        },
-        # Add routing to optimize search by hitting the right shard.
-        kwargs['routing'] = project_slug
-
-    if path:
-        body['filter'] = {
-            "and": [
-                {"term": {"path": path}},
-            ]
-        }
-
-    if path and not project_slug:
-        # Show facets when we only have a path
-        body['facets']['path'] = {
-            "terms": {"field": "path"}
-        }
-
-    before_section_search.send(request=request, sender=PageIndex, body=body)
-
-    return SectionIndex().search(body, **kwargs)
diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py
index 062ee1fed50..a48a2e5031a 100644
--- a/readthedocs/search/views.py
+++ b/readthedocs/search/views.py
@@ -8,11 +8,9 @@
 from pprint import pprint

 from django.conf import settings
-from django.shortcuts import render, get_object_or_404
+from django.shortcuts import render

 from readthedocs.builds.constants import LATEST
-from readthedocs.projects.models import Project
-from readthedocs.search import lib as search_lib
 from readthedocs.search.documents import ProjectDocument, PageDocument
 from readthedocs.search.utils import get_project_list_or_404