From 83c6f400dcdf9c0591c35e85f5f1a39b02f06352 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 23 Oct 2018 15:23:08 +0200 Subject: [PATCH 01/22] Remove old test references to intersphinx --- readthedocs/rtd_tests/tests/test_celery.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/readthedocs/rtd_tests/tests/test_celery.py b/readthedocs/rtd_tests/tests/test_celery.py index 4e5ed8856e6..5760a672524 100644 --- a/readthedocs/rtd_tests/tests/test_celery.py +++ b/readthedocs/rtd_tests/tests/test_celery.py @@ -79,8 +79,7 @@ def test_update_docs(self): result = tasks.update_docs_task.delay( self.project.pk, build_pk=build.pk, - record=False, - intersphinx=False) + record=False) self.assertTrue(result.successful()) @patch('readthedocs.projects.tasks.UpdateDocsTaskStep.setup_python_environment', new=MagicMock) @@ -96,8 +95,7 @@ def test_update_docs_unexpected_setup_exception(self, mock_setup_vcs): result = tasks.update_docs_task.delay( self.project.pk, build_pk=build.pk, - record=False, - intersphinx=False) + record=False) self.assertTrue(result.successful()) @patch('readthedocs.projects.tasks.UpdateDocsTaskStep.setup_python_environment', new=MagicMock) @@ -113,8 +111,7 @@ def test_update_docs_unexpected_build_exception(self, mock_build_docs): result = tasks.update_docs_task.delay( self.project.pk, build_pk=build.pk, - record=False, - intersphinx=False) + record=False) self.assertTrue(result.successful()) def test_sync_repository(self): From db5fbd8d8efd3718db3f6b167a32f0f9367b398b Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 23 Oct 2018 15:24:47 +0200 Subject: [PATCH 02/22] Initial commit of domains app --- readthedocs/domains/__init__.py | 0 .../migrations/0001_create-domain-data.py | 32 ++++++++++++ readthedocs/domains/migrations/__init__.py | 0 readthedocs/domains/models.py | 50 +++++++++++++++++++ 4 files changed, 82 insertions(+) create mode 100644 readthedocs/domains/__init__.py create mode 100644 
readthedocs/domains/migrations/0001_create-domain-data.py create mode 100644 readthedocs/domains/migrations/__init__.py create mode 100644 readthedocs/domains/models.py diff --git a/readthedocs/domains/__init__.py b/readthedocs/domains/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/readthedocs/domains/migrations/0001_create-domain-data.py b/readthedocs/domains/migrations/0001_create-domain-data.py new file mode 100644 index 00000000000..6e02dadf977 --- /dev/null +++ b/readthedocs/domains/migrations/0001_create-domain-data.py @@ -0,0 +1,32 @@ +# -*- coding: utf-8 -*- +# Generated by Django 1.11.16 on 2018-10-23 08:24 +from __future__ import unicode_literals + +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + initial = True + + dependencies = [ + ('projects', '0028_create-domain-data'), + ] + + operations = [ + migrations.CreateModel( + name='DomainData', + fields=[ + ('id', models.AutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')), + ('domain', models.CharField(max_length=255, verbose_name='Domain')), + ('name', models.CharField(max_length=255, verbose_name='Name')), + ('display_name', models.CharField(max_length=255, verbose_name='Display Name')), + ('type', models.CharField(max_length=255, verbose_name='Type')), + ('doc_name', models.CharField(max_length=255, verbose_name='Doc Name')), + ('anchor', models.CharField(max_length=255, verbose_name='Anchor')), + ('priority', models.IntegerField(verbose_name='Priority')), + ('project', models.OneToOneField(blank=True, null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='domain_data', to='projects.Project')), + ], + ), + ] diff --git a/readthedocs/domains/migrations/__init__.py b/readthedocs/domains/migrations/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/readthedocs/domains/models.py b/readthedocs/domains/models.py new file mode 100644 
index 00000000000..5627d3c4eca --- /dev/null +++ b/readthedocs/domains/models.py @@ -0,0 +1,50 @@ +from django.db import models +from django.utils.encoding import python_2_unicode_compatible +from django.utils.translation import ugettext_lazy as _ + +from readthedocs.projects.models import Project + + +@python_2_unicode_compatible +class DomainData(models.Model): + + """ + Information from a project about its Sphinx domains. + + This captures data about API objects that exist in that codebase. + """ + + project = models.OneToOneField( + Project, + on_delete=models.SET_NULL, + related_name='domain_data', + null=True, + blank=True, + ) + domain = models.CharField( + _('Domain'), + max_length=255, + ) + name = models.CharField( + _('Name'), + max_length=255, + ) + display_name = models.CharField( + _('Display Name'), + max_length=255, + ) + type = models.CharField( + _('Type'), + max_length=255, + ) + doc_name = models.CharField( + _('Doc Name'), + max_length=255, + ) + anchor = models.CharField( + _('Anchor'), + max_length=255, + ) + priority = models.IntegerField( + _('Priority'), + ) From 41b2857a29696c91b30d8b8e520f92370241d5f8 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 30 Oct 2018 15:04:57 -0500 Subject: [PATCH 03/22] Add initial domaindata modeling and API integration --- readthedocs/domaindata/__init__.py | 0 readthedocs/domaindata/admin.py | 9 +++++ readthedocs/domaindata/api.py | 29 ++++++++++++++++ readthedocs/domaindata/models.py | 55 ++++++++++++++++++++++++++++++ readthedocs/restapi/urls.py | 2 ++ readthedocs/settings/base.py | 3 ++ 6 files changed, 98 insertions(+) create mode 100644 readthedocs/domaindata/__init__.py create mode 100644 readthedocs/domaindata/admin.py create mode 100644 readthedocs/domaindata/api.py create mode 100644 readthedocs/domaindata/models.py diff --git a/readthedocs/domaindata/__init__.py b/readthedocs/domaindata/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git
a/readthedocs/domaindata/admin.py b/readthedocs/domaindata/admin.py new file mode 100644 index 00000000000..3485ee1c572 --- /dev/null +++ b/readthedocs/domaindata/admin.py @@ -0,0 +1,9 @@ +from django.contrib import admin +from .models import DomainData + + +class DomainDataAdmin(admin.ModelAdmin): + list_filter = ('type', 'project') + + +admin.site.register(DomainData, DomainDataAdmin) diff --git a/readthedocs/domaindata/api.py b/readthedocs/domaindata/api.py new file mode 100644 index 00000000000..327b7510e40 --- /dev/null +++ b/readthedocs/domaindata/api.py @@ -0,0 +1,29 @@ +from rest_framework import serializers, viewsets + +from readthedocs.core.resolver import resolve +from .models import DomainData + + +class DomainDataSerializer(serializers.ModelSerializer): + project = serializers.SlugRelatedField(slug_field='slug', read_only=True) + version = serializers.SlugRelatedField(slug_field='slug', read_only=True) + doc_type = serializers.SerializerMethodField() + doc_url = serializers.SerializerMethodField() + + class Meta: + model = DomainData + fields = ('project', 'version', 'name', 'display_name', 'doc_type', 'doc_url') + + def get_doc_type(self, obj): + return f'{obj.domain}:{obj.type}' + + def get_doc_url(self, obj): + path = f'{obj.doc_name}#{obj.anchor}' + full_url = resolve(project=obj.project, version_slug=obj.version.slug, filename=path) + return full_url + + +class DomainDataAPIView(viewsets.ModelViewSet): + queryset = DomainData.objects.public() + serializer_class = DomainDataSerializer + filter_fields = ('project__slug', 'version__slug', 'domain', 'type', 'doc_name', 'name') diff --git a/readthedocs/domaindata/models.py b/readthedocs/domaindata/models.py new file mode 100644 index 00000000000..9271708821f --- /dev/null +++ b/readthedocs/domaindata/models.py @@ -0,0 +1,55 @@ +from django.db import models +from django.utils.encoding import python_2_unicode_compatible +from django.utils.translation import ugettext_lazy as _ + +from 
readthedocs.builds.models import Version +from readthedocs.projects.models import Project +from readthedocs.projects.querysets import RelatedProjectQuerySet + + +@python_2_unicode_compatible +class DomainData(models.Model): + + """ + Information from a project about its Sphinx domains. + + This captures data about API objects that exist in that codebase. + """ + + project = models.ForeignKey( + Project, + related_name='domain_data', + ) + version = models.ForeignKey(Version, verbose_name=_('Version'), + related_name='domain_data') + modified_date = models.DateTimeField(_('Publication date'), auto_now=True) + commit = models.CharField(_('Commit'), max_length=255) + + domain = models.CharField( + _('Domain'), + max_length=255, + ) + name = models.CharField( + _('Name'), + max_length=255, + ) + display_name = models.CharField( + _('Display Name'), + max_length=255, + ) + type = models.CharField( + _('Type'), + max_length=255, + ) + doc_name = models.CharField( + _('Doc Name'), + max_length=255, + ) + anchor = models.CharField( + _('Anchor'), + max_length=255, + ) + objects = RelatedProjectQuerySet.as_manager() + + def __str__(self): + return f'DomainData [{self.project.slug}:{self.version.slug}] [{self.domain}:{self.type}] {self.name} -> {self.doc_name}#{self.anchor}' diff --git a/readthedocs/restapi/urls.py b/readthedocs/restapi/urls.py index ffaad2405b6..bb8740881dd 100644 --- a/readthedocs/restapi/urls.py +++ b/readthedocs/restapi/urls.py @@ -34,6 +34,7 @@ SocialAccountViewSet, VersionViewSet, ) +from readthedocs.domaindata.api import DomainDataAPIView router = routers.DefaultRouter() router.register(r'build', BuildViewSet, base_name='build') @@ -42,6 +43,7 @@ router.register(r'project', ProjectViewSet, base_name='project') router.register(r'notification', NotificationViewSet, base_name='emailhook') router.register(r'domain', DomainViewSet, base_name='domain') +router.register(r'domaindata', DomainDataAPIView, base_name='domaindata') router.register(
r'remote/org', RemoteOrganizationViewSet, diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py index 4770c009337..1d6ade0b897 100644 --- a/readthedocs/settings/base.py +++ b/readthedocs/settings/base.py @@ -84,6 +84,7 @@ def INSTALLED_APPS(self): # noqa 'django_extensions', 'messages_extends', 'tastypie', + 'django_filters', # our apps 'readthedocs.projects', @@ -99,6 +100,7 @@ def INSTALLED_APPS(self): # noqa 'readthedocs.notifications', 'readthedocs.integrations', 'readthedocs.analytics', + 'readthedocs.domaindata', # allauth @@ -295,6 +297,7 @@ def USE_PROMOS(self): # noqa # CORS CORS_ORIGIN_REGEX_WHITELIST = ( + '(.*)localhost(.*)', '^http://(.+)\.readthedocs\.io$', '^https://(.+)\.readthedocs\.io$' ) From 0f82c26accfb100f0fa3ab79e70bbd1032887d87 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Tue, 30 Oct 2018 18:37:12 -0500 Subject: [PATCH 04/22] Add indexing of DomainData to the builds. --- readthedocs/domaindata/api.py | 18 +++++++-- readthedocs/projects/tasks.py | 76 ++++++++++++++++++++++++++++++----- 2 files changed, 81 insertions(+), 13 deletions(-) diff --git a/readthedocs/domaindata/api.py b/readthedocs/domaindata/api.py index 327b7510e40..d3176d1a396 100644 --- a/readthedocs/domaindata/api.py +++ b/readthedocs/domaindata/api.py @@ -1,5 +1,6 @@ -from rest_framework import serializers, viewsets +from rest_framework import serializers +from readthedocs.restapi.views.model_views import UserSelectViewSet from readthedocs.core.resolver import resolve from .models import DomainData @@ -18,12 +19,21 @@ def get_doc_type(self, obj): return f'{obj.domain}:{obj.type}' def get_doc_url(self, obj): - path = f'{obj.doc_name}#{obj.anchor}' + path = obj.doc_name + if obj.anchor: + path += f'#{obj.anchor}' full_url = resolve(project=obj.project, version_slug=obj.version.slug, filename=path) return full_url -class DomainDataAPIView(viewsets.ModelViewSet): - queryset = DomainData.objects.public() +class 
DomainDataAdminSerializer(DomainDataSerializer): + + class Meta(DomainDataSerializer.Meta): + fields = '__all__' + + +class DomainDataAPIView(UserSelectViewSet): + model = DomainData serializer_class = DomainDataSerializer + admin_serializer_class = DomainDataAdminSerializer filter_fields = ('project__slug', 'version__slug', 'domain', 'type', 'doc_name', 'name') diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 73acebf46dd..453a0e5d6f9 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -14,6 +14,7 @@ import json import logging import os +import sys import shutil import socket from collections import Counter, defaultdict @@ -26,6 +27,8 @@ from django.db.models import Q from django.utils.translation import ugettext_lazy as _ from slumber.exceptions import HttpClientError +from sphinx.ext import intersphinx + from readthedocs.builds.constants import ( BUILD_STATE_BUILDING, BUILD_STATE_CLONING, BUILD_STATE_FINISHED, @@ -46,6 +49,7 @@ VersionLockedError, YAMLParseError) from readthedocs.doc_builder.loader import get_builder_class from readthedocs.doc_builder.python_environments import Conda, Virtualenv +from readthedocs.domaindata.models import DomainData from readthedocs.projects.models import APIProject from readthedocs.restapi.client import api as api_v2 from readthedocs.restapi.utils import index_search_request @@ -55,7 +59,7 @@ from .constants import LOG_TEMPLATE from .exceptions import RepositoryError -from .models import Domain, Feature, ImportedFile, Project +from .models import Domain, ImportedFile, Project from .signals import ( after_build, after_vcs, before_build, before_vcs, files_changed) @@ -323,16 +327,19 @@ def run(self, pk, version_pk=None, build_pk=None, record=True, # Catch unhandled errors in the setup step except Exception as e: # noqa + extra = { + 'stack': True, + 'tags': { + 'build': build_pk, + }, + } + if self.project: + extra['tags']['project'] = self.project.slug + if 
self.version: + extra['tags']['version'] = self.version.slug log.exception( 'An unhandled exception was raised during build setup', - extra={ - 'stack': True, - 'tags': { - 'build': build_pk, - 'project': self.project.slug, - 'version': self.version.slug, - }, - }, + extra=extra ) if self.setup_env is not None: self.setup_env.failure = BuildEnvironmentError( @@ -1027,6 +1034,7 @@ def fileify(version_pk, commit): ) ) _manage_imported_files(version, path, commit) + _update_intersphinx_data(version, path, commit) else: log.info( LOG_TEMPLATE.format( @@ -1037,6 +1045,56 @@ def fileify(version_pk, commit): ) +def _update_intersphinx_data(version, path, commit): + """ + Update intersphinx data for this version + + :param version: Version instance + :param path: Path to search + :param commit: Commit that updated path + """ + object_file = os.path.join(path, 'objects.inv') + + class MockConfig: + intersphinx_timeout = None # type: int + tls_verify = False + + class MockApp: + srcdir = '' + config = MockConfig() + + def warn(self, msg): + # type: (unicode) -> None + print(msg, file=sys.stderr) + + invdata = intersphinx.fetch_inventory(MockApp(), '', object_file) + for key in sorted(invdata or {}): + domain, _type = key.split(':') + for name, einfo in sorted(invdata[key].items()): + url = einfo[2] + if '#' in url: + doc_name, anchor = url.split('#') + else: + doc_name, anchor = url, '' + display_name = einfo[3] + obj, _ = DomainData.objects.get_or_create( + project=version.project, + version=version, + domain=domain, + name=name, + display_name=display_name, + type=_type, + doc_name=doc_name, + anchor=anchor, + ) + if obj.commit != commit: + obj.commit = commit + obj.save() + DomainData.objects.filter(project=version.project, + version=version + ).exclude(commit=commit).delete() + + def _manage_imported_files(version, path, commit): """ Update imported files for version. 
From 537c66adefb98c45b3ca3b8c202dcb781848e7b2 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Wed, 30 Jan 2019 20:05:46 -0500 Subject: [PATCH 05/22] Cleanup of some domain stuff --- readthedocs/domaindata/admin.py | 1 + readthedocs/domaindata/api.py | 13 ------------- readthedocs/domaindata/models.py | 19 ++++++++++++++++++- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/readthedocs/domaindata/admin.py b/readthedocs/domaindata/admin.py index 3485ee1c572..9fe25b7012a 100644 --- a/readthedocs/domaindata/admin.py +++ b/readthedocs/domaindata/admin.py @@ -4,6 +4,7 @@ class DomainDataAdmin(admin.ModelAdmin): list_filter = ('type', 'project') + raw_id_fields = ('project', 'version') admin.site.register(DomainData, DomainDataAdmin) diff --git a/readthedocs/domaindata/api.py b/readthedocs/domaindata/api.py index d3176d1a396..5499e931260 100644 --- a/readthedocs/domaindata/api.py +++ b/readthedocs/domaindata/api.py @@ -1,30 +1,17 @@ from rest_framework import serializers from readthedocs.restapi.views.model_views import UserSelectViewSet -from readthedocs.core.resolver import resolve from .models import DomainData class DomainDataSerializer(serializers.ModelSerializer): project = serializers.SlugRelatedField(slug_field='slug', read_only=True) version = serializers.SlugRelatedField(slug_field='slug', read_only=True) - doc_type = serializers.SerializerMethodField() - doc_url = serializers.SerializerMethodField() class Meta: model = DomainData fields = ('project', 'version', 'name', 'display_name', 'doc_type', 'doc_url') - def get_doc_type(self, obj): - return f'{obj.domain}:{obj.type}' - - def get_doc_url(self, obj): - path = obj.doc_name - if obj.anchor: - path += f'#{obj.anchor}' - full_url = resolve(project=obj.project, version_slug=obj.version.slug, filename=path) - return full_url - class DomainDataAdminSerializer(DomainDataSerializer): diff --git a/readthedocs/domaindata/models.py b/readthedocs/domaindata/models.py index 9271708821f..9f3002bb247 
100644 --- a/readthedocs/domaindata/models.py +++ b/readthedocs/domaindata/models.py @@ -3,6 +3,7 @@ from django.utils.translation import ugettext_lazy as _ from readthedocs.builds.models import Version +from readthedocs.core.resolver import resolve from readthedocs.projects.models import Project from readthedocs.projects.querysets import RelatedProjectQuerySet @@ -52,4 +53,20 @@ class DomainData(models.Model): objects = RelatedProjectQuerySet.as_manager() def __str__(self): - return f'DomainData [{self.project.slug}:{self.version.slug}] [{self.domain}:{self.type}] {self.name} -> {self.doc_name}#{self.anchor}' + return f''' + DomainData [{self.project.slug}:{self.version.slug}] + [{self.domain}:{self.type}] {self.name} -> {self.doc_name}#{self.anchor} + ''' + + @property + def doc_type(self): + return f'{self.domain}:{self.type}' + + @property + def doc_url(self): + path = self.doc_name + if self.anchor: + path += f'#{self.anchor}' + full_url = resolve( + project=self.project, version_slug=self.version.slug, filename=path) + return full_url From 647ae3f44a91ded8eed70210303a90837f3b0957 Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Wed, 30 Jan 2019 20:05:54 -0500 Subject: [PATCH 06/22] Add search to Domains :tada: --- readthedocs/search/documents.py | 29 +++++++++++++++ readthedocs/search/faceted_search.py | 16 +++++++-- readthedocs/search/signals.py | 3 +- readthedocs/search/views.py | 10 +++++- readthedocs/settings/base.py | 6 ++++ .../templates/search/elastic_search.html | 35 ++++++++++++++++++- 6 files changed, 94 insertions(+), 5 deletions(-) diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py index 779d0a52340..c68f1e4c0b4 100644 --- a/readthedocs/search/documents.py +++ b/readthedocs/search/documents.py @@ -4,6 +4,7 @@ from django_elasticsearch_dsl import DocType, Index, fields from readthedocs.projects.models import Project, HTMLFile +from readthedocs.domaindata.models import DomainData project_conf = 
settings.ES_INDEXES['project'] project_index = Index(project_conf['name']) @@ -13,10 +14,38 @@ page_index = Index(page_conf['name']) page_index.settings(**page_conf['settings']) +domain_conf = settings.ES_INDEXES['domain'] +domain_index = Index(domain_conf['name']) +domain_index.settings(**domain_conf['settings']) log = logging.getLogger(__name__) +@domain_index.doc_type +class DomainDocument(DocType): + project = fields.KeywordField(attr='project.slug') + version = fields.KeywordField(attr='version.slug') + doc_type = fields.KeywordField(attr='doc_type') + + class Meta(object): + model = DomainData + fields = ('name', 'display_name', 'doc_name', 'anchor') + ignore_signals = True + + @classmethod + def faceted_search(cls, query, user, doc_type=None): + from readthedocs.search.faceted_search import DomainSearch + kwargs = { + 'user': user, + 'query': query, + } + + if doc_type: + kwargs['filters'] = {'doc_type': doc_type} + + return DomainSearch(**kwargs) + + @project_index.doc_type class ProjectDocument(DocType): diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 31c4a34aa25..47f083a291b 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -3,8 +3,8 @@ from elasticsearch_dsl import FacetedSearch, TermsFacet from elasticsearch_dsl.query import SimpleQueryString, Bool -from readthedocs.search.documents import PageDocument, ProjectDocument -from readthedocs.search.signals import before_file_search, before_project_search +from readthedocs.search.documents import PageDocument, ProjectDocument, DomainDocument +from readthedocs.search.signals import before_file_search, before_project_search, before_domain_search log = logging.getLogger(__name__) @@ -50,6 +50,18 @@ def query(self, search, query): return search +class DomainSearch(RTDFacetedSearch): + facets = { + 'project': TermsFacet(field='project'), + 'version': TermsFacet(field='version'), + 'doc_type': TermsFacet(field='doc_type'), 
+ } + signal = before_domain_search + doc_types = [DomainDocument] + index = DomainDocument._doc_type.index + fields = ('display_name^5', 'name') + + class ProjectSearch(RTDFacetedSearch): facets = { 'language': TermsFacet(field='language') diff --git a/readthedocs/search/signals.py b/readthedocs/search/signals.py index 93a102ca526..cafce4b9ea0 100644 --- a/readthedocs/search/signals.py +++ b/readthedocs/search/signals.py @@ -11,8 +11,9 @@ from readthedocs.search.tasks import index_objects_to_es, delete_objects_in_es -before_project_search = django.dispatch.Signal(providing_args=['user', 'search']) +before_domain_search = django.dispatch.Signal(providing_args=['user', 'search']) before_file_search = django.dispatch.Signal(providing_args=['user', 'search']) +before_project_search = django.dispatch.Signal(providing_args=['user', 'search']) @receiver(bulk_post_create, sender=HTMLFile) diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 2f9f7bf7cf5..04bd23fff68 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -8,7 +8,7 @@ from django.shortcuts import get_object_or_404, render from readthedocs.builds.constants import LATEST -from readthedocs.search.documents import PageDocument, ProjectDocument +from readthedocs.search.documents import PageDocument, ProjectDocument, DomainDocument from readthedocs.search.utils import get_project_list_or_404 from readthedocs.projects.models import Project @@ -25,6 +25,7 @@ 'version', 'taxonomy', 'language', + 'doc_type', ), ) @@ -38,6 +39,7 @@ def elastic_search(request): version=request.GET.get('version', LATEST), taxonomy=request.GET.get('taxonomy'), language=request.GET.get('language'), + doc_type=request.GET.get('doc_type'), ) results = '' user = '' @@ -53,6 +55,12 @@ def elastic_search(request): ) results = project_search.execute() facets = results.facets + elif user_input.type == 'domain': + project_search = DomainDocument.faceted_search( + query=user_input.query, user=user, 
doc_type=user_input.doc_type + ) + results = project_search.execute() + facets = results.facets elif user_input.type == 'file': kwargs = {} if user_input.project: diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py index a4622e6a16c..c009cbb62bd 100644 --- a/readthedocs/settings/base.py +++ b/readthedocs/settings/base.py @@ -358,6 +358,12 @@ def USE_PROMOS(self): # noqa ES_TASK_CHUNK_SIZE = 100 ES_INDEXES = { + 'domain': { + 'name': 'domain_index', + 'settings': {'number_of_shards': 2, + 'number_of_replicas': 0 + } + }, 'project': { 'name': 'project_index', 'settings': {'number_of_shards': 2, diff --git a/readthedocs/templates/search/elastic_search.html b/readthedocs/templates/search/elastic_search.html index f46ef4f3a17..1c00311d4a4 100644 --- a/readthedocs/templates/search/elastic_search.html +++ b/readthedocs/templates/search/elastic_search.html @@ -19,6 +19,7 @@
  • {% trans 'Projects' %}
  • {% trans 'Files' %}
  • +
  • {% trans 'Domain objects' %}

  • @@ -41,6 +42,27 @@
    {% trans 'Language' %}
    {% endif %} + + {% if type == 'domain' %} + + {% if facets.doc_type %} +
    {% trans 'Type' %}
    + {% for name, count, selected in facets.doc_type %} +
  • + {% if doc_type == name %} + {{ name }} + {% else %} + {{ name }} + {% endif %} + ({{ count }}) + +
  • + {% endfor %} + {% endif %} + + {% endif %} + + {% if type == 'file' %} {% if facets.project %} @@ -124,7 +146,7 @@

    {% blocktrans with query=query|default:"" %}Results for {{ query }}{% endblo {% for result in results %}
  • - {% if result.name %} + {% if result.language %} {# Project #} {{ result.name }} ({{ result.slug }}) @@ -140,6 +162,17 @@

    {% blocktrans with query=query|default:"" %}Results for {{ query }}{% endblo {% endfor %} {# End Project #} + {% elif result.doc_type %} + + {# Domain Data #} + {{ result.project }} - {{ result.doc_type }} - {{ result.name|safe }} + {% for fragment in result.meta.highlight.display_name|slice:":3" %} +

    + ...{{ fragment|safe }}... +

    + {% endfor %} + {# End File #} + {% elif result.path %} {# File #} From 1dfabe84660b129379b8a0b8e4cb21ce862d0e3d Mon Sep 17 00:00:00 2001 From: Eric Holscher Date: Thu, 31 Jan 2019 14:22:13 -0500 Subject: [PATCH 07/22] Add initial search implementation to DomainData --- readthedocs/domaindata/admin.py | 1 + readthedocs/search/faceted_search.py | 45 +- .../static/search/readthedocs-client.js | 804 ++++++++++++++++++ readthedocs/search/views.py | 15 +- .../templates/search/elastic_search.html | 34 +- 5 files changed, 892 insertions(+), 7 deletions(-) create mode 100644 readthedocs/search/static/search/readthedocs-client.js diff --git a/readthedocs/domaindata/admin.py b/readthedocs/domaindata/admin.py index 9fe25b7012a..d24542669f9 100644 --- a/readthedocs/domaindata/admin.py +++ b/readthedocs/domaindata/admin.py @@ -5,6 +5,7 @@ class DomainDataAdmin(admin.ModelAdmin): list_filter = ('type', 'project') raw_id_fields = ('project', 'version') + search_fields = ('doc_name', 'name') admin.site.register(DomainData, DomainDataAdmin) diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index e88db0f6cb4..067f87d0fe9 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -1,10 +1,20 @@ +# -*- coding: utf-8 -*- import logging from elasticsearch_dsl import FacetedSearch, TermsFacet from elasticsearch_dsl.query import Bool, SimpleQueryString -from readthedocs.search.documents import PageDocument, ProjectDocument, DomainDocument -from readthedocs.search.signals import before_file_search, before_project_search, before_domain_search +from readthedocs.search.documents import ( + DomainDocument, + PageDocument, + ProjectDocument, +) +from readthedocs.search.signals import ( + before_domain_search, + before_file_search, + before_project_search, +) + log = logging.getLogger(__name__) @@ -62,6 +72,27 @@ class DomainSearch(RTDFacetedSearch): index = DomainDocument._doc_type.index fields = ('display_name^5', 
'name') + def query(self, search, query): + """Use a custom SimpleQueryString instead of default query.""" + + search = super().query(search, query) + + all_queries = [] + + # need to search for both 'and' and 'or' operations + # the score of and should be higher as it satisfies both or and and + for operator in ['and', 'or']: + query_string = SimpleQueryString( + query=query, fields=self.fields, default_operator=operator + ) + all_queries.append(query_string) + + # run bool query with should, so it returns result where either of the query matches + bool_query = Bool(should=all_queries) + + search = search.query(bool_query) + return search + class ProjectSearch(RTDFacetedSearch): facets = {'language': TermsFacet(field='language')} @@ -101,3 +132,13 @@ def query(self, search, query): search = search.query(bool_query) return search + + +class AllSearch(RTDFacetedSearch): + facets = { + 'project': TermsFacet(field='project'), + 'version': TermsFacet(field='version') + } + signal = before_project_search + doc_types = [DomainDocument, PageDocument, ProjectDocument] + index = ['page_index', 'domain_index', 'project_index'] diff --git a/readthedocs/search/static/search/readthedocs-client.js b/readthedocs/search/static/search/readthedocs-client.js new file mode 100644 index 00000000000..8d5620d2f26 --- /dev/null +++ b/readthedocs/search/static/search/readthedocs-client.js @@ -0,0 +1,804 @@ +(function e(t,n,r){function s(o,u){if(!n[o]){if(!t[o]){var a=typeof require=="function"&&require;if(!u&&a)return a(o,!0);if(i)return i(o,!0);throw new Error("Cannot find module '"+o+"'")}var f=n[o]={exports:{}};t[o][0].call(f.exports,function(e){var n=t[o][1][e];return s(n?n:e)},f,f.exports,e,t,n,r)}return n[o].exports}var i=typeof require=="function"&&require;for(var o=0;o 0) { + self._completeHandlers.shift()(resp) + } + } + + function success (resp) { + var type = o['type'] || resp && setType(resp.getResponseHeader('Content-Type')) // resp can be undefined in IE + resp = (type !== 
'jsonp') ? self.request : resp + // use global data filter on response text + var filteredResponse = globalSetupOptions.dataFilter(resp.responseText, type) + , r = filteredResponse + try { + resp.responseText = r + } catch (e) { + // can't assign this in IE<=8, just ignore + } + if (r) { + switch (type) { + case 'json': + try { + resp = win.JSON ? win.JSON.parse(r) : eval('(' + r + ')') + } catch (err) { + return error(resp, 'Could not parse JSON in response', err) + } + break + case 'js': + resp = eval(r) + break + case 'html': + resp = r + break + case 'xml': + resp = resp.responseXML + && resp.responseXML.parseError // IE trololo + && resp.responseXML.parseError.errorCode + && resp.responseXML.parseError.reason + ? null + : resp.responseXML + break + } + } + + self._responseArgs.resp = resp + self._fulfilled = true + fn(resp) + self._successHandler(resp) + while (self._fulfillmentHandlers.length > 0) { + resp = self._fulfillmentHandlers.shift()(resp) + } + + complete(resp) + } + + function timedOut() { + self._timedOut = true + self.request.abort() + } + + function error(resp, msg, t) { + resp = self.request + self._responseArgs.resp = resp + self._responseArgs.msg = msg + self._responseArgs.t = t + self._erred = true + while (self._errorHandlers.length > 0) { + self._errorHandlers.shift()(resp, msg, t) + } + complete(resp) + } + + this.request = getRequest.call(this, success, error) + } + + Reqwest.prototype = { + abort: function () { + this._aborted = true + this.request.abort() + } + + , retry: function () { + init.call(this, this.o, this.fn) + } + + /** + * Small deviation from the Promises A CommonJs specification + * http://wiki.commonjs.org/wiki/Promises/A + */ + + /** + * `then` will execute upon successful requests + */ + , then: function (success, fail) { + success = success || function () {} + fail = fail || function () {} + if (this._fulfilled) { + this._responseArgs.resp = success(this._responseArgs.resp) + } else if (this._erred) { + 
fail(this._responseArgs.resp, this._responseArgs.msg, this._responseArgs.t) + } else { + this._fulfillmentHandlers.push(success) + this._errorHandlers.push(fail) + } + return this + } + + /** + * `always` will execute whether the request succeeds or fails + */ + , always: function (fn) { + if (this._fulfilled || this._erred) { + fn(this._responseArgs.resp) + } else { + this._completeHandlers.push(fn) + } + return this + } + + /** + * `fail` will execute when the request fails + */ + , fail: function (fn) { + if (this._erred) { + fn(this._responseArgs.resp, this._responseArgs.msg, this._responseArgs.t) + } else { + this._errorHandlers.push(fn) + } + return this + } + , 'catch': function (fn) { + return this.fail(fn) + } + } + + function reqwest(o, fn) { + return new Reqwest(o, fn) + } + + // normalize newline variants according to spec -> CRLF + function normalize(s) { + return s ? s.replace(/\r?\n/g, '\r\n') : '' + } + + function serial(el, cb) { + var n = el.name + , t = el.tagName.toLowerCase() + , optCb = function (o) { + // IE gives value="" even where there is no value attribute + // 'specified' ref: http://www.w3.org/TR/DOM-Level-3-Core/core.html#ID-862529273 + if (o && !o['disabled']) + cb(n, normalize(o['attributes']['value'] && o['attributes']['value']['specified'] ? o['value'] : o['text'])) + } + , ch, ra, val, i + + // don't serialize elements that are disabled or without a name + if (el.disabled || !n) return + + switch (t) { + case 'input': + if (!/reset|button|image|file/i.test(el.type)) { + ch = /checkbox/i.test(el.type) + ra = /radio/i.test(el.type) + val = el.value + // WebKit gives us "" instead of "on" if a checkbox has no value, so correct it here + ;(!(ch || ra) || el.checked) && cb(n, normalize(ch && val === '' ? 'on' : val)) + } + break + case 'textarea': + cb(n, normalize(el.value)) + break + case 'select': + if (el.type.toLowerCase() === 'select-one') { + optCb(el.selectedIndex >= 0 ? 
el.options[el.selectedIndex] : null) + } else { + for (i = 0; el.length && i < el.length; i++) { + el.options[i].selected && optCb(el.options[i]) + } + } + break + } + } + + // collect up all form elements found from the passed argument elements all + // the way down to child elements; pass a '
    ' or form fields. + // called with 'this'=callback to use for serial() on each element + function eachFormElement() { + var cb = this + , e, i + , serializeSubtags = function (e, tags) { + var i, j, fa + for (i = 0; i < tags.length; i++) { + fa = e[byTag](tags[i]) + for (j = 0; j < fa.length; j++) serial(fa[j], cb) + } + } + + for (i = 0; i < arguments.length; i++) { + e = arguments[i] + if (/input|select|textarea/i.test(e.tagName)) serial(e, cb) + serializeSubtags(e, [ 'input', 'select', 'textarea' ]) + } + } + + // standard query string style serialization + function serializeQueryString() { + return reqwest.toQueryString(reqwest.serializeArray.apply(null, arguments)) + } + + // { 'name': 'value', ... } style serialization + function serializeHash() { + var hash = {} + eachFormElement.apply(function (name, value) { + if (name in hash) { + hash[name] && !isArray(hash[name]) && (hash[name] = [hash[name]]) + hash[name].push(value) + } else hash[name] = value + }, arguments) + return hash + } + + // [ { name: 'name', value: 'value' }, ... ] style serialization + reqwest.serializeArray = function () { + var arr = [] + eachFormElement.apply(function (name, value) { + arr.push({name: name, value: value}) + }, arguments) + return arr + } + + reqwest.serialize = function () { + if (arguments.length === 0) return '' + var opt, fn + , args = Array.prototype.slice.call(arguments, 0) + + opt = args.pop() + opt && opt.nodeType && args.push(opt) && (opt = null) + opt && (opt = opt.type) + + if (opt == 'map') fn = serializeHash + else if (opt == 'array') fn = reqwest.serializeArray + else fn = serializeQueryString + + return fn.apply(null, args) + } + + reqwest.toQueryString = function (o, trad) { + var prefix, i + , traditional = trad || false + , s = [] + , enc = encodeURIComponent + , add = function (key, value) { + // If value is a function, invoke it and return its value + value = ('function' === typeof value) ? value() : (value == null ? 
'' : value) + s[s.length] = enc(key) + '=' + enc(value) + } + // If an array was passed in, assume that it is an array of form elements. + if (isArray(o)) { + for (i = 0; o && i < o.length; i++) add(o[i]['name'], o[i]['value']) + } else { + // If traditional, encode the "old" way (the way 1.3.2 or older + // did it), otherwise encode params recursively. + for (prefix in o) { + if (o.hasOwnProperty(prefix)) buildParams(prefix, o[prefix], traditional, add) + } + } + + // spaces should be + according to spec + return s.join('&').replace(/%20/g, '+') + } + + function buildParams(prefix, obj, traditional, add) { + var name, i, v + , rbracket = /\[\]$/ + + if (isArray(obj)) { + // Serialize array item. + for (i = 0; obj && i < obj.length; i++) { + v = obj[i] + if (traditional || rbracket.test(prefix)) { + // Treat each array item as a scalar. + add(prefix, v) + } else { + buildParams(prefix + '[' + (typeof v === 'object' ? i : '') + ']', v, traditional, add) + } + } + } else if (obj && obj.toString() === '[object Object]') { + // Serialize object item. + for (name in obj) { + buildParams(prefix + '[' + name + ']', obj[name], traditional, add) + } + + } else { + // Serialize scalar item. 
+ add(prefix, obj) + } + } + + reqwest.getcallbackPrefix = function () { + return callbackPrefix + } + + // jQuery and Zepto compatibility, differences can be remapped here so you can call + // .ajax.compat(options, callback) + reqwest.compat = function (o, fn) { + if (o) { + o['type'] && (o['method'] = o['type']) && delete o['type'] + o['dataType'] && (o['type'] = o['dataType']) + o['jsonpCallback'] && (o['jsonpCallbackName'] = o['jsonpCallback']) && delete o['jsonpCallback'] + o['jsonp'] && (o['jsonpCallback'] = o['jsonp']) + } + return new Reqwest(o, fn) + } + + reqwest.ajaxSetup = function (options) { + options = options || {} + for (var k in options) { + globalSetupOptions[k] = options[k] + } + } + + return reqwest +}); + +},{}],2:[function(require,module,exports){ +// Document response + +// Page +var Page = function (project, version, doc) { + this.project = project; + this.version = version; + this.doc = doc; + + this.url = null; + this.sections = []; +}; + +Page.prototype.section = function (section) { + return new Section(this.project, this.version, this.doc, section); +}; + +// Section +var Section = function (project, version, doc, section) { + this.project = project; + this.version = version; + this.doc = doc; + this.section = section; + + this.url = null; + this.content = null; + this.wrapped = null; +} + +// Add iframe with returned content to page +Section.prototype.insertContent = function (elem) { + var iframe = document.createElement('iframe'), + self = this; + + iframe.style.display = 'none'; + + if (window.jQuery && elem instanceof window.jQuery) { + elem = elem.get(0); + } + + if (typeof(elem) != 'undefined') { + while (elem.children.length > 0) { + elem.firstChild.remove(); + } + elem.appendChild(iframe); + } + + var win = iframe.contentWindow; + + win.document.open(); + win.document.write(this.content); + win.document.close(); + + var head = win.document.head, + body = win.document.body, + base = null; + + if (head) { + base = 
win.document.createElement('base'); + base.target = '_parent'; + base.href = this.url; + head.appendChild(base); + + // Copy linked stylesheets from parent + var link_elems = document.head.getElementsByTagName('link'); + for (var n = 0; n < link_elems.length; n++) { + var link = link_elems[n]; + if (link.rel == 'stylesheet') { + head.appendChild(link.cloneNode()); + } + } + } + + win.onload = function () { + iframe.style.display = 'inline-block'; + }; + + return iframe; +}; + + +exports.Section = Section; +exports.Page = Page; + +},{}],3:[function(require,module,exports){ +/* Read the Docs Embed functions */ + +var doc = require('./doc'), + Section = doc.Section, + Page = doc.Page; + + +var Embed = function (config) { + this._api_host = 'https://api.grokthedocs.com'; + if (typeof config == 'object') { + if ('api_host' in config) { + this._api_host = config['api_host']; + } + } +}; + +Embed.prototype.section = function (project, version, doc, section, + callback, error_callback) { + callback = callback || function () {}; + error_callback = error_callback || function () {}; + + var self = this, + data = { + 'project': project, + 'version': version, + 'doc': doc, + 'section': section + }; + + this._getObject( + data, + function (resp) { + var section_ret = new Section(project, version, doc, section); + section_ret.url = resp.url; + section_ret.content = resp.content; + section_ret.wrapped = resp.wrapped; + callback(section_ret); + }, + function (error, msg) { + error_callback(error); + } + ); +}; + +Embed.prototype.page = function (project, version, doc, callback, + error_callback) { + + var self = this, + data = { + 'project': project, + 'version': version, + 'doc': doc, + }; + + this._getObject( + data, + function (resp) { + var page = new Page(project, version, doc); + page.url = resp.url; + // TODO headers is misleading here, rename it on the API + page.sections = resp.headers; + callback(page); + }, + function (error, msg) { + error_callback(error); + } + ) +}; + 
+Embed.prototype._getObject = function (data, callback, error_callback) { + var self = this, + reqwest = require("./../bower_components/reqwest/reqwest.js"); + callback = callback || function () {}; + error_callback = error_callback || function () {}; + + return reqwest({ + url: this._api_host + '/api/v1/embed/', + method: 'get', + contentType: 'application/json', + crossDomain: true, + headers: {'Accept': 'application/json'}, + data: data, + success: callback, + error: error_callback + }); +}; + +exports.Embed = Embed; + +},{"./../bower_components/reqwest/reqwest.js":1,"./doc":2}],4:[function(require,module,exports){ +/* Read the Docs Client */ + +var embed = require('./embed'); + + +exports.Embed = embed.Embed; + +if (typeof window != 'undefined') { + window.Embed = embed.Embed; +} + +},{"./embed":3}]},{},[4]) \ No newline at end of file diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 04bd23fff68..5ed00fc38c0 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -8,9 +8,14 @@ from django.shortcuts import get_object_or_404, render from readthedocs.builds.constants import LATEST -from readthedocs.search.documents import PageDocument, ProjectDocument, DomainDocument -from readthedocs.search.utils import get_project_list_or_404 from readthedocs.projects.models import Project +from readthedocs.search.documents import ( + DomainDocument, + PageDocument, + ProjectDocument, +) +from readthedocs.search.faceted_search import AllSearch +from readthedocs.search.utils import get_project_list_or_404 log = logging.getLogger(__name__) @@ -77,6 +82,12 @@ def elastic_search(request): ) results = page_search.execute() facets = results.facets + elif user_input.type == 'all': + project_search = AllSearch( + query=user_input.query, user=user + ) + results = project_search.execute() + facets = results.facets log.info( LOG_TEMPLATE.format( diff --git a/readthedocs/templates/search/elastic_search.html 
b/readthedocs/templates/search/elastic_search.html index 1c00311d4a4..2b5a21d9265 100644 --- a/readthedocs/templates/search/elastic_search.html +++ b/readthedocs/templates/search/elastic_search.html @@ -1,6 +1,6 @@ {% extends "projects/base_project.html" %} -{% load core_tags i18n %} +{% load core_tags i18n static %} {% block title %}{% blocktrans with query=query|default:"" %}Search: {{ query }} {% endblocktrans %}{% endblock %} @@ -13,6 +13,31 @@ {% endblock %} +{% block extra_scripts %} + + + +{% endblock %} + {% block content %}