From 59775aae47d0a23119d80cbf1288050f3b2d93d6 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 21 Sep 2022 18:10:53 -0500 Subject: [PATCH 01/35] Convert api.py to module api/v2 --- readthedocs/api/v2/proxied_urls.py | 2 +- readthedocs/search/api/__init__.py | 101 +++++++++++++++ readthedocs/search/api/v2/__init__.py | 0 .../search/{ => api/v2}/serializers.py | 0 readthedocs/search/api/v2/urls.py | 6 + .../search/{api.py => api/v2/views.py} | 116 +++--------------- readthedocs/search/proxied_api.py | 12 -- readthedocs/urls.py | 3 +- 8 files changed, 124 insertions(+), 116 deletions(-) create mode 100644 readthedocs/search/api/__init__.py create mode 100644 readthedocs/search/api/v2/__init__.py rename readthedocs/search/{ => api/v2}/serializers.py (100%) create mode 100644 readthedocs/search/api/v2/urls.py rename readthedocs/search/{api.py => api/v2/views.py} (72%) delete mode 100644 readthedocs/search/proxied_api.py diff --git a/readthedocs/api/v2/proxied_urls.py b/readthedocs/api/v2/proxied_urls.py index cb5bea2cdc7..fb6c74044a4 100644 --- a/readthedocs/api/v2/proxied_urls.py +++ b/readthedocs/api/v2/proxied_urls.py @@ -9,7 +9,7 @@ from readthedocs.analytics.proxied_api import AnalyticsView from readthedocs.api.v2.views.proxied import ProxiedEmbedAPI, ProxiedFooterHTML -from readthedocs.search.proxied_api import ProxiedPageSearchAPIView +from readthedocs.search.api.v2.views import ProxiedPageSearchAPIView api_footer_urls = [ re_path(r'footer_html/', ProxiedFooterHTML.as_view(), name='footer_html'), diff --git a/readthedocs/search/api/__init__.py b/readthedocs/search/api/__init__.py new file mode 100644 index 00000000000..ebc0215786e --- /dev/null +++ b/readthedocs/search/api/__init__.py @@ -0,0 +1,101 @@ +from collections import namedtuple +from django.utils.translation import gettext as _ +from rest_framework.pagination import PageNumberPagination +from rest_framework.exceptions import NotFound +from math import ceil + + +class PaginatorPage: + + """ + Mimics the result from a paginator. + + By using this class, we avoid having to override a lot of methods + of `PageNumberPagination` to make it work with the ES DSL object. + """ + + def __init__(self, page_number, total_pages, count): + self.number = page_number + Paginator = namedtuple('Paginator', ['num_pages', 'count']) + self.paginator = Paginator(total_pages, count) + + def has_next(self): + return self.number < self.paginator.num_pages + + def has_previous(self): + return self.number > 1 + + def next_page_number(self): + return self.number + 1 + + def previous_page_number(self): + return self.number - 1 + + +class SearchPagination(PageNumberPagination): + + """Paginator for the results of PageSearch.""" + + page_size = 50 + page_size_query_param = 'page_size' + max_page_size = 100 + + def _get_page_number(self, number): + try: + if isinstance(number, float) and not number.is_integer(): + raise ValueError + number = int(number) + except (TypeError, ValueError): + number = -1 + return number + + def paginate_queryset(self, queryset, request, view=None): + """ + Override to get the paginated result from the ES queryset. + + This makes use of our custom paginator and slicing support from the ES DSL object, + instead of the one used by django's ORM. + + Mostly inspired by https://github.com/encode/django-rest-framework/blob/acbd9d8222e763c7f9c7dc2de23c430c702e06d4/rest_framework/pagination.py#L191 # noqa + """ + # Needed for other methods of this class. + self.request = request + + page_size = self.get_page_size(request) + page_number = request.query_params.get(self.page_query_param, 1) + + original_page_number = page_number + page_number = self._get_page_number(page_number) + + if page_number <= 0: + msg = self.invalid_page_message.format( + page_number=original_page_number, + message=_("Invalid page"), + ) + raise NotFound(msg) + + start = (page_number - 1) * page_size + end = page_number * page_size + + result = [] + total_count = 0 + total_pages = 1 + + if queryset: + result = queryset[start:end].execute() + total_count = result.hits.total['value'] + hits = max(1, total_count) + total_pages = ceil(hits / page_size) + + if total_pages > 1 and self.template is not None: + # The browsable API should display pagination controls. + self.display_page_controls = True + + # Needed for other methods of this class. + self.page = PaginatorPage( + page_number=page_number, + total_pages=total_pages, + count=total_count, + ) + + return result diff --git a/readthedocs/search/api/v2/__init__.py b/readthedocs/search/api/v2/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/readthedocs/search/serializers.py b/readthedocs/search/api/v2/serializers.py similarity index 100% rename from readthedocs/search/serializers.py rename to readthedocs/search/api/v2/serializers.py diff --git a/readthedocs/search/api/v2/urls.py b/readthedocs/search/api/v2/urls.py new file mode 100644 index 00000000000..b0b42b73505 --- /dev/null +++ b/readthedocs/search/api/v2/urls.py @@ -0,0 +1,6 @@ +from readthedocs.search.api.v2 import PageSearchAPIView +from django.urls import path + +urlpatterns = [ + path("", PageSearchAPIView.as_view(), name="search_api"), +] diff --git a/readthedocs/search/api.py b/readthedocs/search/api/v2/views.py similarity index 72% rename from readthedocs/search/api.py rename to readthedocs/search/api/v2/views.py index af771cb8ff6..815149d7774 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api/v2/views.py @@ -1,13 +1,11 @@ -from functools import lru_cache, namedtuple -from math import ceil +from functools import lru_cache import structlog from django.shortcuts import get_object_or_404 from django.utils import timezone from django.utils.translation import gettext as _ -from rest_framework.exceptions import NotFound, ValidationError +from rest_framework.exceptions import ValidationError from rest_framework.generics import GenericAPIView -from rest_framework.pagination import PageNumberPagination from readthedocs.api.mixins import CDNCacheTagsMixin from readthedocs.api.v2.permissions import IsAuthorizedToViewVersion @@ -15,108 +13,14 @@ from readthedocs.projects.models import Feature, Project from readthedocs.search import tasks from readthedocs.search.faceted_search import PageSearch +from readthedocs.search.api import SearchPagination +from readthedocs.core.utils.extend import SettingsOverrideObject -from .serializers import PageSearchSerializer, ProjectData, VersionData +from readthedocs.search.api.v2.serializers import PageSearchSerializer, ProjectData, VersionData log = structlog.get_logger(__name__) -class PaginatorPage: - - """ - Mimics the result from a paginator. - - By using this class, we avoid having to override a lot of methods - of `PageNumberPagination` to make it work with the ES DSL object. - """ - - def __init__(self, page_number, total_pages, count): - self.number = page_number - Paginator = namedtuple('Paginator', ['num_pages', 'count']) - self.paginator = Paginator(total_pages, count) - - def has_next(self): - return self.number < self.paginator.num_pages - - def has_previous(self): - return self.number > 1 - - def next_page_number(self): - return self.number + 1 - - def previous_page_number(self): - return self.number - 1 - - -class SearchPagination(PageNumberPagination): - - """Paginator for the results of PageSearch.""" - - page_size = 50 - page_size_query_param = 'page_size' - max_page_size = 100 - - def _get_page_number(self, number): - try: - if isinstance(number, float) and not number.is_integer(): - raise ValueError - number = int(number) - except (TypeError, ValueError): - number = -1 - return number - - def paginate_queryset(self, queryset, request, view=None): - """ - Override to get the paginated result from the ES queryset. - - This makes use of our custom paginator and slicing support from the ES DSL object, - instead of the one used by django's ORM. - - Mostly inspired by https://github.com/encode/django-rest-framework/blob/acbd9d8222e763c7f9c7dc2de23c430c702e06d4/rest_framework/pagination.py#L191 # noqa - """ - # Needed for other methods of this class. - self.request = request - - page_size = self.get_page_size(request) - page_number = request.query_params.get(self.page_query_param, 1) - - original_page_number = page_number - page_number = self._get_page_number(page_number) - - if page_number <= 0: - msg = self.invalid_page_message.format( - page_number=original_page_number, - message=_("Invalid page"), - ) - raise NotFound(msg) - - start = (page_number - 1) * page_size - end = page_number * page_size - - result = [] - total_count = 0 - total_pages = 1 - - if queryset: - result = queryset[start:end].execute() - total_count = result.hits.total['value'] - hits = max(1, total_count) - total_pages = ceil(hits / page_size) - - if total_pages > 1 and self.template is not None: - # The browsable API should display pagination controls. - self.display_page_controls = True - - # Needed for other methods of this class. - self.page = PaginatorPage( - page_number=page_number, - total_pages=total_pages, - count=total_count, - ) - - return result - - class PageSearchAPIView(CDNCacheTagsMixin, GenericAPIView): """ @@ -349,3 +253,13 @@ def list(self): ) serializer = self.get_serializer(page, many=True) return self.paginator.get_paginated_response(serializer.data) + + +class BaseProxiedPageSearchAPIView(PageSearchAPIView): + + pass + + +class ProxiedPageSearchAPIView(SettingsOverrideObject): + + _default_class = BaseProxiedPageSearchAPIView diff --git a/readthedocs/search/proxied_api.py b/readthedocs/search/proxied_api.py deleted file mode 100644 index 514c0aecbf3..00000000000 --- a/readthedocs/search/proxied_api.py +++ /dev/null @@ -1,12 +0,0 @@ -from readthedocs.core.utils.extend import SettingsOverrideObject -from .api import PageSearchAPIView - - -class BaseProxiedPageSearchAPIView(PageSearchAPIView): - - pass - - -class ProxiedPageSearchAPIView(SettingsOverrideObject): - - _default_class = BaseProxiedPageSearchAPIView diff --git a/readthedocs/urls.py b/readthedocs/urls.py index c1ffb85c534..ebbf0038e6c 100644 --- a/readthedocs/urls.py +++ b/readthedocs/urls.py @@ -15,7 +15,6 @@ do_not_track, server_error_500, ) -from readthedocs.search.api import PageSearchAPIView from readthedocs.search.views import GlobalSearchView admin.autodiscover() @@ -91,7 +90,7 @@ api_urls = [ re_path(r'^api/v2/', include('readthedocs.api.v2.urls')), # Keep `search_api` at root level, so the test does not fail for other API - re_path(r'^api/v2/search/$', PageSearchAPIView.as_view(), name='search_api'), + re_path(r'^api/v2/search/$', include("readthedocs.search.api.v2.urls")), # Deprecated re_path(r'^api/v1/embed/', include('readthedocs.embed.urls')), re_path(r'^api/v2/embed/', include('readthedocs.embed.urls')), From af8b887fe4d71c47bb6f3ae149ebf037768bbe31 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 21 Sep 2022 18:12:08 -0500 Subject: [PATCH 02/35] Black --- readthedocs/search/api/__init__.py | 11 ++-- readthedocs/search/api/v2/serializers.py | 73 ++++++++++++------------ readthedocs/search/api/v2/urls.py | 3 +- readthedocs/search/api/v2/views.py | 34 ++++++----- readthedocs/urls.py | 2 +- 5 files changed, 64 insertions(+), 59 deletions(-) diff --git a/readthedocs/search/api/__init__.py b/readthedocs/search/api/__init__.py index ebc0215786e..2800efc4da0 100644 --- a/readthedocs/search/api/__init__.py +++ b/readthedocs/search/api/__init__.py @@ -1,8 +1,9 @@ from collections import namedtuple +from math import ceil + from django.utils.translation import gettext as _ -from rest_framework.pagination import PageNumberPagination from rest_framework.exceptions import NotFound -from math import ceil +from rest_framework.pagination import PageNumberPagination class PaginatorPage: @@ -16,7 +17,7 @@ class PaginatorPage: def __init__(self, page_number, total_pages, count): self.number = page_number - Paginator = namedtuple('Paginator', ['num_pages', 'count']) + Paginator = namedtuple("Paginator", ["num_pages", "count"]) self.paginator = Paginator(total_pages, count) def has_next(self): @@ -37,7 +38,7 @@ class SearchPagination(PageNumberPagination): """Paginator for the results of PageSearch.""" page_size = 50 - page_size_query_param = 'page_size' + page_size_query_param = "page_size" max_page_size = 100 def _get_page_number(self, number): @@ -83,7 +84,7 @@ def paginate_queryset(self, queryset, request, view=None): if queryset: result = queryset[start:end].execute() - total_count = result.hits.total['value'] + total_count = result.hits.total["value"] hits = max(1, total_count) total_pages = ceil(hits / page_size) diff --git a/readthedocs/search/api/v2/serializers.py b/readthedocs/search/api/v2/serializers.py index 2dedf49ac77..6ae6cd21f3c 100644 --- a/readthedocs/search/api/v2/serializers.py +++ b/readthedocs/search/api/v2/serializers.py @@ -18,8 +18,8 @@ from readthedocs.projects.models import Project # Structures used for storing cached data of a version mostly. -ProjectData = namedtuple('ProjectData', ['version', 'alias']) -VersionData = namedtuple('VersionData', ['slug', 'docs_url']) +ProjectData = namedtuple("ProjectData", ["version", "alias"]) +VersionData = namedtuple("VersionData", ["slug", "docs_url"]) class ProjectHighlightSerializer(serializers.Serializer): @@ -29,23 +29,23 @@ class ProjectHighlightSerializer(serializers.Serializer): description = serializers.SerializerMethodField() def get_name(self, obj): - return list(getattr(obj, 'name', [])) + return list(getattr(obj, "name", [])) def get_slug(self, obj): - return list(getattr(obj, 'slug', [])) + return list(getattr(obj, "slug", [])) def get_description(self, obj): - return list(getattr(obj, 'description', [])) + return list(getattr(obj, "description", [])) class ProjectSearchSerializer(serializers.Serializer): - type = serializers.CharField(default='project', source=None, read_only=True) + type = serializers.CharField(default="project", source=None, read_only=True) name = serializers.CharField() slug = serializers.CharField() - link = serializers.CharField(source='url') + link = serializers.CharField(source="url") description = serializers.CharField() - highlights = ProjectHighlightSerializer(source='meta.highlight', default=dict) + highlights = ProjectHighlightSerializer(source="meta.highlight", default=dict) class PageHighlightSerializer(serializers.Serializer): @@ -53,7 +53,7 @@ class PageHighlightSerializer(serializers.Serializer): title = serializers.SerializerMethodField() def get_title(self, obj): - return list(getattr(obj, 'title', [])) + return list(getattr(obj, "title", [])) class PageSearchSerializer(serializers.Serializer): @@ -66,14 +66,14 @@ class PageSearchSerializer(serializers.Serializer): It's a dictionary mapping the project slug to a ProjectData object. """ - type = serializers.CharField(default='page', source=None, read_only=True) + type = serializers.CharField(default="page", source=None, read_only=True) project = serializers.CharField() project_alias = serializers.SerializerMethodField() version = serializers.CharField() title = serializers.CharField() path = serializers.SerializerMethodField() domain = serializers.SerializerMethodField() - highlights = PageHighlightSerializer(source='meta.highlight', default=dict) + highlights = PageHighlightSerializer(source="meta.highlight", default=dict) blocks = serializers.SerializerMethodField() def _get_project_data(self, obj): @@ -85,16 +85,18 @@ def _get_project_data(self, obj): If the result is fetched from the database, it's cached into ``projects_data``. """ - project_data = self.context.get('projects_data', {}).get(obj.project) + project_data = self.context.get("projects_data", {}).get(obj.project) if project_data: return project_data project = Project.objects.filter(slug=obj.project).first() if project: docs_url = project.get_docs_url(version_slug=obj.version) - project_alias = project.superprojects.values_list('alias', flat=True).first() + project_alias = project.superprojects.values_list( + "alias", flat=True + ).first() - projects_data = self.context.setdefault('projects_data', {}) + projects_data = self.context.setdefault("projects_data", {}) version_data = VersionData( slug=obj.version, docs_url=docs_url, @@ -116,7 +118,7 @@ def get_domain(self, obj): full_path = self._get_full_path(obj) if full_path: parsed = urlparse(full_path) - return f'{parsed.scheme}://{parsed.netloc}' + return f"{parsed.scheme}://{parsed.netloc}" return None def get_path(self, obj): @@ -136,16 +138,16 @@ def _get_full_path(self, obj): # and always end it with / so it goes directly to proxito. # For a generic doctype we just strip the index.html part if it exists. if obj.doctype in {SPHINX_HTMLDIR, MKDOCS, GENERIC}: - path = re.sub('(^|/)index.html$', '/', path) + path = re.sub("(^|/)index.html$", "/", path) - return docs_url.rstrip('/') + '/' + path.lstrip('/') + return docs_url.rstrip("/") + "/" + path.lstrip("/") return None def get_blocks(self, obj): """Combine and sort inner results (domains and sections).""" serializers = { - 'domain': DomainSearchSerializer, - 'section': SectionSearchSerializer, + "domain": DomainSearchSerializer, + "section": SectionSearchSerializer, } inner_hits = obj.meta.inner_hits @@ -154,19 +156,16 @@ def get_blocks(self, obj): # Make them identifiable before merging them for s in sections: - s.type = 'section' + s.type = "section" for d in domains: - d.type = 'domain' + d.type = "domain" sorted_results = sorted( itertools.chain(sections, domains), - key=attrgetter('meta.score'), + key=attrgetter("meta.score"), reverse=True, ) - sorted_results = [ - serializers[hit.type](hit).data - for hit in sorted_results - ] + sorted_results = [serializers[hit.type](hit).data for hit in sorted_results] return sorted_results @@ -176,20 +175,20 @@ class DomainHighlightSerializer(serializers.Serializer): content = serializers.SerializerMethodField() def get_name(self, obj): - return list(getattr(obj, 'domains.name', [])) + return list(getattr(obj, "domains.name", [])) def get_content(self, obj): - return list(getattr(obj, 'domains.docstrings', [])) + return list(getattr(obj, "domains.docstrings", [])) class DomainSearchSerializer(serializers.Serializer): - type = serializers.CharField(default='domain', source=None, read_only=True) - role = serializers.CharField(source='role_name') + type = serializers.CharField(default="domain", source=None, read_only=True) + role = serializers.CharField(source="role_name") name = serializers.CharField() - id = serializers.CharField(source='anchor') - content = serializers.CharField(source='docstrings') - highlights = DomainHighlightSerializer(source='meta.highlight', default=dict) + id = serializers.CharField(source="anchor") + content = serializers.CharField(source="docstrings") + highlights = DomainHighlightSerializer(source="meta.highlight", default=dict) class SectionHighlightSerializer(serializers.Serializer): @@ -198,16 +197,16 @@ class SectionHighlightSerializer(serializers.Serializer): content = serializers.SerializerMethodField() def get_title(self, obj): - return list(getattr(obj, 'sections.title', [])) + return list(getattr(obj, "sections.title", [])) def get_content(self, obj): - return list(getattr(obj, 'sections.content', [])) + return list(getattr(obj, "sections.content", [])) class SectionSearchSerializer(serializers.Serializer): - type = serializers.CharField(default='section', source=None, read_only=True) + type = serializers.CharField(default="section", source=None, read_only=True) id = serializers.CharField() title = serializers.CharField() content = serializers.CharField() - highlights = SectionHighlightSerializer(source='meta.highlight', default=dict) + highlights = SectionHighlightSerializer(source="meta.highlight", default=dict) diff --git a/readthedocs/search/api/v2/urls.py b/readthedocs/search/api/v2/urls.py index b0b42b73505..1249f6161e8 100644 --- a/readthedocs/search/api/v2/urls.py +++ b/readthedocs/search/api/v2/urls.py @@ -1,6 +1,7 @@ -from readthedocs.search.api.v2 import PageSearchAPIView from django.urls import path +from readthedocs.search.api.v2 import PageSearchAPIView + urlpatterns = [ path("", PageSearchAPIView.as_view(), name="search_api"), ] diff --git a/readthedocs/search/api/v2/views.py b/readthedocs/search/api/v2/views.py index 815149d7774..ad3612af9bb 100644 --- a/readthedocs/search/api/v2/views.py +++ b/readthedocs/search/api/v2/views.py @@ -10,13 +10,16 @@ from readthedocs.api.mixins import CDNCacheTagsMixin from readthedocs.api.v2.permissions import IsAuthorizedToViewVersion from readthedocs.builds.models import Version +from readthedocs.core.utils.extend import SettingsOverrideObject from readthedocs.projects.models import Feature, Project from readthedocs.search import tasks -from readthedocs.search.faceted_search import PageSearch from readthedocs.search.api import SearchPagination -from readthedocs.core.utils.extend import SettingsOverrideObject - -from readthedocs.search.api.v2.serializers import PageSearchSerializer, ProjectData, VersionData +from readthedocs.search.api.v2.serializers import ( + PageSearchSerializer, + ProjectData, + VersionData, +) +from readthedocs.search.faceted_search import PageSearch log = structlog.get_logger(__name__) @@ -35,21 +38,21 @@ class PageSearchAPIView(CDNCacheTagsMixin, GenericAPIView): Check our [docs](https://docs.readthedocs.io/en/stable/server-side-search.html#api) for more information. """ # noqa - http_method_names = ['get'] + http_method_names = ["get"] permission_classes = [IsAuthorizedToViewVersion] pagination_class = SearchPagination serializer_class = PageSearchSerializer - project_cache_tag = 'rtd-search' + project_cache_tag = "rtd-search" @lru_cache(maxsize=1) def _get_project(self): - project_slug = self.request.GET.get('project', None) + project_slug = self.request.GET.get("project", None) project = get_object_or_404(Project, slug=project_slug) return project @lru_cache(maxsize=1) def _get_version(self): - version_slug = self.request.GET.get('version', None) + version_slug = self.request.GET.get("version", None) project = self._get_project() version = get_object_or_404( project.versions.all(), @@ -68,7 +71,7 @@ def _validate_query_params(self): :raises: ValidationError if one of them is missing. """ errors = {} - required_query_params = {'q', 'project', 'version'} + required_query_params = {"q", "project", "version"} request_params = set(self.request.query_params.keys()) missing_params = required_query_params - request_params for param in missing_params: @@ -161,8 +164,7 @@ def _get_project_version(self, project, version_slug, include_hidden=True): :param include_hidden: If hidden versions should be considered. """ return ( - Version.internal - .public( + Version.internal.public( user=self.request.user, project=project, only_built=True, @@ -188,7 +190,7 @@ def _get_search_query(self): def _record_query(self, response): project_slug = self._get_project().slug version_slug = self._get_version().slug - total_results = response.data.get('count', 0) + total_results = response.data.get("count", 0) time = timezone.now() query = self._get_search_query().lower().strip() @@ -222,7 +224,7 @@ def get_queryset(self): } # Check to avoid searching all projects in case it's empty. if not projects: - log.info('Unable to find a version to search') + log.info("Unable to find a version to search") return [] query = self._get_search_query() @@ -236,7 +238,7 @@ def get_queryset(self): def get_serializer_context(self): context = super().get_serializer_context() - context['projects_data'] = self._get_all_projects_data() + context["projects_data"] = self._get_all_projects_data() return context def get(self, request, *args, **kwargs): @@ -249,7 +251,9 @@ def list(self): """List the results using pagination.""" queryset = self.get_queryset() page = self.paginator.paginate_queryset( - queryset, self.request, view=self, + queryset, + self.request, + view=self, ) serializer = self.get_serializer(page, many=True) return self.paginator.get_paginated_response(serializer.data) diff --git a/readthedocs/urls.py b/readthedocs/urls.py index ebbf0038e6c..b378729e883 100644 --- a/readthedocs/urls.py +++ b/readthedocs/urls.py @@ -90,7 +90,7 @@ api_urls = [ re_path(r'^api/v2/', include('readthedocs.api.v2.urls')), # Keep `search_api` at root level, so the test does not fail for other API - re_path(r'^api/v2/search/$', include("readthedocs.search.api.v2.urls")), + re_path(r"^api/v2/search/$", include("readthedocs.search.api.v2.urls")), # Deprecated re_path(r'^api/v1/embed/', include('readthedocs.embed.urls')), re_path(r'^api/v2/embed/', include('readthedocs.embed.urls')), From 9f219b82be95f296361e48e5120a94ec97849772 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 21 Sep 2022 18:34:46 -0500 Subject: [PATCH 03/35] Fix tests --- readthedocs/search/api/v2/urls.py | 2 +- readthedocs/search/tests/test_api.py | 2 +- readthedocs/search/views.py | 11 +++-------- 3 files changed, 5 insertions(+), 10 deletions(-) diff --git a/readthedocs/search/api/v2/urls.py b/readthedocs/search/api/v2/urls.py index 1249f6161e8..4a3e10d0a23 100644 --- a/readthedocs/search/api/v2/urls.py +++ b/readthedocs/search/api/v2/urls.py @@ -1,6 +1,6 @@ from django.urls import path -from readthedocs.search.api.v2 import PageSearchAPIView +from readthedocs.search.api.v2.views import PageSearchAPIView urlpatterns = [ path("", PageSearchAPIView.as_view(), name="search_api"), diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py index 641a1217906..18db6e70b7b 100644 --- a/readthedocs/search/tests/test_api.py +++ b/readthedocs/search/tests/test_api.py @@ -15,7 +15,7 @@ SPHINX_SINGLEHTML, ) from readthedocs.projects.models import Feature, HTMLFile, Project -from readthedocs.search.api import PageSearchAPIView +from readthedocs.search.api.v2.views import PageSearchAPIView from readthedocs.search.documents import PageDocument from readthedocs.search.tests.utils import ( DOMAIN_FIELDS, diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 758eee067de..b9963541a5f 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -1,25 +1,20 @@ """Search views.""" import collections -import structlog +import structlog from django.conf import settings from django.shortcuts import get_object_or_404, render from django.views import View from readthedocs.builds.constants import LATEST from readthedocs.projects.models import Feature, Project -from readthedocs.search.faceted_search import ( - ALL_FACETS, - PageSearch, - ProjectSearch, -) - -from .serializers import ( +from readthedocs.search.api.v2.serializers import ( PageSearchSerializer, ProjectData, ProjectSearchSerializer, VersionData, ) +from readthedocs.search.faceted_search import ALL_FACETS, PageSearch, ProjectSearch log = structlog.get_logger(__name__) From 485c200cf851750dba3bc49e6c7bada5ff00c875 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 26 Sep 2022 14:57:36 -0500 Subject: [PATCH 04/35] Search: refactor serializer's context We were generating the context of the serializer from outside, but we can do this from the serializer itself, this way we don't have to duplicate this logic. --- readthedocs/search/api/v2/serializers.py | 30 +++++++++- readthedocs/search/api/v2/views.py | 76 ++++-------------------- readthedocs/search/tests/test_api.py | 4 +- readthedocs/search/views.py | 25 +------- 4 files changed, 42 insertions(+), 93 deletions(-) diff --git a/readthedocs/search/api/v2/serializers.py b/readthedocs/search/api/v2/serializers.py index 6ae6cd21f3c..12e1434329a 100644 --- a/readthedocs/search/api/v2/serializers.py +++ b/readthedocs/search/api/v2/serializers.py @@ -61,9 +61,11 @@ class PageSearchSerializer(serializers.Serializer): """ Page serializer. - If ``projects_data`` is passed into the context, the serializer - will try to use that to generate the link before querying the database. - It's a dictionary mapping the project slug to a ProjectData object. + If ``projects`` is passed in the constructor, the serializer + will pre-generate a cache with that information, + this is to avoid querying the database again for each result. + + :param projects: A list of tuples of project and version. """ type = serializers.CharField(default="page", source=None, read_only=True) @@ -76,6 +78,28 @@ class PageSearchSerializer(serializers.Serializer): highlights = PageHighlightSerializer(source="meta.highlight", default=dict) blocks = serializers.SerializerMethodField() + def __init__(self, *args, projects=None, **kwargs): + if projects: + context = kwargs.setdefault("context", {}) + context["projects_data"] = { + project.slug: self._build_project_data(project, version.slug) + for project, version in projects + } + super().__init__(*args, **kwargs) + + def _build_project_data(self, project, version_slug): + """Build a `ProjectData` object given a project and its version.""" + url = project.get_docs_url(version_slug=version_slug) + project_alias = project.superprojects.values_list("alias", flat=True).first() + version_data = VersionData( + slug=version_slug, + docs_url=url, + ) + return ProjectData( + alias=project_alias, + version=version_data, + ) + def _get_project_data(self, obj): """ Get and cache the project data. diff --git a/readthedocs/search/api/v2/views.py b/readthedocs/search/api/v2/views.py index ad3612af9bb..aaf81201092 100644 --- a/readthedocs/search/api/v2/views.py +++ b/readthedocs/search/api/v2/views.py @@ -14,11 +14,7 @@ from readthedocs.projects.models import Feature, Project from readthedocs.search import tasks from readthedocs.search.api import SearchPagination -from readthedocs.search.api.v2.serializers import ( - PageSearchSerializer, - ProjectData, - VersionData, -) +from readthedocs.search.api.v2.serializers import PageSearchSerializer from readthedocs.search.faceted_search import PageSearch log = structlog.get_logger(__name__) @@ -80,45 +76,15 @@ def _validate_query_params(self): raise ValidationError(errors) @lru_cache(maxsize=1) - def _get_all_projects_data(self): - """ - Return a dictionary of the project itself and all its subprojects. - - Example: - - .. code:: - - { - "requests": ProjectData( - alias='alias', - version=VersionData( - "latest", - "https://requests.readthedocs.io/en/latest/", - ), - ), - "requests-oauth": ProjectData( - alias=None, - version=VersionData( - "latest", - "https://requests-oauth.readthedocs.io/en/latest/", - ), - ), - } - - .. note:: The response is cached into the instance. - - :rtype: A dictionary of project slugs mapped to a `VersionData` object. - """ + def _get_projects_to_search(self): + """Get all projects to search.""" main_version = self._get_version() main_project = self._get_project() if not self._has_permission(self.request.user, main_version): - return {} - - projects_data = { - main_project.slug: self._get_project_data(main_project, main_version), - } + return [] + projects_to_search = [(main_project, main_version)] subprojects = Project.objects.filter(superprojects__parent_id=main_project.id) for subproject in subprojects: version = self._get_project_version( @@ -136,24 +102,9 @@ def _get_all_projects_data(self): ) if version and self._has_permission(self.request.user, version): - projects_data[subproject.slug] = self._get_project_data( - subproject, version - ) + projects_to_search.append((subproject, version)) - return projects_data - - def _get_project_data(self, project, version): - """Build a `ProjectData` object given a project and its version.""" - url = project.get_docs_url(version_slug=version.slug) - project_alias = project.superprojects.values_list("alias", flat=True).first() - version_data = VersionData( - slug=version.slug, - docs_url=url, - ) - return ProjectData( - alias=project_alias, - version=version_data, - ) + return projects_to_search def _get_project_version(self, project, version_slug, include_hidden=True): """ @@ -219,8 +170,8 @@ def get_queryset(self): is compatible with DRF's paginator. """ projects = { - project: project_data.version.slug - for project, project_data in self._get_all_projects_data().items() + project.slug: version.slug + for project, version in self._get_projects_to_search() } # Check to avoid searching all projects in case it's empty. if not projects: @@ -236,11 +187,6 @@ def get_queryset(self): ) return queryset - def get_serializer_context(self): - context = super().get_serializer_context() - context["projects_data"] = self._get_all_projects_data() - return context - def get(self, request, *args, **kwargs): self._validate_query_params() result = self.list() @@ -255,7 +201,9 @@ def list(self): self.request, view=self, ) - serializer = self.get_serializer(page, many=True) + serializer = self.get_serializer( + page, many=True, projects=self._get_projects_to_search() + ) return self.paginator.get_paginated_response(serializer.data) diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py index 18db6e70b7b..b6663ab8c50 100644 --- a/readthedocs/search/tests/test_api.py +++ b/readthedocs/search/tests/test_api.py @@ -342,9 +342,9 @@ def test_doc_search_unexisting_version(self, api_client, project): resp = self.get_search(api_client, search_params) assert resp.status_code == 404 - @mock.patch.object(PageSearchAPIView, '_get_all_projects_data', dict) + @mock.patch.object(PageSearchAPIView, "_get_projects_to_search", list) def test_get_all_projects_returns_empty_results(self, api_client, project): - """If there is a case where `_get_all_projects` returns empty, we could be querying all projects.""" + """If there is a case where `_get_projects_to_search` returns empty, we could be querying all projects.""" # `documentation` word is present both in `kuma` and `docs` files # and not in `pipeline`, so search with this phrase but filter through project diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index b9963541a5f..baa18478b4a 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -10,9 +10,7 @@ from readthedocs.projects.models import Feature, Project from readthedocs.search.api.v2.serializers import ( PageSearchSerializer, - ProjectData, ProjectSearchSerializer, - VersionData, ) from readthedocs.search.faceted_search import ALL_FACETS, PageSearch, ProjectSearch @@ -91,26 +89,6 @@ def _get_project(self, project_slug): project = get_object_or_404(queryset, slug=project_slug) return project - def _get_project_data(self, project, version_slug): - docs_url = project.get_docs_url(version_slug=version_slug) - version_data = VersionData( - slug=version_slug, - docs_url=docs_url, - ) - project_data = { - project.slug: ProjectData( - alias=None, - version=version_data, - ) - } - return project_data - - def get_serializer_context(self, project, version_slug): - context = { - 'projects_data': self._get_project_data(project, version_slug), - } - return context - def get(self, request, project_slug): project_obj = self._get_project(project_slug) use_advanced_query = not project_obj.has_feature( @@ -132,8 +110,7 @@ def get(self, request, project_slug): use_advanced_query=use_advanced_query, ) - context = self.get_serializer_context(project_obj, user_input.version) - results = PageSearchSerializer(results, many=True, context=context).data + results = PageSearchSerializer(results, many=True).data template_context = user_input._asdict() template_context.update({ From 4b9857818608f6fc4c38d23acfc06c994a37cc11 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 26 Sep 2022 19:31:47 -0500 Subject: [PATCH 05/35] Search API V3 --- readthedocs/search/api/v3/__init__.py | 0 readthedocs/search/api/v3/backend.py | 185 +++++++++++++++++++++++ readthedocs/search/api/v3/parser.py | 70 +++++++++ readthedocs/search/api/v3/serializers.py | 23 +++ readthedocs/search/api/v3/urls.py | 7 + readthedocs/search/api/v3/views.py | 125 +++++++++++++++ readthedocs/urls.py | 1 + 7 files changed, 411 insertions(+) create mode 100644 readthedocs/search/api/v3/__init__.py create mode 100644 readthedocs/search/api/v3/backend.py create mode 100644 readthedocs/search/api/v3/parser.py create mode 100644 readthedocs/search/api/v3/serializers.py create mode 100644 readthedocs/search/api/v3/urls.py create mode 100644 readthedocs/search/api/v3/views.py diff --git a/readthedocs/search/api/v3/__init__.py b/readthedocs/search/api/v3/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/readthedocs/search/api/v3/backend.py b/readthedocs/search/api/v3/backend.py new file mode 100644 index 00000000000..fdaecb40dbf --- /dev/null +++ b/readthedocs/search/api/v3/backend.py @@ -0,0 +1,185 @@ +from readthedocs.search.faceted_search import PageSearch +from readthedocs.builds.models import Version +from itertools import islice +from functools import cached_property +from readthedocs.projects.models import Project +from readthedocs.search.api.v3.parser import SearchQueryParser + + +class Backend: + + max_projects = 100 + + def __init__(self, *, request, query, allow_search_all=False): + self.request = request + self.query = query + self.allow_search_all = allow_search_all + + @cached_property + def projects(self): + return list(islice(self._get_projects_to_search(), self.max_projects)) + + def search(self, **kwargs): + projects = { + project.slug: version.slug + for project, version in self.projects + } + # If the search is done without projects, ES will search on all projects. + # If we don't have projects and the user provided arguments, + # it means we don't have anything to search on (no results). + # Or if we don't have projects and we don't allow searching all, + # we also just return. + if not projects and (self._has_arguments or not self.allow_search_all): + return None + + queryset = PageSearch( + query=self.parser.query, + projects=projects, + **kwargs, + ) + return queryset + + def _get_projects_to_search(self): + if not self._has_arguments: + return self._get_default_projects() + + for value in self.parser.arguments["project"]: + project, version = self._get_project_and_version(value) + if version and self._has_permission(self.request.user, version): + yield project, version + + for value in self.parser.arguments['subprojects']: + project, version = self._get_project_and_version(value) + + # Add the project itself. + if version and self._has_permission(self.request.user, version): + yield project, version + + # If the user didn't provide a version, version_slug will be `None`, + # and we add all subprojects with their default version, + # otherwise we will add all projects that match the given version. + _, version_slug = self._split_project_and_version(value) + if project: + yield from self._get_subprojects( + project=project, + version_slug=version_slug, + ) + + # Add all projects the user has access to. + if self.parser.arguments["user"] == "@me": + yield from self._get_projects_from_user() + + def _get_projects_from_user(self): + for project in Project.objects.for_user(user=self.request.user): + version = self._get_project_version( + project=project, + version_slug=project.default_version, + include_hidden=False, + ) + if version and self._has_permission(self.request.user, version): + yield project, version + + def _get_subprojects(self, project, version_slug=None): + """ + Get a tuple project/version of all subprojects of `project`. + + If `version_slug` doesn't match a version of the subproject, + the default version will be used. + If `version_slug` is None, we will always use the default version. + """ + subprojects = Project.objects.filter(superprojects__parent=project) + for subproject in subprojects: + version = None + if version_slug: + version = self._get_project_version( + project=subproject, + version_slug=version_slug, + include_hidden=False, + ) + + # Fallback to the default version of the subproject. + if not version and subproject.default_version: + version = self._get_project_version( + project=subproject, + version_slug=subproject.default_version, + include_hidden=False, + ) + + if version and self._has_permission(self.request.user, version): + yield project, version + + def _has_permission(self, user, version): + """ + Check if `user` is authorized to access `version`. + + The queryset from `_get_project_version` already filters public + projects. This is mainly to be overridden in .com to make use of + the auth backends in the proxied API. + """ + return True + + def _get_project_version(self, project, version_slug, include_hidden=True): + """ + Get a version from a given project. + + :param project: A `Project` object. + :param version_slug: The version slug. + :param include_hidden: If hidden versions should be considered. + """ + return ( + Version.internal + .public( + user=self.request.user, + project=project, + only_built=True, + include_hidden=include_hidden, + ) + .filter(slug=version_slug) + .first() + ) + + @property + def _has_arguments(self): + return any(self.parser.arguments.values()) + + def _get_default_projects(self): + if self.allow_search_all: + # Default to search all. + return [] + return self._get_projects_from_user() + + @cached_property + def parser(self): + parser = SearchQueryParser(self.query) + parser.parse() + return parser + + def _split_project_and_version(self, term): + """ + Split a term of the form ``{project}/{version}``. + + :returns: A tuple of project and version. + If the version part isn't found, `None` will be returned in its place. + """ + parts = term.split("/", maxsplit=1) + if len(parts) > 1: + return parts + return parts[0], None + + def _get_project_and_version(self, value): + project_slug, version_slug = self._split_project_and_version(value) + project = Project.objects.filter(slug=project_slug).first() + if not project: + return None, None + + if not version_slug: + version_slug = project.default_version + + if version_slug: + version = self._get_project_version( + project=project, + version_slug=version_slug, + ) + return project, version + + return None, None diff --git a/readthedocs/search/api/v3/parser.py b/readthedocs/search/api/v3/parser.py new file mode 100644 index 00000000000..8c90a2c95a9 --- /dev/null +++ b/readthedocs/search/api/v3/parser.py @@ -0,0 +1,70 @@ +class TextToken: + + def __init__(self, text): + self.text = text + + +class ArgumentToken: + + def __init__(self, *, name, value, type): + self.name = name + self.value = value + self.type = type + + +class SearchQueryParser: + + allowed_arguments = { + "project": list, + "subprojects": list, + "user": str, + } + + def __init__(self, query): + self._query = query + self.query = "" + self.arguments = {} + + def parse(self): + tokens = ( + self._get_token(text) + for text in self._query.split() + ) + query = [] + arguments = { + name: type() + for name, type in self.allowed_arguments.items() + } + for token in tokens: + if isinstance(token, TextToken): + query.append(token.text) + elif isinstance(token, ArgumentToken): + if token.type == str: + arguments[token.name] = token.value + elif token.type == list: + arguments[token.name].append(token.value) + else: + raise ValueError(f"Invalid argument type {token.type}") + else: + raise ValueError("Invalid node") + + self.query = self._unescape(" ".join(query)) + self.arguments = arguments + + def _get_token(self, text): + result = text.split(":", maxsplit=1) + if len(result) < 2: + return TextToken(text) + + name, value = result + if name in self.allowed_arguments: + return ArgumentToken( + name=name, + value=value, + type=self.allowed_arguments[name], + ) + + return TextToken(text) + + def _unescape(self, text): + return text.replace("\\:", ":") diff --git a/readthedocs/search/api/v3/serializers.py b/readthedocs/search/api/v3/serializers.py new file mode 100644 index 00000000000..fe111eab254 --- /dev/null +++ b/readthedocs/search/api/v3/serializers.py @@ -0,0 +1,23 @@ +from readthedocs.search.api.v2.serializers import PageSearchSerializer as PageSearchSerializerBase +from rest_framework import serializers + + +class PageSearchSerializer(PageSearchSerializerBase): + + project = serializers.SerializerMethodField() + version = serializers.SerializerMethodField() + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + self.fields.pop("project_alias") + + def get_project(self, obj): + return { + "slug": obj.project, + "alias": self.get_project_alias(obj), + } + + def get_version(self, obj): + return { + "slug": obj.version, + } diff --git a/readthedocs/search/api/v3/urls.py b/readthedocs/search/api/v3/urls.py new file mode 100644 index 00000000000..2172ab98098 --- /dev/null +++ b/readthedocs/search/api/v3/urls.py @@ -0,0 +1,7 @@ +from django.urls import path + +from readthedocs.search.api.v3.views import SearchAPI + +urlpatterns = [ + path("", SearchAPI.as_view(), name="search_api_v3"), +] diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py new file mode 100644 index 00000000000..e2195893f75 --- /dev/null +++ b/readthedocs/search/api/v3/views.py @@ -0,0 +1,125 @@ +from rest_framework.generics import GenericAPIView +from rest_framework.exceptions import ValidationError +from readthedocs.search import tasks +from django.utils import timezone +from readthedocs.projects.models import Feature +import structlog +from django.utils.translation import gettext as _ + +from readthedocs.search.api import SearchPagination +from readthedocs.search.api.v3.serializers import PageSearchSerializer +from readthedocs.search.api.v3.backend import Backend + + +log = structlog.get_logger(__name__) + + +class SearchAPI(GenericAPIView): + + """ + Server side search API V3. + + Required query parameters: + + - **q**: Search term. + + Check our [docs](https://docs.readthedocs.io/page/server-side-search.html#api) for more information. + """ # noqa + + http_method_names = ['get'] + pagination_class = SearchPagination + serializer_class = PageSearchSerializer + search_backend_class = Backend + + def get_view_name(self): + return "Search API V3" + + def _validate_query_params(self): + query = self.request.GET.get("q") + errors = {} + if not query: + errors["q"] = [_("This query parameter is required")] + if errors: + raise ValidationError(errors) + + @property + def _backend(self): + backend = self.search_backend_class( + request=self.request, + query=self.request.GET["q"], + ) + return backend + + def _get_search_query(self): + return self._backend.parser.query + + def _get_projects_to_search(self): + return self._backend.projects + + def _use_advanced_query(self): + # TODO: we should make this a parameter in the API, + # we are checking if the first project has this feature for now. + projects = self._get_projects_to_search() + if projects: + project = projects[0][0] + return not project.has_feature(Feature.DEFAULT_TO_FUZZY_SEARCH) + return True + + def get_queryset(self): + """ + Returns an Elasticsearch DSL search object or an iterator. + + .. note:: + + Calling ``list(search)`` over an DSL search object is the same as + calling ``search.execute().hits``. This is why an DSL search object + is compatible with DRF's paginator. + """ + search = self._backend.search( + use_advanced_query=self._use_advanced_query() + ) + if not search: + return [] + + return search + + def get(self, request, *args, **kwargs): + self._validate_query_params() + result = self.list() + self._record_query(result) + return result + + def _record_query(self, response): + total_results = response.data.get('count', 0) + time = timezone.now() + query = self._get_search_query().lower().strip() + # NOTE: I think this may be confusing, + # since the number of results is the total + # of searching on all projects, this specific project + # could have had 0 results. + for project, version in self._get_projects_to_search(): + tasks.record_search_query.delay( + project.slug, + version.slug, + query, + total_results, + time.isoformat(), + ) + + def list(self): + queryset = self.get_queryset() + page = self.paginator.paginate_queryset( + queryset, + self.request, + view=self, + ) + serializer = self.get_serializer( + page, many=True, projects=self._get_projects_to_search() + ) + response = self.paginator.get_paginated_response(serializer.data) + response.data["projects"] = [ + [project.slug, version.slug] + for project, version in self._get_projects_to_search() + ] + response.data["query"] = self._get_search_query() + return response diff --git a/readthedocs/urls.py b/readthedocs/urls.py index b378729e883..db32f19b1f3 100644 --- a/readthedocs/urls.py +++ b/readthedocs/urls.py @@ -91,6 +91,7 @@ re_path(r'^api/v2/', include('readthedocs.api.v2.urls')), # Keep `search_api` at root level, so the test does not fail for other API re_path(r"^api/v2/search/$", include("readthedocs.search.api.v2.urls")), + path("^api/v3/search/", include("readthedocs.search.api.v3.urls")), # Deprecated re_path(r'^api/v1/embed/', include('readthedocs.embed.urls')), re_path(r'^api/v2/embed/', include('readthedocs.embed.urls')), From 2ba58bc1f3111e7132b70b6843ca84819aaa33d0 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 27 Sep 2022 14:45:45 -0500 Subject: [PATCH 06/35] Support new syntax in the dashboard --- readthedocs/core/templatetags/core_tags.py | 4 +- readthedocs/search/api/v3/views.py | 3 +- readthedocs/search/faceted_search.py | 2 - readthedocs/search/views.py | 72 ++++++++++++------- .../templates/search/elastic_search.html | 63 ++++------------ 5 files changed, 66 insertions(+), 78 deletions(-) diff --git a/readthedocs/core/templatetags/core_tags.py b/readthedocs/core/templatetags/core_tags.py index eeb3727a306..87ec7e79a9e 100644 --- a/readthedocs/core/templatetags/core_tags.py +++ b/readthedocs/core/templatetags/core_tags.py @@ -70,9 +70,9 @@ def get_version(slug): @register.simple_tag -def url_replace(request, field, value): +def url_replace(request, field, *values): dict_ = request.GET.copy() - dict_[field] = value + dict_[field] = "".join(values) return dict_.urlencode() diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index e2195893f75..3382cae396c 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -76,7 +76,8 @@ def get_queryset(self): is compatible with DRF's paginator. """ search = self._backend.search( - use_advanced_query=self._use_advanced_query() + use_advanced_query=self._use_advanced_query(), + aggregate_results=False, ) if not search: return [] diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 4dfb8b0622f..56dc4a8bcef 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -20,8 +20,6 @@ log = structlog.get_logger(__name__) -ALL_FACETS = ['project', 'version', 'role_name', 'language'] - class RTDFacetedSearch(FacetedSearch): diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index baa18478b4a..a0fd5d3eafc 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -5,14 +5,14 @@ from django.conf import settings from django.shortcuts import get_object_or_404, render from django.views import View +from readthedocs.search.api.v3.backend import Backend -from readthedocs.builds.constants import LATEST from readthedocs.projects.models import Feature, Project from readthedocs.search.api.v2.serializers import ( - PageSearchSerializer, ProjectSearchSerializer, ) -from readthedocs.search.faceted_search import ALL_FACETS, PageSearch, ProjectSearch +from readthedocs.search.api.v3.serializers import PageSearchSerializer +from readthedocs.search.faceted_search import PageSearch, ProjectSearch log = structlog.get_logger(__name__) @@ -21,10 +21,7 @@ ( 'query', 'type', - 'project', - 'version', 'language', - 'role_name', ), ) @@ -33,6 +30,7 @@ class SearchViewBase(View): http_method_names = ['get'] max_search_results = 50 + available_facets = ["language"] def _search(self, *, user_input, projects, use_advanced_query): """Return search results and facets given a `user_input` and `projects` to filter by.""" @@ -40,7 +38,7 @@ def _search(self, *, user_input, projects, use_advanced_query): return [], {} filters = {} - for avail_facet in ALL_FACETS: + for avail_facet in self.available_facets: value = getattr(user_input, avail_facet, None) if value: filters[avail_facet] = value @@ -98,15 +96,12 @@ def get(self, request, project_slug): user_input = UserInput( query=request.GET.get('q'), type='file', - project=project_slug, - version=request.GET.get('version', LATEST), - role_name=request.GET.get('role_name'), language=None, ) results, facets = self._search( user_input=user_input, - projects=[user_input.project], + projects=[project_slug], use_advanced_query=use_advanced_query, ) @@ -114,7 +109,9 @@ def get(self, request, project_slug): template_context = user_input._asdict() template_context.update({ + "search_query": user_input.query, 'results': results, + "total_count": len(results), 'facets': facets, 'project_obj': project_obj, }) @@ -135,22 +132,51 @@ class GlobalSearchView(SearchViewBase): - q: search term - type: type of document to search (project or file) - - project: project to filter by - - language: project language to filter by - - version: version to filter by - - role_name: sphinx role to filter by + - language: project language to filter by (only valid if type is project) """ def get(self, request): user_input = UserInput( query=request.GET.get('q'), type=request.GET.get('type', 'project'), - project=request.GET.get('project'), - version=request.GET.get('version', LATEST), language=request.GET.get('language'), - role_name=request.GET.get('role_name'), ) + if user_input.type == "file": + return self._searh_files() + return self._search_projects(user_input, request) + + def _searh_files(self): + results, facets = [], {} + search_query = "" + query = self.request.GET.get("q") + if query: + backend = Backend( + request=self.request, + query=query, + allow_search_all=not settings.ALLOW_PRIVATE_REPOS, + ) + search_query = backend.parser.query + search = backend.search() + if search: + results = search[:self.max_search_results].execute() + facets = results.facets + results = PageSearchSerializer(results, projects=backend.projects, many=True).data + + return render( + self.request, + 'search/elastic_search.html', + { + "query": query, + "search_query": search_query, + "results": results, + "facets": facets, + "total_count": len(results), + "type": "file", + }, + ) + + def _search_projects(self, user_input, request): projects = [] # If we allow private projects, # we only search on projects the user belongs or have access to. @@ -170,16 +196,12 @@ def get(self, request): use_advanced_query=True, ) - serializers = { - 'project': ProjectSearchSerializer, - 'file': PageSearchSerializer, - } - serializer = serializers.get(user_input.type, ProjectSearchSerializer) - results = serializer(results, many=True).data - + results = ProjectSearchSerializer(results, many=True).data template_context = user_input._asdict() template_context.update({ + "search_query": user_input.query, 'results': results, + "total_count": len(results), 'facets': facets, }) diff --git a/readthedocs/templates/search/elastic_search.html b/readthedocs/templates/search/elastic_search.html index 8773b353a3b..921e9f9eae4 100644 --- a/readthedocs/templates/search/elastic_search.html +++ b/readthedocs/templates/search/elastic_search.html @@ -36,30 +36,17 @@
{% trans 'Object Type' %}
{% if facets.project and not project_obj %}
{% trans 'Projects' %}
+ +
  • + + {% trans 'Search all' %} + +
  • + {% for name, count, selected in facets.project %}
  • - {% if project == name %} - {{ name }} - {% else %} - {{ name }} - {% endif %} - ({{ count }}) - -
  • - {% endfor %} -
    - {% endif %} - - {% if facets.version %} -
    {% trans 'Version' %}
    - {% for name, count, selected in facets.version %} -
  • - {% if version == name %} - {{ name }} - {% else %} - {{ name }} - {% endif %} - ({{ count }}) + + {{ name }} ({{ count }})
  • {% endfor %} @@ -82,23 +69,6 @@
    {% trans 'Language' %}

    {% endif %} - - {% if facets.role_name %} -
    {% trans 'Code API Type' %}
    - {% for name, count, selected in facets.role_name %} -
  • - {% if role_name == name %} - {{ name }} - {% else %} - {{ name }} - {% endif %} - ({{ count }}) - -
  • - {% endfor %} -
    - {% endif %} - {% block sponsor %}
    Search is sponsored by Elastic, and hosted on Elastic Cloud. @@ -115,10 +85,7 @@

    {% trans 'Search' %}

    {% if type %} {% endif %} - {% if project %} {% endif %} - {% if version %} {% endif %} {% if language %} {% endif %} - {% if role_name %} {% endif %}
    {% comment %}Translators: This is about starting a search{% endcomment %} @@ -135,8 +102,8 @@

    {% trans 'Search' %}

    - {% blocktrans with count=results.hits.total query=query|default:"" %} - {{ count }} results for `{{ query }}` + {% blocktrans with count=total_count|default:"0" query=search_query|default:"" %} + {{ count }} results for {{ query }} {% endblocktrans %}

    @@ -167,14 +134,14 @@

    {% endfor %} {% elif result.type == 'page' %} - - {{ result.project }} - {% if result.highlights.title %} {{ result.highlights.title.0|safe }} {% else %} {{ result.title }} {% endif %} + + {{ result.project.slug }} - {% if result.highlights.title %} {{ result.highlights.title.0|safe }} {% else %} {{ result.title }} {% endif %} {% for block in result.blocks %} {% if block.type == 'domain' %}

    - + {% if block.highlights.name %} {% with domain_name=block.highlights.name %} [{{ block.role }}]: {{ domain_name.0|safe }} @@ -198,7 +165,7 @@

    {% elif block.type == 'section' %}

    - + {% if block.highlights.title %} {% with section_title=block.highlights.title %} {{ section_title.0|safe }} From d63060f10007a1fa011cb88e471fce88202af9b3 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 27 Sep 2022 18:57:19 -0500 Subject: [PATCH 07/35] Black --- readthedocs/core/templatetags/core_tags.py | 1 - readthedocs/search/api/v3/backend.py | 21 +++++----- readthedocs/search/api/v3/parser.py | 12 +----- readthedocs/search/api/v3/serializers.py | 7 +++- readthedocs/search/api/v3/views.py | 19 +++++---- readthedocs/search/faceted_search.py | 2 +- readthedocs/search/views.py | 47 ++++++++++++---------- 7 files changed, 52 insertions(+), 57 deletions(-) diff --git a/readthedocs/core/templatetags/core_tags.py b/readthedocs/core/templatetags/core_tags.py index 87ec7e79a9e..6675940d839 100644 --- a/readthedocs/core/templatetags/core_tags.py +++ b/readthedocs/core/templatetags/core_tags.py @@ -14,7 +14,6 @@ from readthedocs.core.resolver import resolve from readthedocs.projects.models import Project - register = template.Library() diff --git a/readthedocs/search/api/v3/backend.py b/readthedocs/search/api/v3/backend.py index fdaecb40dbf..31018bff9c9 100644 --- a/readthedocs/search/api/v3/backend.py +++ b/readthedocs/search/api/v3/backend.py @@ -1,9 +1,10 @@ -from readthedocs.search.faceted_search import PageSearch -from readthedocs.builds.models import Version -from itertools import islice from functools import cached_property +from itertools import islice + +from readthedocs.builds.models import Version from readthedocs.projects.models import Project from readthedocs.search.api.v3.parser import SearchQueryParser +from readthedocs.search.faceted_search import PageSearch class Backend: @@ -20,10 +21,7 @@ def projects(self): return list(islice(self._get_projects_to_search(), self.max_projects)) def search(self, **kwargs): - projects = { - project.slug: version.slug - for project, version in self.projects - } + projects = {project.slug: version.slug for project, version in self.projects} # If the search is done without projects, ES will search on all projects. # If we don't have projects and the user provided arguments, # it means we don't have anything to search on (no results). @@ -31,7 +29,7 @@ def search(self, **kwargs): # we also just return. if not projects and (self._has_arguments or not self.allow_search_all): return None - + queryset = PageSearch( query=self.parser.query, projects=projects, @@ -47,8 +45,8 @@ def _get_projects_to_search(self): project, version = self._get_project_and_version(value) if version and self._has_permission(self.request.user, version): yield project, version - - for value in self.parser.arguments['subprojects']: + + for value in self.parser.arguments["subprojects"]: project, version = self._get_project_and_version(value) # Add the project itself. @@ -127,8 +125,7 @@ def _get_project_version(self, project, version_slug, include_hidden=True): :param include_hidden: If hidden versions should be considered. """ return ( - Version.internal - .public( + Version.internal.public( user=self.request.user, project=project, only_built=True, diff --git a/readthedocs/search/api/v3/parser.py b/readthedocs/search/api/v3/parser.py index 8c90a2c95a9..369800d362e 100644 --- a/readthedocs/search/api/v3/parser.py +++ b/readthedocs/search/api/v3/parser.py @@ -1,11 +1,9 @@ class TextToken: - def __init__(self, text): self.text = text class ArgumentToken: - def __init__(self, *, name, value, type): self.name = name self.value = value @@ -26,15 +24,9 @@ def __init__(self, query): self.arguments = {} def parse(self): - tokens = ( - self._get_token(text) - for text in self._query.split() - ) + tokens = (self._get_token(text) for text in self._query.split()) query = [] - arguments = { - name: type() - for name, type in self.allowed_arguments.items() - } + arguments = {name: type() for name, type in self.allowed_arguments.items()} for token in tokens: if isinstance(token, TextToken): query.append(token.text) diff --git a/readthedocs/search/api/v3/serializers.py b/readthedocs/search/api/v3/serializers.py index fe111eab254..3359f29aee6 100644 --- a/readthedocs/search/api/v3/serializers.py +++ b/readthedocs/search/api/v3/serializers.py @@ -1,6 +1,9 @@ -from readthedocs.search.api.v2.serializers import PageSearchSerializer as PageSearchSerializerBase from rest_framework import serializers +from readthedocs.search.api.v2.serializers import ( + PageSearchSerializer as PageSearchSerializerBase, +) + class PageSearchSerializer(PageSearchSerializerBase): @@ -10,7 +13,7 @@ class PageSearchSerializer(PageSearchSerializerBase): def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) self.fields.pop("project_alias") - + def get_project(self, obj): return { "slug": obj.project, diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index 3382cae396c..a2bd1cd7814 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -1,15 +1,14 @@ -from rest_framework.generics import GenericAPIView -from rest_framework.exceptions import ValidationError -from readthedocs.search import tasks -from django.utils import timezone -from readthedocs.projects.models import Feature import structlog +from django.utils import timezone from django.utils.translation import gettext as _ +from rest_framework.exceptions import ValidationError +from rest_framework.generics import GenericAPIView +from readthedocs.projects.models import Feature +from readthedocs.search import tasks from readthedocs.search.api import SearchPagination -from readthedocs.search.api.v3.serializers import PageSearchSerializer from readthedocs.search.api.v3.backend import Backend - +from readthedocs.search.api.v3.serializers import PageSearchSerializer log = structlog.get_logger(__name__) @@ -26,7 +25,7 @@ class SearchAPI(GenericAPIView): Check our [docs](https://docs.readthedocs.io/page/server-side-search.html#api) for more information. """ # noqa - http_method_names = ['get'] + http_method_names = ["get"] pagination_class = SearchPagination serializer_class = PageSearchSerializer search_backend_class = Backend @@ -78,7 +77,7 @@ def get_queryset(self): search = self._backend.search( use_advanced_query=self._use_advanced_query(), aggregate_results=False, - ) + ) if not search: return [] @@ -91,7 +90,7 @@ def get(self, request, *args, **kwargs): return result def _record_query(self, response): - total_results = response.data.get('count', 0) + total_results = response.data.get("count", 0) time = timezone.now() query = self._get_search_query().lower().strip() # NOTE: I think this may be confusing, diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 56dc4a8bcef..2fc140d6e19 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -1,6 +1,6 @@ -import structlog import re +import structlog from django.conf import settings from elasticsearch import Elasticsearch from elasticsearch_dsl import FacetedSearch, TermsFacet diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index a0fd5d3eafc..3dcb18930d0 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -5,12 +5,10 @@ from django.conf import settings from django.shortcuts import get_object_or_404, render from django.views import View -from readthedocs.search.api.v3.backend import Backend from readthedocs.projects.models import Feature, Project -from readthedocs.search.api.v2.serializers import ( - ProjectSearchSerializer, -) +from readthedocs.search.api.v2.serializers import ProjectSearchSerializer +from readthedocs.search.api.v3.backend import Backend from readthedocs.search.api.v3.serializers import PageSearchSerializer from readthedocs.search.faceted_search import PageSearch, ProjectSearch @@ -108,13 +106,15 @@ def get(self, request, project_slug): results = PageSearchSerializer(results, many=True).data template_context = user_input._asdict() - template_context.update({ - "search_query": user_input.query, - 'results': results, - "total_count": len(results), - 'facets': facets, - 'project_obj': project_obj, - }) + template_context.update( + { + "search_query": user_input.query, + "results": results, + "total_count": len(results), + "facets": facets, + "project_obj": project_obj, + } + ) return render( request, @@ -158,13 +158,17 @@ def _searh_files(self): search_query = backend.parser.query search = backend.search() if search: - results = search[:self.max_search_results].execute() + results = search[: self.max_search_results].execute() facets = results.facets - results = PageSearchSerializer(results, projects=backend.projects, many=True).data + results = PageSearchSerializer( + results, + projects=backend.projects, + many=True, + ).data return render( self.request, - 'search/elastic_search.html', + "search/elastic_search.html", { "query": query, "search_query": search_query, @@ -175,7 +179,6 @@ def _searh_files(self): }, ) - def _search_projects(self, user_input, request): projects = [] # If we allow private projects, @@ -198,12 +201,14 @@ def _search_projects(self, user_input, request): results = ProjectSearchSerializer(results, many=True).data template_context = user_input._asdict() - template_context.update({ - "search_query": user_input.query, - 'results': results, - "total_count": len(results), - 'facets': facets, - }) + template_context.update( + { + "search_query": user_input.query, + "results": results, + "total_count": len(results), + "facets": facets, + } + ) return render( request, From ae4dddf9dd4e478f9fa4df144cb1d19f3271b50d Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 27 Sep 2022 19:55:49 -0500 Subject: [PATCH 08/35] Redirect to global search from project search --- readthedocs/search/api/v3/utils.py | 9 + readthedocs/search/api/v3/views.py | 14 +- readthedocs/search/faceted_search.py | 18 -- readthedocs/search/views.py | 193 +++++++----------- .../templates/core/project_bar_base.html | 2 +- 5 files changed, 88 insertions(+), 148 deletions(-) create mode 100644 readthedocs/search/api/v3/utils.py diff --git a/readthedocs/search/api/v3/utils.py b/readthedocs/search/api/v3/utils.py new file mode 100644 index 00000000000..5b4e88c01dc --- /dev/null +++ b/readthedocs/search/api/v3/utils.py @@ -0,0 +1,9 @@ +from readthedocs.projects.models import Feature + +def should_use_advanced_query(projects): + # TODO: we should make this a parameter in the API, + # we are checking if the first project has this feature for now. + if projects: + project = projects[0][0] + return not project.has_feature(Feature.DEFAULT_TO_FUZZY_SEARCH) + return True diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index a2bd1cd7814..599af347809 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -1,10 +1,10 @@ import structlog +from readthedocs.search.api.v3.utils import should_use_advanced_query from django.utils import timezone from django.utils.translation import gettext as _ from rest_framework.exceptions import ValidationError from rest_framework.generics import GenericAPIView -from readthedocs.projects.models import Feature from readthedocs.search import tasks from readthedocs.search.api import SearchPagination from readthedocs.search.api.v3.backend import Backend @@ -55,15 +55,6 @@ def _get_search_query(self): def _get_projects_to_search(self): return self._backend.projects - def _use_advanced_query(self): - # TODO: we should make this a parameter in the API, - # we are checking if the first project has this feature for now. - projects = self._get_projects_to_search() - if projects: - project = projects[0][0] - return not project.has_feature(Feature.DEFAULT_TO_FUZZY_SEARCH) - return True - def get_queryset(self): """ Returns an Elasticsearch DSL search object or an iterator. @@ -74,8 +65,9 @@ def get_queryset(self): calling ``search.execute().hits``. This is why an DSL search object is compatible with DRF's paginator. """ + use_advanced_query = should_use_advanced_query(self._get_projects_to_search()) search = self._backend.search( - use_advanced_query=self._use_advanced_query(), + use_advanced_query=use_advanced_query, aggregate_results=False, ) if not search: diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index 2fc140d6e19..e5fe90e3abb 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -4,7 +4,6 @@ from django.conf import settings from elasticsearch import Elasticsearch from elasticsearch_dsl import FacetedSearch, TermsFacet -from elasticsearch_dsl.faceted_search import NestedFacet from elasticsearch_dsl.query import ( Bool, FunctionScore, @@ -266,11 +265,6 @@ def query(self, search, query): class PageSearch(RTDFacetedSearch): facets = { 'project': TermsFacet(field='project'), - 'version': TermsFacet(field='version'), - 'role_name': NestedFacet( - 'domains', - TermsFacet(field='domains.role_name') - ), } doc_types = [PageDocument] index = PageDocument._index._name @@ -363,18 +357,6 @@ def _get_nested_query(self, *, query, path, fields): for field in fields ] - # The ``post_filter`` filter will only filter documents - # at the parent level (domains is a nested document), - # resulting in results with domains that don't match the current - # role_name being filtered, so we need to force filtering by role_name - # on the ``domains`` document here. See #8268. - # TODO: We should use a flattened document instead - # to avoid this kind of problems and have faster queries. - role_name = self.filter_values.get('role_name') - if path == 'domains' and role_name: - role_name_query = Bool(must=Terms(**{'domains.role_name': role_name})) - bool_query = Bool(must=[role_name_query, bool_query]) - highlight = dict( self._highlight_options, fields={ diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 3dcb18930d0..0fc35afb8d5 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -3,14 +3,18 @@ import structlog from django.conf import settings -from django.shortcuts import get_object_or_404, render +from readthedocs.search.api.v3.utils import should_use_advanced_query from django.views import View +from django.urls import reverse +from django.http.response import HttpResponseRedirect +from django.views.generic import TemplateView +from urllib.parse import urlencode -from readthedocs.projects.models import Feature, Project +from readthedocs.projects.models import Project from readthedocs.search.api.v2.serializers import ProjectSearchSerializer from readthedocs.search.api.v3.backend import Backend from readthedocs.search.api.v3.serializers import PageSearchSerializer -from readthedocs.search.faceted_search import PageSearch, ProjectSearch +from readthedocs.search.faceted_search import ProjectSearch log = structlog.get_logger(__name__) @@ -24,106 +28,27 @@ ) -class SearchViewBase(View): - - http_method_names = ['get'] - max_search_results = 50 - available_facets = ["language"] - - def _search(self, *, user_input, projects, use_advanced_query): - """Return search results and facets given a `user_input` and `projects` to filter by.""" - if not user_input.query: - return [], {} - - filters = {} - for avail_facet in self.available_facets: - value = getattr(user_input, avail_facet, None) - if value: - filters[avail_facet] = value - - search_facets = { - 'project': ProjectSearch, - 'file': PageSearch, - } - faceted_search_class = search_facets.get( - user_input.type, - ProjectSearch, - ) - search = faceted_search_class( - query=user_input.query, - filters=filters, - projects=projects, - use_advanced_query=use_advanced_query, - ) - results = search[:self.max_search_results].execute() - facets = results.facets - - # Make sure the selected facets are displayed, - # even when they return 0 results. - for facet in facets: - value = getattr(user_input, facet, None) - if value and value not in (name for name, *_ in facets[facet]): - facets[facet].insert(0, (value, 0, True)) - - return results, facets - - -class ProjectSearchView(SearchViewBase): +class ProjectSearchView(View): """ Search view of the ``search`` tab. + This redirects to the main search now. + Query params: - q: search term - - version: version to filter by - - role_name: sphinx role to filter by """ - def _get_project(self, project_slug): - queryset = Project.objects.public(self.request.user) - project = get_object_or_404(queryset, slug=project_slug) - return project + http_method_names = ['get'] def get(self, request, project_slug): - project_obj = self._get_project(project_slug) - use_advanced_query = not project_obj.has_feature( - Feature.DEFAULT_TO_FUZZY_SEARCH, - ) - - user_input = UserInput( - query=request.GET.get('q'), - type='file', - language=None, - ) - - results, facets = self._search( - user_input=user_input, - projects=[project_slug], - use_advanced_query=use_advanced_query, - ) - - results = PageSearchSerializer(results, many=True).data + query = request.GET.get("q", "") + url = reverse("search") + "?" + urlencode({"q": f"project:{project_slug} {query}"}) + return HttpResponseRedirect(url) - template_context = user_input._asdict() - template_context.update( - { - "search_query": user_input.query, - "results": results, - "total_count": len(results), - "facets": facets, - "project_obj": project_obj, - } - ) - return render( - request, - 'search/elastic_search.html', - template_context, - ) - - -class GlobalSearchView(SearchViewBase): +class GlobalSearchView(TemplateView): """ Global search enabled for logged out users and anyone using the dashboard. @@ -135,19 +60,28 @@ class GlobalSearchView(SearchViewBase): - language: project language to filter by (only valid if type is project) """ - def get(self, request): + http_method_names = ['get'] + max_search_results = 50 + available_facets = ["language"] + template_name = "search/elastic_search.html" + + def get_context_data(self, **kwargs): + context = super().get_context_data(**kwargs) user_input = UserInput( - query=request.GET.get('q'), - type=request.GET.get('type', 'project'), - language=request.GET.get('language'), + query=self.request.GET.get('q'), + type=self.request.GET.get('type', 'file'), + language=self.request.GET.get('language'), ) if user_input.type == "file": - return self._searh_files() - return self._search_projects(user_input, request) + context.update(self._searh_files()) + else: + context.update(self._search_projects(user_input, self.request)) + return context def _searh_files(self): results, facets = [], {} search_query = "" + total_count = 0 query = self.request.GET.get("q") if query: backend = Backend( @@ -156,30 +90,29 @@ def _searh_files(self): allow_search_all=not settings.ALLOW_PRIVATE_REPOS, ) search_query = backend.parser.query - search = backend.search() + use_advanced_query = should_use_advanced_query(backend.projects) + search = backend.search(use_advanced_query=use_advanced_query) if search: results = search[: self.max_search_results].execute() facets = results.facets + total_count = results.hits.total["value"] results = PageSearchSerializer( results, projects=backend.projects, many=True, ).data - return render( - self.request, - "search/elastic_search.html", - { - "query": query, - "search_query": search_query, - "results": results, - "facets": facets, - "total_count": len(results), - "type": "file", - }, - ) + return { + "query": query, + "search_query": search_query, + "results": results, + "facets": facets, + "total_count": total_count, + "type": "file", + } def _search_projects(self, user_input, request): + total_count = 0 projects = [] # If we allow private projects, # we only search on projects the user belongs or have access to. @@ -198,20 +131,44 @@ def _search_projects(self, user_input, request): projects=projects, use_advanced_query=True, ) - - results = ProjectSearchSerializer(results, many=True).data - template_context = user_input._asdict() - template_context.update( + total_count = results.hits.total["value"] + results = ProjectSearchSerializer(results, many=True).data + context = user_input._asdict() + context.update( { "search_query": user_input.query, "results": results, - "total_count": len(results), + "total_count": total_count, "facets": facets, } ) + return context + + def _search(self, *, user_input, projects, use_advanced_query): + """Return search results and facets given a `user_input` and `projects` to filter by.""" + if not user_input.query: + return [], {} + + filters = {} + for avail_facet in self.available_facets: + value = getattr(user_input, avail_facet, None) + if value: + filters[avail_facet] = value - return render( - request, - 'search/elastic_search.html', - template_context, + search = ProjectSearch( + query=user_input.query, + filters=filters, + projects=projects, + use_advanced_query=use_advanced_query, ) + results = search[:self.max_search_results].execute() + facets = results.facets + + # Make sure the selected facets are displayed, + # even when they return 0 results. + for facet in facets: + value = getattr(user_input, facet, None) + if value and value not in (name for name, *_ in facets[facet]): + facets[facet].insert(0, (value, 0, True)) + + return results, facets diff --git a/readthedocs/templates/core/project_bar_base.html b/readthedocs/templates/core/project_bar_base.html index 5e6252991e9..1b25a857a3f 100644 --- a/readthedocs/templates/core/project_bar_base.html +++ b/readthedocs/templates/core/project_bar_base.html @@ -41,7 +41,7 @@

  • {% trans "Downloads" %}
  • -
  • {% trans "Search" %}
  • +
  • {% trans "Search" %}
  • {% trans "Builds" %}
  • From 36be623f607416fb2f6f0caa05e4a8b870f74da7 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 27 Sep 2022 19:58:36 -0500 Subject: [PATCH 09/35] Black --- readthedocs/core/templatetags/core_tags.py | 2 -- readthedocs/search/api/v3/utils.py | 1 + readthedocs/search/api/v3/views.py | 2 +- readthedocs/search/views.py | 31 +++++++++++++--------- 4 files changed, 20 insertions(+), 16 deletions(-) diff --git a/readthedocs/core/templatetags/core_tags.py b/readthedocs/core/templatetags/core_tags.py index 6675940d839..ad77a11b31d 100644 --- a/readthedocs/core/templatetags/core_tags.py +++ b/readthedocs/core/templatetags/core_tags.py @@ -5,9 +5,7 @@ from urllib.parse import urlencode from django import template -from django.conf import settings from django.core.serializers.json import DjangoJSONEncoder -from django.utils.encoding import force_bytes, force_str from django.utils.safestring import mark_safe from readthedocs import __version__ diff --git a/readthedocs/search/api/v3/utils.py b/readthedocs/search/api/v3/utils.py index 5b4e88c01dc..7e3c3b82f9b 100644 --- a/readthedocs/search/api/v3/utils.py +++ b/readthedocs/search/api/v3/utils.py @@ -1,5 +1,6 @@ from readthedocs.projects.models import Feature + def should_use_advanced_query(projects): # TODO: we should make this a parameter in the API, # we are checking if the first project has this feature for now. diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index 599af347809..8ee6748ef24 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -1,5 +1,4 @@ import structlog -from readthedocs.search.api.v3.utils import should_use_advanced_query from django.utils import timezone from django.utils.translation import gettext as _ from rest_framework.exceptions import ValidationError @@ -9,6 +8,7 @@ from readthedocs.search.api import SearchPagination from readthedocs.search.api.v3.backend import Backend from readthedocs.search.api.v3.serializers import PageSearchSerializer +from readthedocs.search.api.v3.utils import should_use_advanced_query log = structlog.get_logger(__name__) diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 0fc35afb8d5..2897e91c227 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -1,19 +1,19 @@ """Search views.""" import collections +from urllib.parse import urlencode import structlog from django.conf import settings -from readthedocs.search.api.v3.utils import should_use_advanced_query -from django.views import View -from django.urls import reverse from django.http.response import HttpResponseRedirect +from django.urls import reverse +from django.views import View from django.views.generic import TemplateView -from urllib.parse import urlencode from readthedocs.projects.models import Project from readthedocs.search.api.v2.serializers import ProjectSearchSerializer from readthedocs.search.api.v3.backend import Backend from readthedocs.search.api.v3.serializers import PageSearchSerializer +from readthedocs.search.api.v3.utils import should_use_advanced_query from readthedocs.search.faceted_search import ProjectSearch log = structlog.get_logger(__name__) @@ -40,11 +40,15 @@ class ProjectSearchView(View): - q: search term """ - http_method_names = ['get'] + http_method_names = ["get"] def get(self, request, project_slug): query = request.GET.get("q", "") - url = reverse("search") + "?" + urlencode({"q": f"project:{project_slug} {query}"}) + url = ( + reverse("search") + + "?" + + urlencode({"q": f"project:{project_slug} {query}"}) + ) return HttpResponseRedirect(url) @@ -60,20 +64,20 @@ class GlobalSearchView(TemplateView): - language: project language to filter by (only valid if type is project) """ - http_method_names = ['get'] + http_method_names = ["get"] max_search_results = 50 available_facets = ["language"] template_name = "search/elastic_search.html" def get_context_data(self, **kwargs): - context = super().get_context_data(**kwargs) + context = super().get_context_data(**kwargs) user_input = UserInput( - query=self.request.GET.get('q'), - type=self.request.GET.get('type', 'file'), - language=self.request.GET.get('language'), + query=self.request.GET.get("q"), + type=self.request.GET.get("type", "file"), + language=self.request.GET.get("language"), ) if user_input.type == "file": - context.update(self._searh_files()) + context.update(self._searh_files()) else: context.update(self._search_projects(user_input, self.request)) return context @@ -161,7 +165,8 @@ def _search(self, *, user_input, projects, use_advanced_query): projects=projects, use_advanced_query=use_advanced_query, ) - results = search[:self.max_search_results].execute() + # pep8 and blank don't agree on having a space before :. + results = search[:self.max_search_results].execute() # noqa facets = results.facets # Make sure the selected facets are displayed, From 74b93846cc34cc2f3bf0bc20b2cbfc62ce0085c0 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 28 Sep 2022 12:40:56 -0500 Subject: [PATCH 10/35] Refactor --- readthedocs/api/v3/proxied_urls.py | 6 +- readthedocs/search/api/v3/backend.py | 58 +++++++++++++++---- .../api/v3/{parser.py => queryparser.py} | 25 ++++++-- readthedocs/search/api/v3/serializers.py | 12 ++++ readthedocs/search/api/v3/views.py | 11 ++++ readthedocs/search/views.py | 5 +- 6 files changed, 98 insertions(+), 19 deletions(-) rename readthedocs/search/api/v3/{parser.py => queryparser.py} (62%) diff --git a/readthedocs/api/v3/proxied_urls.py b/readthedocs/api/v3/proxied_urls.py index 51dd7b53704..d446203d37a 100644 --- a/readthedocs/api/v3/proxied_urls.py +++ b/readthedocs/api/v3/proxied_urls.py @@ -5,12 +5,14 @@ so they can make use of features that require to have access to their cookies. """ -from django.conf.urls import re_path +from django.urls import path, re_path from readthedocs.api.v3.proxied_views import ProxiedEmbedAPI +from readthedocs.search.api.v3.views import ProxiedSearchAPI api_proxied_urls = [ - re_path(r'embed/', ProxiedEmbedAPI.as_view(), name='embed_api_v3'), + re_path("embed/", ProxiedEmbedAPI.as_view(), name="embed_api_v3"), + path("search/", ProxiedSearchAPI.as_view(), name="search_api_v3"), ] urlpatterns = api_proxied_urls diff --git a/readthedocs/search/api/v3/backend.py b/readthedocs/search/api/v3/backend.py index 31018bff9c9..68e63cfe736 100644 --- a/readthedocs/search/api/v3/backend.py +++ b/readthedocs/search/api/v3/backend.py @@ -3,42 +3,80 @@ from readthedocs.builds.models import Version from readthedocs.projects.models import Project -from readthedocs.search.api.v3.parser import SearchQueryParser +from readthedocs.search.api.v3.queryparser import SearchQueryParser from readthedocs.search.faceted_search import PageSearch class Backend: - max_projects = 100 - - def __init__(self, *, request, query, allow_search_all=False): + """ + Parse the query, search, and return the projects used in the search. + + :param arguments_required: If `True` and the user didn't provide + any arguments in the query, we don't perform the search. + :param default_all: If `True` and `arguments_required` is `False` + we search all projects by default, otherwise we search all projects + the user has access to. + :param max_projects: The maximum number of projects used in the search. + This limit is only applied for projects given explicitly, + not when we default to search all projects. + """ + + def __init__( + self, + *, + request, + query, + arguments_required=True, + default_all=False, + max_projects=100 + ): self.request = request self.query = query - self.allow_search_all = allow_search_all + self.arguments_required = arguments_required + self.default_all = default_all + self.max_projects = max_projects @cached_property def projects(self): + """ + Return all projects used in this search. + + If empty, it will search all projects. + + :returns: A list of tuples (project, version). + """ return list(islice(self._get_projects_to_search(), self.max_projects)) def search(self, **kwargs): + """ + Perform the search. + + :param kwargs: All kwargs are passed to the `PageSearch` constructor. + """ + if not self._has_arguments and self.arguments_required: + return None + projects = {project.slug: version.slug for project, version in self.projects} # If the search is done without projects, ES will search on all projects. # If we don't have projects and the user provided arguments, # it means we don't have anything to search on (no results). # Or if we don't have projects and we don't allow searching all, # we also just return. - if not projects and (self._has_arguments or not self.allow_search_all): + if not projects and (self._has_arguments or not self.default_all): return None - queryset = PageSearch( + search = PageSearch( query=self.parser.query, projects=projects, **kwargs, ) - return queryset + return search def _get_projects_to_search(self): if not self._has_arguments: + if self.arguments_required: + return None return self._get_default_projects() for value in self.parser.arguments["project"]: @@ -135,12 +173,12 @@ def _get_project_version(self, project, version_slug, include_hidden=True): .first() ) - @property + @cached_property def _has_arguments(self): return any(self.parser.arguments.values()) def _get_default_projects(self): - if self.allow_search_all: + if self.default_all: # Default to search all. return [] return self._get_projects_from_user() diff --git a/readthedocs/search/api/v3/parser.py b/readthedocs/search/api/v3/queryparser.py similarity index 62% rename from readthedocs/search/api/v3/parser.py rename to readthedocs/search/api/v3/queryparser.py index 369800d362e..9d99576f75b 100644 --- a/readthedocs/search/api/v3/parser.py +++ b/readthedocs/search/api/v3/queryparser.py @@ -12,6 +12,8 @@ def __init__(self, *, name, value, type): class SearchQueryParser: + """Simplified and minimal parser for ``name:value`` expressions.""" + allowed_arguments = { "project": list, "subprojects": list, @@ -21,27 +23,40 @@ class SearchQueryParser: def __init__(self, query): self._query = query self.query = "" - self.arguments = {} + # Set all arguments to their default values. + self.arguments = {name: type() for name, type in self.allowed_arguments.items()} def parse(self): + r""" + Parse the expression into a query and arguments. + + The parser steps are: + + - Split the string using white spaces. + - Tokenize each string into a ``text`` or ``argument`` token. + A valid argument has the ``name:value`` form, + and it's declared in `allowed_arguments`, + anything else is considered a text token. + - All text tokens are concatenated to form the final query. + + To interpret an argument as text, it can be escaped as ``name\:value``. + """ tokens = (self._get_token(text) for text in self._query.split()) query = [] - arguments = {name: type() for name, type in self.allowed_arguments.items()} for token in tokens: if isinstance(token, TextToken): query.append(token.text) elif isinstance(token, ArgumentToken): if token.type == str: - arguments[token.name] = token.value + self.arguments[token.name] = token.value elif token.type == list: - arguments[token.name].append(token.value) + self.arguments[token.name].append(token.value) else: raise ValueError(f"Invalid argument type {token.type}") else: raise ValueError("Invalid node") self.query = self._unescape(" ".join(query)) - self.arguments = arguments def _get_token(self, text): result = text.split(":", maxsplit=1) diff --git a/readthedocs/search/api/v3/serializers.py b/readthedocs/search/api/v3/serializers.py index 3359f29aee6..ac6c9177f3e 100644 --- a/readthedocs/search/api/v3/serializers.py +++ b/readthedocs/search/api/v3/serializers.py @@ -7,6 +7,18 @@ class PageSearchSerializer(PageSearchSerializerBase): + """ + Serializer for API V3. + + This is very similar to the serializer from V2, + with the following changes: + + - ``project`` is an object, not a string. + - ``version`` is an object, not a string. + - ``project_alias`` isn't present, + it is contained in the ``project`` object. + """ + project = serializers.SerializerMethodField() version = serializers.SerializerMethodField() diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index 8ee6748ef24..d5e38af0539 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -4,6 +4,7 @@ from rest_framework.exceptions import ValidationError from rest_framework.generics import GenericAPIView +from readthedocs.core.utils.extend import SettingsOverrideObject from readthedocs.search import tasks from readthedocs.search.api import SearchPagination from readthedocs.search.api.v3.backend import Backend @@ -115,3 +116,13 @@ def list(self): ] response.data["query"] = self._get_search_query() return response + + +class BaseProxiedSearchAPI(SearchAPI): + + pass + + +class ProxiedSearchAPI(SettingsOverrideObject): + + _default_class = BaseProxiedSearchAPI diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 2897e91c227..0ece6e9a331 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -91,7 +91,8 @@ def _searh_files(self): backend = Backend( request=self.request, query=query, - allow_search_all=not settings.ALLOW_PRIVATE_REPOS, + arguments_required=False, + default_all=not settings.ALLOW_PRIVATE_REPOS, ) search_query = backend.parser.query use_advanced_query = should_use_advanced_query(backend.projects) @@ -166,7 +167,7 @@ def _search(self, *, user_input, projects, use_advanced_query): use_advanced_query=use_advanced_query, ) # pep8 and blank don't agree on having a space before :. - results = search[:self.max_search_results].execute() # noqa + results = search[: self.max_search_results].execute() # noqa facets = results.facets # Make sure the selected facets are displayed, From d5d6248df9c6888fbd688a578b2c3f080c1a3372 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 28 Sep 2022 13:28:39 -0500 Subject: [PATCH 11/35] Move server-side-search docs --- docs/user/config-file/v2.rst | 2 +- docs/user/features.rst | 2 +- docs/user/flyout-menu.rst | 2 +- docs/user/guides/advanced-search.rst | 2 +- docs/user/index.rst | 4 ++-- .../{server-side-search.rst => server-side-search/index.rst} | 0 6 files changed, 6 insertions(+), 6 deletions(-) rename docs/user/{server-side-search.rst => server-side-search/index.rst} (100%) diff --git a/docs/user/config-file/v2.rst b/docs/user/config-file/v2.rst index c29509efc85..9a83b1ed580 100644 --- a/docs/user/config-file/v2.rst +++ b/docs/user/config-file/v2.rst @@ -684,7 +684,7 @@ Do a recursive clone of the submodules. search ~~~~~~ -Settings for more control over :doc:`/server-side-search`. +Settings for more control over :doc:`/server-side-search/index`. .. code-block:: yaml diff --git a/docs/user/features.rst b/docs/user/features.rst index 51f03aaed6a..f714c5e9907 100644 --- a/docs/user/features.rst +++ b/docs/user/features.rst @@ -73,7 +73,7 @@ We offer a number of search features: * Search across projects you have access to (available on |com_brand|) * A full range of :doc:`search operators ` including exact matching and excluding phrases. -Learn more about :doc:`/server-side-search`. +Learn more about :doc:`/server-side-search/index`. Open Source and Customer Focused -------------------------------- diff --git a/docs/user/flyout-menu.rst b/docs/user/flyout-menu.rst index 2107c591d8f..c88bae32f54 100644 --- a/docs/user/flyout-menu.rst +++ b/docs/user/flyout-menu.rst @@ -15,7 +15,7 @@ The flyout menu provides access to the following bits of Read the Docs functiona * :doc:`Downloadable formats ` for the current version, including HTML & PDF downloads that are enabled by the project. * Links to the Read the Docs dashboard for the project. * Links to your :doc:`VCS provider ` that allow the user to quickly find the exact file that the documentation was rendered from. -* A search bar that gives users access to our :doc:`/server-side-search` of the current version. +* A search bar that gives users access to our :doc:`/server-side-search/index` of the current version. Closed ~~~~~~ diff --git a/docs/user/guides/advanced-search.rst b/docs/user/guides/advanced-search.rst index f00a847eada..956e78309f8 100644 --- a/docs/user/guides/advanced-search.rst +++ b/docs/user/guides/advanced-search.rst @@ -1,7 +1,7 @@ Using advanced search features ============================== -Read the Docs uses :doc:`/server-side-search` to power our search. +Read the Docs uses :doc:`/server-side-search/index` to power our search. This guide explains how to add a "search as you type" feature to your documentation, and how to use advanced query syntax to get more accurate results. diff --git a/docs/user/index.rst b/docs/user/index.rst index cfc874fda4e..78314535838 100644 --- a/docs/user/index.rst +++ b/docs/user/index.rst @@ -78,7 +78,7 @@ and some of the core features of Read the Docs. :doc:`/versions` | :doc:`/downloadable-documentation` | :doc:`/hosting` | - :doc:`/server-side-search` | + :doc:`/server-side-search/index` | :doc:`/analytics` | :doc:`/pull-requests` | :doc:`/build-notifications` | @@ -111,7 +111,7 @@ and some of the core features of Read the Docs. /versions /downloadable-documentation /hosting - /server-side-search + /server-side-search/index /analytics /pull-requests /build-notifications diff --git a/docs/user/server-side-search.rst b/docs/user/server-side-search/index.rst similarity index 100% rename from docs/user/server-side-search.rst rename to docs/user/server-side-search/index.rst From a91456d2a151e67ea0ad6cb3bc8b1e5cb54f4c9c Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 28 Sep 2022 18:23:38 -0500 Subject: [PATCH 12/35] Add docs --- docs/user/analytics.rst | 2 +- docs/user/api/v3.rst | 2 +- docs/user/build-customization.rst | 2 +- docs/user/features.rst | 4 +- docs/user/guides/administrators.rst | 1 - docs/user/guides/advanced-search.rst | 97 -------- docs/user/index.rst | 1 - docs/user/server-side-search/api.rst | 279 ++++++++++++++++++++++++ docs/user/server-side-search/index.rst | 194 ++++------------ docs/user/server-side-search/syntax.rst | 142 ++++++++++++ docs/user/tutorial/index.rst | 2 +- readthedocs/search/api/v3/views.py | 15 +- 12 files changed, 487 insertions(+), 254 deletions(-) delete mode 100644 docs/user/guides/advanced-search.rst create mode 100644 docs/user/server-side-search/api.rst create mode 100644 docs/user/server-side-search/syntax.rst diff --git a/docs/user/analytics.rst b/docs/user/analytics.rst index 0762dddb206..bc20b83962e 100644 --- a/docs/user/analytics.rst +++ b/docs/user/analytics.rst @@ -16,7 +16,7 @@ and then click on :guilabel:`Traffic Analytics`. Traffic analytics demo -You can also access to analytics data from :ref:`search results `. +You can also access to analytics data from :ref:`search results `. .. note:: diff --git a/docs/user/api/v3.rst b/docs/user/api/v3.rst index 4d05c4a0a4b..f72043623b6 100644 --- a/docs/user/api/v3.rst +++ b/docs/user/api/v3.rst @@ -1869,4 +1869,4 @@ Embed Additional APIs --------------- -- :ref:`Server side search API `. +- :doc:`Server side search API `. diff --git a/docs/user/build-customization.rst b/docs/user/build-customization.rst index c8e833000fb..2ad064b0e7e 100644 --- a/docs/user/build-customization.rst +++ b/docs/user/build-customization.rst @@ -344,7 +344,7 @@ Read the Docs will automatically index the content of all your HTML files, respecting the :ref:`search ` options from your config file. You can access the search results from the :guilabel:`Search` tab of your project, -or by using the :ref:`search API `. +or by using the :doc:`/server-side-search/api`. .. note:: diff --git a/docs/user/features.rst b/docs/user/features.rst index f714c5e9907..8af36093d5e 100644 --- a/docs/user/features.rst +++ b/docs/user/features.rst @@ -70,8 +70,8 @@ We offer a number of search features: * Search across :doc:`subprojects ` * Search results land on the exact content you were looking for -* Search across projects you have access to (available on |com_brand|) -* A full range of :doc:`search operators ` including exact matching and excluding phrases. +* Search across projects you have access to +* A full range of :doc:`search operators ` including exact matching and excluding phrases. Learn more about :doc:`/server-side-search/index`. diff --git a/docs/user/guides/administrators.rst b/docs/user/guides/administrators.rst index b993883341e..053fe68283c 100644 --- a/docs/user/guides/administrators.rst +++ b/docs/user/guides/administrators.rst @@ -14,7 +14,6 @@ have a look at our :doc:`/tutorial/index`. technical-docs-seo-guide manage-translations-sphinx - advanced-search hiding-a-version deprecating-content pdf-non-ascii-languages diff --git a/docs/user/guides/advanced-search.rst b/docs/user/guides/advanced-search.rst deleted file mode 100644 index 956e78309f8..00000000000 --- a/docs/user/guides/advanced-search.rst +++ /dev/null @@ -1,97 +0,0 @@ -Using advanced search features -============================== - -Read the Docs uses :doc:`/server-side-search/index` to power our search. -This guide explains how to add a "search as you type" feature to your documentation, -and how to use advanced query syntax to get more accurate results. - -.. contents:: Table of contents - :local: - :backlinks: none - :depth: 3 - -Enable "search as you type" in your documentation -------------------------------------------------- - -`readthedocs-sphinx-search`_ is a Sphinx extension that integrates your -documentation more closely with the search implementation of Read the Docs. -It adds a clean and minimal full-page search UI that supports a **search as you type** feature. - -To try this feature, -you can press :guilabel:`/` (forward slash) and start typing or just visit these URLs: - -- https://docs.readthedocs.io/?rtd_search=contributing -- https://docs.readthedocs.io/?rtd_search=api/v3/projects/ - -Search query syntax -------------------- - -Read the Docs uses the `Simple Query String`_ feature from `Elasticsearch`_. -This means that as the search query becomes more complex, -the results yielded become more specific. - -Exact phrase search -~~~~~~~~~~~~~~~~~~~ - -If a query is wrapped in ``"`` (double quotes), -then only those results where the phrase is exactly matched will be returned. - -Example queries: - -- https://docs.readthedocs.io/?rtd_search=%22custom%20css%22 -- https://docs.readthedocs.io/?rtd_search=%22adding%20a%20subproject%22 -- https://docs.readthedocs.io/?rtd_search=%22when%20a%20404%20is%20returned%22 - -Exact phrase search with slop value -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -``~N`` (tilde N) after a phrase signifies slop amount. -It can be used to match words that are near one another. - -Example queries: - -- https://docs.readthedocs.io/?rtd_search=%22dashboard%20admin%22~2 -- https://docs.readthedocs.io/?rtd_search=%22single%20documentation%22~1 -- https://docs.readthedocs.io/?rtd_search=%22read%20the%20docs%20story%22~5 - -Prefix query -~~~~~~~~~~~~ - -``*`` (asterisk) at the end of any term signifies a prefix query. -It returns the results containing the words with specific prefix. - -Example queries: - -- https://docs.readthedocs.io/?rtd_search=API%20v* -- https://docs.readthedocs.io/?rtd_search=single%20v*%20doc* -- https://docs.readthedocs.io/?rtd_search=build*%20and%20c*%20to%20doc* - -Fuzzy query -~~~~~~~~~~~ - -``~N`` after a word signifies edit distance (fuzziness). -This type of query is helpful when the exact spelling of the keyword is unknown. -It returns results that contain terms similar to the search term as measured by a `Levenshtein edit distance`_. - -Example queries: - -- https://docs.readthedocs.io/?rtd_search=reedthedcs~2 -- https://docs.readthedocs.io/?rtd_search=authentation~3 -- https://docs.readthedocs.io/?rtd_search=configurtion~1 - - -Build complex queries -~~~~~~~~~~~~~~~~~~~~~ - -The search query syntaxes described in the previous sections can be used with one another to build complex queries. - -For example: - -- https://docs.readthedocs.io/?rtd_search=auto*%20redirect* -- https://docs.readthedocs.io/?rtd_search=abandon*%20proj* -- https://docs.readthedocs.io/?rtd_search=localisation~3%20of%20doc* - -.. _Elasticsearch: https://www.elastic.co/products/elasticsearch -.. _readthedocs-sphinx-search: https://readthedocs-sphinx-search.readthedocs.io/ -.. _Simple Query String: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html# -.. _Levenshtein edit distance: https://en.wikipedia.org/wiki/Levenshtein_distance diff --git a/docs/user/index.rst b/docs/user/index.rst index 78314535838..892f31de843 100644 --- a/docs/user/index.rst +++ b/docs/user/index.rst @@ -145,7 +145,6 @@ and how to write successful documentation. * **For project administrators**: :doc:`/guides/technical-docs-seo-guide` | :doc:`/guides/manage-translations-sphinx` | - :doc:`/guides/advanced-search` | :doc:`/guides/private-submodules` | :doc:`More guides for administrators ` diff --git a/docs/user/server-side-search/api.rst b/docs/user/server-side-search/api.rst new file mode 100644 index 00000000000..9a4d9bf4631 --- /dev/null +++ b/docs/user/server-side-search/api.rst @@ -0,0 +1,279 @@ +Server Side Search API +====================== + +You can integrate our :doc:`server side search ` in your documentation by using our API. + +If you are using :doc:`/commercial/index` you will need to replace +https://readthedocs.org/ with https://readthedocs.com/ in all the URLs used in the following examples. +Check :ref:`server-side-search/api:authentication and authorization` if you are using private versions. + +.. contents:: Table of contents + :local: + :backlinks: none + :depth: 3 + +API V3 +------ + +.. http:get:: /api/v3/search/ + + Return a list of search results for a project or subset of projects. + Results are divided into sections with highlights of the matching term. + + .. Request + + :query q: Search query (see :doc:`/server-side-search/syntax`) + :query page: Jump to a specific page + :query page_size: Limits the results per page, default is 50 + + .. Response + + :>json string type: The type of the result, currently page is the only type. + :>json string project: The project object + :>json string version: The version object + :>json string title: The title of the page + :>json string domain: Canonical domain of the resulting page + :>json string path: Path to the resulting page + :>json object highlights: An object containing a list of substrings with matching terms. + Note that the text is HTML escaped with the matching terms inside a tag. + :>json object blocks: + + A list of block objects containing search results from the page. + Currently, there are two types of blocks: + + - section: A page section with a linkable anchor (``id`` attribute). + - domain: A Sphinx :doc:`domain ` + with a linkable anchor (``id`` attribute). + + + **Example request**: + + .. tabs:: + + .. code-tab:: bash + + $ curl "https://readthedocs.org/api/v3/search/?q=project:docs%20server%20side%20search" + + .. code-tab:: python + + import requests + URL = 'https://readthedocs.org/api/v3/search/' + params = { + 'q': 'project:docs server side search', + } + response = requests.get(URL, params=params) + print(response.json()) + + **Example response**: + + .. sourcecode:: json + + { + "count": 41, + "next": "https://readthedocs.org/api/v3/search/?page=2&q=project:docs%20server+side+search", + "previous": null, + "results": [ + { + "type": "page", + "project": { + "slug": "docs", + "alias": null + }, + "version": { + "slug": "latest" + }, + "title": "Server Side Search", + "domain": "https://docs.readthedocs.io", + "path": "/en/latest/server-side-search.html", + "highlights": { + "title": [ + "Server Side Search" + ] + }, + "blocks": [ + { + "type": "section", + "id": "server-side-search", + "title": "Server Side Search", + "content": "Read the Docs provides full-text search across all of the pages of all projects, this is powered by Elasticsearch.", + "highlights": { + "title": [ + "Server Side Search" + ], + "content": [ + "You can search all projects at https://readthedocs.org/search/" + ] + } + }, + { + "type": "domain", + "role": "http:get", + "name": "/_/api/v2/search/", + "id": "get--_-api-v2-search-", + "content": "Retrieve search results for docs", + "highlights": { + "name": [""], + "content": ["Retrieve search results for docs"] + } + } + ] + }, + ] + } + + +Migrating from API V2 +~~~~~~~~~~~~~~~~~~~~~ + +Instead of using query arguments to specify the project +and version to search, you need to do it from the query itself, +this is if you had the following parameters: + +- project: docs +- version: latest +- q: test + +Now you need to use: + +- q: project:docs/latest test + +The response of the API is very similar to V2, +with the following changes: + +- ``project`` is an object, not a string. +- ``version`` is an object, not a string. +- ``project_alias`` isn't present, + it is contained in the ``project`` object. + +When searching on a parent project, +results from their subprojects won't be included automatically, +to include results from subprojects use the ``subprojects`` paramater. + +Authentication and authorization +-------------------------------- + +If you are using :ref:`private versions `, +users will only be allowed to search projects they have permissions over. +Authentication and authorization is done using the current session, +or any of the valid :doc:`sharing methods `. + +To be able to use the user's current session you need to use the API from the domain where your docs are being served +(``/_/api/v2/search/``). +This is ``https://docs.readthedocs-hosted.com/_/api/v2/search/`` +for the ``https://docs.readthedocs-hosted.com/`` project, for example. + +API V2 (deprecated) +------------------- + +.. note:: + + Please use our :ref:`server-side-search/api:api v3` instead, + see :ref:`server-side-search/api:migrating from api v2`. + +.. http:get:: /api/v2/search/ + + Return a list of search results for a project, + including results from its :doc:`/subprojects`. + Results are divided into sections with highlights of the matching term. + + .. Request + + :query q: Search query + :query project: Project slug + :query version: Version slug + :query page: Jump to a specific page + :query page_size: Limits the results per page, default is 50 + + .. Response + + :>json string type: The type of the result, currently page is the only type. + :>json string project: The project slug + :>json string project_alias: Alias of the project if it's a subproject. + :>json string version: The version slug + :>json string title: The title of the page + :>json string domain: Canonical domain of the resulting page + :>json string path: Path to the resulting page + :>json object highlights: An object containing a list of substrings with matching terms. + Note that the text is HTML escaped with the matching terms inside a tag. + :>json object blocks: + + A list of block objects containing search results from the page. + Currently, there are two types of blocks: + + - section: A page section with a linkable anchor (``id`` attribute). + - domain: A Sphinx :doc:`domain ` + with a linkable anchor (``id`` attribute). + + + **Example request**: + + .. tabs:: + + .. code-tab:: bash + + $ curl "https://readthedocs.org/api/v2/search/?project=docs&version=latest&q=server%20side%20search" + + .. code-tab:: python + + import requests + URL = 'https://readthedocs.org/api/v2/search/' + params = { + 'q': 'server side search', + 'project': 'docs', + 'version': 'latest', + } + response = requests.get(URL, params=params) + print(response.json()) + + **Example response**: + + .. sourcecode:: json + + { + "count": 41, + "next": "https://readthedocs.org/api/v2/search/?page=2&project=read-the-docs&q=server+side+search&version=latest", + "previous": null, + "results": [ + { + "type": "page", + "project": "docs", + "project_alias": null, + "version": "latest", + "title": "Server Side Search", + "domain": "https://docs.readthedocs.io", + "path": "/en/latest/server-side-search.html", + "highlights": { + "title": [ + "Server Side Search" + ] + }, + "blocks": [ + { + "type": "section", + "id": "server-side-search", + "title": "Server Side Search", + "content": "Read the Docs provides full-text search across all of the pages of all projects, this is powered by Elasticsearch.", + "highlights": { + "title": [ + "Server Side Search" + ], + "content": [ + "You can search all projects at https://readthedocs.org/search/" + ] + } + }, + { + "type": "domain", + "role": "http:get", + "name": "/_/api/v2/search/", + "id": "get--_-api-v2-search-", + "content": "Retrieve search results for docs", + "highlights": { + "name": [""], + "content": ["Retrieve search results for docs"] + } + } + ] + }, + ] + } diff --git a/docs/user/server-side-search/index.rst b/docs/user/server-side-search/index.rst index 87bbc285568..cd19098e682 100644 --- a/docs/user/server-side-search/index.rst +++ b/docs/user/server-side-search/index.rst @@ -6,11 +6,17 @@ this is powered by Elasticsearch_. You can search all projects at https://readthedocs.org/search/, or search only on your project from the :guilabel:`Search` tab of your project. -We override the default search engine of your project with our search engine -to provide you better results within your project. -We fallback to the built-in search engine from your project -if our search engine doesn't return any results, -just in case we missed something |:smile:| +.. contents:: Table of contents + :local: + :backlinks: none + :depth: 3 + +.. toctree:: + :maxdepth: 2 + :glob: + :hidden: + + * Search features --------------- @@ -31,28 +37,38 @@ Full control over which results should be listed first allowing you to deprecate content, and always show relevant content to your users first. See :ref:`config-file/v2:search.ranking`. -Search across projects you have access to (|com_brand|) - This allows you to search across all the projects you access to in your Dashboard. +Search across projects you have access to + Search across all the projects you access to in your Dashboard. **Don't remember where you found that document the other day? No problem, you can search across them all.** -Special query syntax for more specific results. + You can also specify what projects you want to search + using the ``project:{name}`` syntax, for example: + `"project:docs project:dev search"`_. + See :doc:`/server-side-search/syntax`. + +Special query syntax for more specific results We support a full range of search queries. - You can see some examples in our :ref:`guides/advanced-search:search query syntax` guide. + You can see some examples at :ref:`server-side-search/syntax:special queries`. -Configurable. +Configurable Tweak search results according to your needs using a :ref:`configuration file `. -.. - Code object searching - With the user of :doc:`Sphinx Domains ` we are able to automatically provide direct search results to your Code objects. - You can try this out with our docs here by searching for - TODO: Find good examples in our docs, API maybe? +Ready to use + We override the default search engine of your Sphinx project with ours + to provide you with all these benefits within your project. + We fallback to the built-in search engine from your project if ours doesn't return any results, + just in case we missed something |:smile:|. + +API + Integrate our search as you like. + See :doc:`/server-side-search/api`. .. _"full-text search": https://docs.readthedocs.io/en/latest/search.html?q=%22full-text+search%22 +.. _"project:docs project:dev search": https://docs.readthedocs.io/en/latest/search.html?q=project:docs+project:dev+search -Search Analytics +Search analytics ---------------- Know what your users are looking for in your docs. @@ -69,134 +85,18 @@ and then click on :guilabel:`Search Analytics`. .. _Elasticsearch: https://www.elastic.co/products/elasticsearch -API ---- - -If you are using :doc:`/commercial/index` you will need to replace -https://readthedocs.org/ with https://readthedocs.com/ in all the URLs used in the following examples. -Check :ref:`server-side-search:authentication and authorization` if you are using private versions. - -.. warning:: - - This API isn't stable yet, some small things may change in the future. - -.. http:get:: /api/v2/search/ - - Return a list of search results for a project, - including results from its :doc:`/subprojects`. - Results are divided into sections with highlights of the matching term. - - .. Request - - :query q: Search query - :query project: Project slug - :query version: Version slug - :query page: Jump to a specific page - :query page_size: Limits the results per page, default is 50 - - .. Response - - :>json string type: The type of the result, currently page is the only type. - :>json string project: The project slug - :>json string project_alias: Alias of the project if it's a subproject. - :>json string version: The version slug - :>json string title: The title of the page - :>json string domain: Canonical domain of the resulting page - :>json string path: Path to the resulting page - :>json object highlights: An object containing a list of substrings with matching terms. - Note that the text is HTML escaped with the matching terms inside a tag. - :>json object blocks: - - A list of block objects containing search results from the page. - Currently, there are two types of blocks: - - - section: A page section with a linkable anchor (``id`` attribute). - - domain: A Sphinx :doc:`domain ` - with a linkable anchor (``id`` attribute). - - - **Example request**: - - .. tabs:: - - .. code-tab:: bash - - $ curl "https://readthedocs.org/api/v2/search/?project=docs&version=latest&q=server%20side%20search" - - .. code-tab:: python - - import requests - URL = 'https://readthedocs.org/api/v2/search/' - params = { - 'q': 'server side search', - 'project': 'docs', - 'version': 'latest', - } - response = requests.get(URL, params=params) - print(response.json()) - - **Example response**: - - .. sourcecode:: json - - { - "count": 41, - "next": "https://readthedocs.org/api/v2/search/?page=2&project=read-the-docs&q=server+side+search&version=latest", - "previous": null, - "results": [ - { - "type": "page", - "project": "docs", - "project_alias": null, - "version": "latest", - "title": "Server Side Search", - "domain": "https://docs.readthedocs.io", - "path": "/en/latest/server-side-search.html", - "highlights": { - "title": [ - "Server Side Search" - ] - }, - "blocks": [ - { - "type": "section", - "id": "server-side-search", - "title": "Server Side Search", - "content": "Read the Docs provides full-text search across all of the pages of all projects, this is powered by Elasticsearch.", - "highlights": { - "title": [ - "Server Side Search" - ], - "content": [ - "You can search all projects at https://readthedocs.org/search/" - ] - } - }, - { - "type": "domain", - "role": "http:get", - "name": "/_/api/v2/search/", - "id": "get--_-api-v2-search-", - "content": "Retrieve search results for docs", - "highlights": { - "name": [""], - "content": ["Retrieve search results for docs"] - } - } - ] - }, - ] - } - -Authentication and authorization -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -If you are using :ref:`private versions `, -users will only be allowed to search projects they have permissions over. -Authentication and authorization is done using the current session, -or any of the valid :doc:`sharing methods `. - -To be able to use the user's current session you need to use the API from the domain where your docs are being served -(``/_/api/v2/search/``). -This is ``https://docs.readthedocs-hosted.com/_/api/v2/search/`` -for the ``https://docs.readthedocs-hosted.com/`` project, for example. + +Search as you type +------------------ + +`readthedocs-sphinx-search`_ is a Sphinx extension that integrates your +documentation more closely with the search implementation of Read the Docs. +It adds a clean and minimal full-page search UI that supports a **search as you type** feature. + +To try this feature, +you can press :guilabel:`/` (forward slash) and start typing or just visit these URLs: + +- https://docs.readthedocs.io/?rtd_search=contributing +- https://docs.readthedocs.io/?rtd_search=api/v3/projects/ + +.. _readthedocs-sphinx-search: https://readthedocs-sphinx-search.readthedocs.io/ diff --git a/docs/user/server-side-search/syntax.rst b/docs/user/server-side-search/syntax.rst new file mode 100644 index 00000000000..89eecfa20ab --- /dev/null +++ b/docs/user/server-side-search/syntax.rst @@ -0,0 +1,142 @@ +Search Query Syntax +=================== + +When searching on Read the Docs, you can use some parameters in your +query in order to search on given projects, versions, +or to get more accurate results. + +.. contents:: Table of contents + :local: + :backlinks: none + :depth: 3 + +Parameters +---------- + +Parameters are in the form of ``name:value``, +they can appear anywhere in the query, +and depending on the parameter, you can use one or more of them. + +Any other text that isn't a parameter will be part of the search query. +If you don't want your search term to be interpreted as a parameter, +you can escape it like ``project\:docs``. + +.. note:: + + Unknown parameters like ``foo:bar`` don't require escaping + +The available parameters are: + +project + Indicates the project and version to includes results from + (this doesn’t include subprojects or translations). + If the version isn’t provided, the default version will be used. + More than one parameter can be included. + + Examples: + + - ``project:docs test`` + - ``project:docs/latest test`` + - ``project:docs/stable project:dev test`` + +subprojects + Include results from the given project and its subprojects. + If the version isn't provided, the default version of all projects will be used, + if a version is provided, all subprojects matching that version + will be included, and if they don't have a version with that name, + we use their default version. + More than one parameter can be included. + + Examples: + + - ``subprojects:docs test`` + - ``subprojects:docs/latest test`` + - ``subprojects:docs/stable subprojects:dev test`` + +user + Include results from projects the given user has access to. + The only supported value is ``@me``, + which is an alias for the current user. + Only one parameter can be included, + if duplicated, the last one will override the previous one. + + Examples: + + - ``user:@me test`` + +Permissions +~~~~~~~~~~~ + +If the user doesn’t have permission over one version, +or if the version doesn’t exist, we don’t include results from that version. + +The API will return all the projects that were used in the final search, +with that information you can check which projects were used in the search. + +Limitations +~~~~~~~~~~~ + +In order to keep our search usable for everyone, +you can search up to 100 projects at a time. +If the resulting query includes more than 100 projects, +they will be omitted from the final search. + +This syntax is only available when using our search API V3 +or when using the global search (https://readthedocs.org/search/). + +Special queries +--------------- + +Read the Docs uses the `Simple Query String`_ feature from `Elasticsearch`_. +This means that as the search query becomes more complex, +the results yielded become more specific. + +.. _Simple Query String: https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl-simple-query-string-query.html# +.. _Elasticsearch: https://www.elastic.co/products/elasticsearch + +Exact phrase search +~~~~~~~~~~~~~~~~~~~ + +If a query is wrapped in ``"`` (double quotes), +then only those results where the phrase is exactly matched will be returned. + +Examples: + +- ``"custom css"`` +- ``"adding a subproject"`` +- ``"when a 404 is returned"`` + +Prefix query +~~~~~~~~~~~~ + +``*`` (asterisk) at the end of any term signifies a prefix query. +It returns the results containing the words with specific prefix. + +Examples: + +- ``test*`` +- ``build*`` + +Fuzziness +~~~~~~~~~ + +``~N`` (tilde followed by a number) after a word indicates edit distance (fuzziness). +This type of query is helpful when the exact spelling of the keyword is unknown. +It returns results that contain terms similar to the search term. + +Examples: + +- ``doks~1`` +- ``test~2`` +- ``getter~2`` + +Words close to each other +~~~~~~~~~~~~~~~~~~~~~~~~~ + +``~N`` (tilde followed by a number) after a phrase can be used to match words that are near to each other. + +Examples: + +- ``"dashboard admin"~2`` +- ``"single documentation"~1`` +- ``"read the docs policy"~5`` diff --git a/docs/user/tutorial/index.rst b/docs/user/tutorial/index.rst index fba1f577a65..21d7cefa97c 100644 --- a/docs/user/tutorial/index.rst +++ b/docs/user/tutorial/index.rst @@ -626,7 +626,7 @@ Browsing Search Analytics ~~~~~~~~~~~~~~~~~~~~~~~~~ Apart from traffic analytics, Read the Docs also offers the possibility -to inspect :ref:`what search terms your readers use ` +to inspect :ref:`what search terms your readers use ` on your documentation. This can inform decisions on what areas to reinforce, or what parts of your project are less understood or more difficult to find. diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index d5e38af0539..f421b25396f 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -110,12 +110,23 @@ def list(self): page, many=True, projects=self._get_projects_to_search() ) response = self.paginator.get_paginated_response(serializer.data) + self._add_extra_fields(response) + return response + + def _add_extra_fields(self, response): + """Add additional fields to the response.""" response.data["projects"] = [ - [project.slug, version.slug] + { + "slug": project.slug, + "versions": [ + { + "slug": version.slug + } + ] + } for project, version in self._get_projects_to_search() ] response.data["query"] = self._get_search_query() - return response class BaseProxiedSearchAPI(SearchAPI): From 9f1c52e2fb975aa179e32a14525aa3305a5d9d1b Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 28 Sep 2022 18:33:06 -0500 Subject: [PATCH 13/35] Fixes --- docs/user/server-side-search/api.rst | 13 +++++++++++-- readthedocs/search/api/v3/views.py | 9 +-------- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/docs/user/server-side-search/api.rst b/docs/user/server-side-search/api.rst index 9a4d9bf4631..f954c0c781b 100644 --- a/docs/user/server-side-search/api.rst +++ b/docs/user/server-side-search/api.rst @@ -72,6 +72,15 @@ API V3 "count": 41, "next": "https://readthedocs.org/api/v3/search/?page=2&q=project:docs%20server+side+search", "previous": null, + "projects": [ + { + "slug": "docs", + "versions": [ + {"slug": "latest"} + ] + } + ], + "query": "server side search", "results": [ { "type": "page", @@ -158,8 +167,8 @@ Authentication and authorization is done using the current session, or any of the valid :doc:`sharing methods `. To be able to use the user's current session you need to use the API from the domain where your docs are being served -(``/_/api/v2/search/``). -This is ``https://docs.readthedocs-hosted.com/_/api/v2/search/`` +(``/_/api/v3/search/``). +This is ``https://docs.readthedocs-hosted.com/_/api/v3/search/`` for the ``https://docs.readthedocs-hosted.com/`` project, for example. API V2 (deprecated) diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index f421b25396f..5b7e0f8526d 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -116,14 +116,7 @@ def list(self): def _add_extra_fields(self, response): """Add additional fields to the response.""" response.data["projects"] = [ - { - "slug": project.slug, - "versions": [ - { - "slug": version.slug - } - ] - } + {"slug": project.slug, "versions": [{"slug": version.slug}]} for project, version in self._get_projects_to_search() ] response.data["query"] = self._get_search_query() From 77114f0d5ccfba06d40307fdaef8a5d525be86bd Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 28 Sep 2022 18:49:08 -0500 Subject: [PATCH 14/35] Update links --- readthedocs/search/api/v3/views.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index 5b7e0f8526d..094081c51d5 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -21,9 +21,9 @@ class SearchAPI(GenericAPIView): Required query parameters: - - **q**: Search term. + - **q**: [Search term](https://docs.readthedocs.io/page/server-side-search/syntax.html). - Check our [docs](https://docs.readthedocs.io/page/server-side-search.html#api) for more information. + Check our [docs](https://docs.readthedocs.io/page/server-side-search/api.html) for more information. """ # noqa http_method_names = ["get"] From bb62ad1664786954b4465fa21c6ec6f2155b3b43 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 28 Sep 2022 19:00:50 -0500 Subject: [PATCH 15/35] Fixes --- docs/user/server-side-search/syntax.rst | 3 +++ readthedocs/search/api/v3/backend.py | 9 ++++++++- readthedocs/urls.py | 2 +- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/docs/user/server-side-search/syntax.rst b/docs/user/server-side-search/syntax.rst index 89eecfa20ab..8db8c31a9d2 100644 --- a/docs/user/server-side-search/syntax.rst +++ b/docs/user/server-side-search/syntax.rst @@ -84,6 +84,9 @@ they will be omitted from the final search. This syntax is only available when using our search API V3 or when using the global search (https://readthedocs.org/search/). +Searching multiple versions of the same project isn't supported, +the last version will override the previous one. + Special queries --------------- diff --git a/readthedocs/search/api/v3/backend.py b/readthedocs/search/api/v3/backend.py index 68e63cfe736..b8ef65dc695 100644 --- a/readthedocs/search/api/v3/backend.py +++ b/readthedocs/search/api/v3/backend.py @@ -46,7 +46,14 @@ def projects(self): :returns: A list of tuples (project, version). """ - return list(islice(self._get_projects_to_search(), self.max_projects)) + projects = islice(self._get_projects_to_search(), self.max_projects) + # Make sure we are using just one version per-project, + # searching multiple versions of the same projects isn't supported yet. + projects_dict = { + project: version + for project, version in projects + } + return list(projects_dict.items()) def search(self, **kwargs): """ diff --git a/readthedocs/urls.py b/readthedocs/urls.py index db32f19b1f3..750329c4870 100644 --- a/readthedocs/urls.py +++ b/readthedocs/urls.py @@ -91,7 +91,7 @@ re_path(r'^api/v2/', include('readthedocs.api.v2.urls')), # Keep `search_api` at root level, so the test does not fail for other API re_path(r"^api/v2/search/$", include("readthedocs.search.api.v2.urls")), - path("^api/v3/search/", include("readthedocs.search.api.v3.urls")), + path("api/v3/search/", include("readthedocs.search.api.v3.urls")), # Deprecated re_path(r'^api/v1/embed/', include('readthedocs.embed.urls')), re_path(r'^api/v2/embed/', include('readthedocs.embed.urls')), From fe82da2c715a0dfaf290a87892b069e1ab99b8bc Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 29 Sep 2022 15:38:58 -0500 Subject: [PATCH 16/35] Fix tests --- readthedocs/rtd_tests/tests/test_privacy_urls.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/readthedocs/rtd_tests/tests/test_privacy_urls.py b/readthedocs/rtd_tests/tests/test_privacy_urls.py index 646535edc2e..0282a2a659f 100644 --- a/readthedocs/rtd_tests/tests/test_privacy_urls.py +++ b/readthedocs/rtd_tests/tests/test_privacy_urls.py @@ -220,10 +220,11 @@ class PublicProjectMixin(ProjectMixin): } response_data = { # Public - '/projects/': {'status_code': 301}, - '/projects/pip/downloads/pdf/latest/': {'status_code': 200}, - '/projects/pip/badge/': {'status_code': 200}, - '/projects/invalid_slug/': {'status_code': 302}, + "/projects/": {"status_code": 301}, + "/projects/pip/downloads/pdf/latest/": {"status_code": 200}, + "/projects/pip/badge/": {"status_code": 200}, + "/projects/invalid_slug/": {"status_code": 302}, + "/projects/pip/search/": {"status_code": 302}, } def test_public_urls(self): From f0f89a2475971669ad5e4d219d9df21fe9ab5da7 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 3 Oct 2022 19:58:54 -0500 Subject: [PATCH 17/35] Tests --- readthedocs/builds/querysets.py | 4 +- readthedocs/search/api/v3/backend.py | 7 +- readthedocs/search/api/v3/tests/test_api.py | 725 ++++++++++++++++++ .../search/api/v3/tests/test_queryparser.py | 86 +++ readthedocs/search/api/v3/views.py | 4 +- 5 files changed, 819 insertions(+), 7 deletions(-) create mode 100644 readthedocs/search/api/v3/tests/test_api.py create mode 100644 readthedocs/search/api/v3/tests/test_queryparser.py diff --git a/readthedocs/builds/querysets.py b/readthedocs/builds/querysets.py index b0e23d644e0..7dde39fe718 100644 --- a/readthedocs/builds/querysets.py +++ b/readthedocs/builds/querysets.py @@ -103,7 +103,9 @@ def public( if user.is_superuser: queryset = self.all() else: - queryset = self._add_from_user_projects(queryset, user) + queryset = self._add_from_user_projects( + queryset, user, admin=True, member=True + ) if project: queryset = queryset.filter(project=project) if only_active: diff --git a/readthedocs/search/api/v3/backend.py b/readthedocs/search/api/v3/backend.py index b8ef65dc695..01f8f554c43 100644 --- a/readthedocs/search/api/v3/backend.py +++ b/readthedocs/search/api/v3/backend.py @@ -49,10 +49,7 @@ def projects(self): projects = islice(self._get_projects_to_search(), self.max_projects) # Make sure we are using just one version per-project, # searching multiple versions of the same projects isn't supported yet. - projects_dict = { - project: version - for project, version in projects - } + projects_dict = {project: version for project, version in projects} return list(projects_dict.items()) def search(self, **kwargs): @@ -149,7 +146,7 @@ def _get_subprojects(self, project, version_slug=None): ) if version and self._has_permission(self.request.user, version): - yield project, version + yield subproject, version def _has_permission(self, user, version): """ diff --git a/readthedocs/search/api/v3/tests/test_api.py b/readthedocs/search/api/v3/tests/test_api.py new file mode 100644 index 00000000000..a2546e53fca --- /dev/null +++ b/readthedocs/search/api/v3/tests/test_api.py @@ -0,0 +1,725 @@ +import itertools +from unittest import mock + +import pytest +from django.contrib.auth.models import User +from django.core.management import call_command +from django.test import TestCase, override_settings +from django.urls import reverse +from django_dynamic_fixture import get + +from readthedocs.builds.models import Version +from readthedocs.organizations.models import Organization, Team +from readthedocs.projects.constants import PRIVATE, PUBLIC +from readthedocs.projects.models import HTMLFile, Project +from readthedocs.search.documents import PageDocument + + +@pytest.mark.search +class SearchTestBase(TestCase): + def setUp(self): + call_command("search_index", "--delete", "-f") + call_command("search_index", "--create") + + def tearDown(self): + super().tearDown() + call_command("search_index", "--delete", "-f") + + def get_dummy_processed_json(self, extra=None): + """ + Return a dict to be used as data indexed by ES. + + :param extra: By default it returns some default values, + you can override this passing a dict to extra. + """ + extra = extra or {} + default = { + "path": "index.html", + "title": "Title", + "sections": [ + { + "id": "first", + "title": "First Paragraph", + "content": "First paragraph, content of interest: test.", + } + ], + "domain_data": [], + } + default.update(extra) + return default + + def create_index(self, version, files=None): + """ + Create a search index for `version` with files as content. + + :param version: Version object + :param files: A dictionary with the filename as key and a dict as value + to be passed to `get_dummy_processed_json`. + """ + files = files or {"index.html": {}} + for file, extra in files.items(): + html_file = HTMLFile.objects.filter( + project=version.project, version=version, name=file + ).first() + if not html_file: + html_file = get( + HTMLFile, + project=version.project, + version=version, + name=file, + ) + html_file.get_processed_json = mock.MagicMock( + name="get_processed_json", + return_value=self.get_dummy_processed_json(extra), + ) + PageDocument().update(html_file) + + +@override_settings(ALLOW_PRIVATE_REPOS=False) +@override_settings(RTD_ALLOW_ORGANIZATIONS=False) +class SearchAPITest(SearchTestBase): + def setUp(self): + super().setUp() + self.user = get(User) + self.another_user = get(User) + self.project = get( + Project, slug="project", users=[self.user], privacy_level=PUBLIC + ) + self.another_project = get( + Project, + slug="another-project", + users=[self.another_user], + privacy_level=PUBLIC, + ) + + self.project.versions.update(privacy_level=PUBLIC, active=True, built=True) + self.version = self.project.versions.first() + + self.another_project.versions.update( + privacy_level=PUBLIC, active=True, built=True + ) + self.another_version = self.another_project.versions.first() + + self.url = reverse("search_api_v3") + self.client.force_login(self.user) + + for version in Version.objects.all(): + self.create_index(version) + + def get(self, *args, **kwargs): + return self.client.get(*args, **kwargs) + + def test_search_no_projects(self): + resp = self.get(self.url, data={"q": "test"}) + + self.assertEqual(resp.status_code, 200) + results = resp.data["results"] + projects = resp.data["projects"] + self.assertEqual(results, []) + self.assertEqual(projects, []) + self.assertEqual(resp.data["query"], "test") + + def test_search_project(self): + resp = self.get(self.url, data={"q": "project:project test"}) + + self.assertEqual(resp.status_code, 200) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, [{"slug": "project", "versions": [{"slug": "latest"}]}] + ) + self.assertEqual(len(results), 1) + self.assertEqual(resp.data["query"], "test") + + def test_search_project_explicit_version(self): + resp = self.get(self.url, data={"q": "project:project/latest test"}) + + self.assertEqual(resp.status_code, 200) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, [{"slug": "project", "versions": [{"slug": "latest"}]}] + ) + self.assertEqual(len(results), 1) + self.assertEqual(resp.data["query"], "test") + + new_version = get( + Version, project=self.project, slug="v2", built=True, active=True + ) + self.create_index(new_version) + resp = self.get(self.url, data={"q": "project:project/v2 test"}) + + self.assertEqual(resp.status_code, 200) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual(projects, [{"slug": "project", "versions": [{"slug": "v2"}]}]) + self.assertEqual(len(results), 1) + self.assertEqual(resp.data["query"], "test") + + def test_search_project_explicit_version_unexisting(self): + resp = self.get(self.url, data={"q": "project:project/v3 test"}) + self.assertEqual(resp.status_code, 200) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual(projects, []) + self.assertEqual(results, []) + self.assertEqual(resp.data["query"], "test") + + def test_search_project_unexisting(self): + resp = self.get(self.url, data={"q": "project:foobar/latest test"}) + self.assertEqual(resp.status_code, 200) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual(projects, []) + self.assertEqual(results, []) + self.assertEqual(resp.data["query"], "test") + + def test_search_project_valid_and_invalid(self): + resp = self.get( + self.url, data={"q": "project:foobar/latest project:project/latest test"} + ) + self.assertEqual(resp.status_code, 200) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, [{"slug": "project", "versions": [{"slug": "latest"}]}] + ) + self.assertEqual(len(results), 1) + self.assertEqual(resp.data["query"], "test") + + def test_search_multiple_projects(self): + resp = self.get( + self.url, data={"q": "project:project project:another-project test"} + ) + + self.assertEqual(resp.status_code, 200) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project", "versions": [{"slug": "latest"}]}, + {"slug": "another-project", "versions": [{"slug": "latest"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") + + def test_search_user_me_anonymous_user(self): + self.client.logout() + resp = self.get(self.url, data={"q": "user:@me test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual(projects, []) + self.assertEqual(results, []) + self.assertEqual(resp.data["query"], "test") + + def test_search_user_me_logged_in_user(self): + self.client.force_login(self.user) + resp = self.get(self.url, data={"q": "user:@me test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, [{"slug": "project", "versions": [{"slug": "latest"}]}] + ) + self.assertEqual(len(results), 1) + self.assertEqual(resp.data["query"], "test") + + self.client.force_login(self.another_user) + resp = self.get(self.url, data={"q": "user:@me test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, [{"slug": "another-project", "versions": [{"slug": "latest"}]}] + ) + self.assertEqual(len(results), 1) + self.assertEqual(resp.data["query"], "test") + + def test_search_user_invalid_value(self): + self.client.force_login(self.user) + resp = self.get(self.url, data={"q": "user:test test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual(projects, []) + self.assertEqual(results, []) + self.assertEqual(resp.data["query"], "test") + + def test_search_user_and_project(self): + self.client.force_login(self.user) + resp = self.get( + self.url, data={"q": "user:@me project:another-project/latest test"} + ) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "another-project", "versions": [{"slug": "latest"}]}, + {"slug": "project", "versions": [{"slug": "latest"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") + + def test_search_subprojects(self): + subproject = get( + Project, slug="subproject", users=[self.user], privacy_level=PUBLIC + ) + self.project.add_subproject(subproject) + subproject.versions.update(built=True, active=True, privacy_level=PUBLIC) + get(Version, slug="v2", project=self.project, active=True, built=True) + get(Version, slug="v3", project=self.project, active=True, built=True) + get(Version, slug="v2", project=subproject, active=True, built=True) + get(Version, slug="v4", project=subproject, active=True, built=True) + + for version in itertools.chain( + subproject.versions.all(), self.project.versions.all() + ): + self.create_index(version) + + # Search default version of the project and its subprojects. + resp = self.get(self.url, data={"q": "subprojects:project test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project", "versions": [{"slug": "latest"}]}, + {"slug": "subproject", "versions": [{"slug": "latest"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") + + # Explicitly search on the latest version. + resp = self.get(self.url, data={"q": "subprojects:project/latest test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project", "versions": [{"slug": "latest"}]}, + {"slug": "subproject", "versions": [{"slug": "latest"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") + + # Explicitly search on the v2 version. + resp = self.get(self.url, data={"q": "subprojects:project/v2 test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project", "versions": [{"slug": "v2"}]}, + {"slug": "subproject", "versions": [{"slug": "v2"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") + + # Explicitly search on the v3 version. + # Only the main project has this version, + # we will default to the default version of its subproject. + resp = self.get(self.url, data={"q": "subprojects:project/v3 test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project", "versions": [{"slug": "v3"}]}, + {"slug": "subproject", "versions": [{"slug": "latest"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") + + # Explicitly search on the v4 version. + # The main project doesn't have this version, + # we include results from its subprojects only. + resp = self.get(self.url, data={"q": "subprojects:project/v4 test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, [{"slug": "subproject", "versions": [{"slug": "v4"}]}] + ) + self.assertEqual(len(results), 1) + self.assertEqual(resp.data["query"], "test") + + +@pytest.mark.proxito +@override_settings(PUBLIC_DOMAIN="readthedocs.io", USE_SUBDOMAIN=True) +class ProxiedSearchAPITest(SearchAPITest): + + host = "docs.readthedocs.io" + + def get(self, *args, **kwargs): + return self.client.get(*args, HTTP_HOST=self.host, **kwargs) + + +@override_settings(ALLOW_PRIVATE_REPOS=True) +@override_settings(RTD_ALLOW_ORGANIZATIONS=True) +class SearchAPIWithOrganizationsTest(SearchTestBase): + def setUp(self): + super().setUp() + + self.user = get(User) + self.member = get(User) + self.project = get(Project, slug="project") + self.project.versions.update(built=True, active=True, privacy_level=PRIVATE) + self.version = self.project.versions.first() + self.version_public = get( + Version, + slug="public", + project=self.project, + privacy_level=PUBLIC, + active=True, + built=True, + ) + + self.project_b = get(Project, slug="project-b") + self.project_b.versions.update(built=True, active=True, privacy_level=PRIVATE) + self.version_b = self.project_b.versions.first() + self.version_b_public = get( + Version, + slug="public", + project=self.project_b, + privacy_level=PUBLIC, + built=True, + active=True, + ) + + self.organization = get( + Organization, owners=[self.user], projects=[self.project, self.project_b] + ) + + self.team = get( + Team, + members=[self.member], + organization=self.organization, + projects=[self.project_b], + access="readonly", + ) + + self.another_user = get(User) + self.another_project = get(Project, slug="another-project") + self.another_project.versions.update( + built=True, active=True, privacy_level=PRIVATE + ) + self.another_version = self.another_project.versions.first() + self.another_version_public = get( + Version, + slug="public", + project=self.another_project, + built=True, + active=True, + ) + + self.another_organization = get( + Organization, owners=[self.another_user], projects=[self.another_project] + ) + + self.url = reverse("search_api_v3") + self.client.force_login(self.user) + + for version in Version.objects.all(): + self.create_index(version) + + def test_search_no_projects(self): + resp = self.client.get(self.url, data={"q": "test"}) + + self.assertEqual(resp.status_code, 200) + results = resp.data["results"] + projects = resp.data["projects"] + self.assertEqual(results, []) + self.assertEqual(projects, []) + self.assertEqual(resp.data["query"], "test") + + def test_search_project(self): + resp = self.client.get(self.url, data={"q": "project:project test"}) + + self.assertEqual(resp.status_code, 200) + results = resp.data["results"] + projects = resp.data["projects"] + self.assertEqual(len(results), 1) + self.assertEqual( + projects, [{"slug": "project", "versions": [{"slug": "latest"}]}] + ) + self.assertEqual(resp.data["query"], "test") + + def test_search_project_explicit_version(self): + resp = self.client.get(self.url, data={"q": "project:project/public test"}) + + self.assertEqual(resp.status_code, 200) + results = resp.data["results"] + projects = resp.data["projects"] + self.assertEqual(len(results), 1) + self.assertEqual( + projects, [{"slug": "project", "versions": [{"slug": "public"}]}] + ) + self.assertEqual(resp.data["query"], "test") + + def test_search_project_no_permissions(self): + resp = self.client.get(self.url, data={"q": "project:another-project test"}) + + self.assertEqual(resp.status_code, 200) + results = resp.data["results"] + projects = resp.data["projects"] + self.assertEqual(results, []) + self.assertEqual(projects, []) + self.assertEqual(resp.data["query"], "test") + + def test_search_project_private_version_anonymous_user(self): + self.client.logout() + + resp = self.client.get(self.url, data={"q": "project:project test"}) + + self.assertEqual(resp.status_code, 200) + results = resp.data["results"] + projects = resp.data["projects"] + self.assertEqual(results, []) + self.assertEqual(projects, []) + self.assertEqual(resp.data["query"], "test") + + def test_search_project_public_version_anonymous_user(self): + self.client.logout() + + resp = self.client.get(self.url, data={"q": "project:project/public test"}) + + self.assertEqual(resp.status_code, 200) + results = resp.data["results"] + projects = resp.data["projects"] + self.assertEqual(len(results), 1) + self.assertEqual( + projects, [{"slug": "project", "versions": [{"slug": "public"}]}] + ) + self.assertEqual(resp.data["query"], "test") + + def test_search_multiple_projects(self): + resp = self.client.get( + self.url, + data={ + "q": "project:project project:another-project/latest project:project-b/latest test" + }, + ) + self.assertEqual(resp.status_code, 200) + results = resp.data["results"] + projects = resp.data["projects"] + self.assertEqual(len(results), 2) + self.assertEqual( + projects, + [ + {"slug": "project", "versions": [{"slug": "latest"}]}, + {"slug": "project-b", "versions": [{"slug": "latest"}]}, + ], + ) + self.assertEqual(resp.data["query"], "test") + + def test_search_multiple_projects_team_member(self): + self.client.force_login(self.member) + + resp = self.client.get( + self.url, + data={ + "q": "project:project project:another-project/latest project:project-b/latest test" + }, + ) + self.assertEqual(resp.status_code, 200) + results = resp.data["results"] + projects = resp.data["projects"] + self.assertEqual(len(results), 1) + self.assertEqual( + projects, [{"slug": "project-b", "versions": [{"slug": "latest"}]}] + ) + self.assertEqual(resp.data["query"], "test") + + def test_search_user_me_anonymous_user(self): + self.client.logout() + resp = self.client.get(self.url, data={"q": "user:@me test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual(projects, []) + self.assertEqual(results, []) + self.assertEqual(resp.data["query"], "test") + + def test_search_user_me_logged_in_user(self): + self.client.force_login(self.user) + resp = self.client.get(self.url, data={"q": "user:@me test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project", "versions": [{"slug": "latest"}]}, + {"slug": "project-b", "versions": [{"slug": "latest"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") + + self.client.force_login(self.member) + resp = self.client.get(self.url, data={"q": "user:@me test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, [{"slug": "project-b", "versions": [{"slug": "latest"}]}] + ) + self.assertEqual(len(results), 1) + self.assertEqual(resp.data["query"], "test") + + self.client.force_login(self.another_user) + resp = self.client.get(self.url, data={"q": "user:@me test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, [{"slug": "another-project", "versions": [{"slug": "latest"}]}] + ) + self.assertEqual(len(results), 1) + self.assertEqual(resp.data["query"], "test") + + def test_search_user_and_project(self): + self.client.force_login(self.member) + resp = self.client.get( + self.url, data={"q": "user:@me project:another-project/public test"} + ) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "another-project", "versions": [{"slug": "public"}]}, + {"slug": "project-b", "versions": [{"slug": "latest"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") + + def test_search_subprojects(self): + self.project.add_subproject(self.project_b) + + # Search default version of the project and its subprojects. + resp = self.client.get(self.url, data={"q": "subprojects:project test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project", "versions": [{"slug": "latest"}]}, + {"slug": "project-b", "versions": [{"slug": "latest"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") + + # Explicitly search on the latest version. + resp = self.client.get(self.url, data={"q": "subprojects:project/latest test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project", "versions": [{"slug": "latest"}]}, + {"slug": "project-b", "versions": [{"slug": "latest"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") + + # Explicitly search on the public version. + resp = self.client.get(self.url, data={"q": "subprojects:project/public test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project", "versions": [{"slug": "public"}]}, + {"slug": "project-b", "versions": [{"slug": "public"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") + + def test_search_subprojects_with_team_member(self): + self.client.force_login(self.member) + self.project.add_subproject(self.project_b) + + # Search default version of the project and its subprojects. + resp = self.client.get(self.url, data={"q": "subprojects:project test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project-b", "versions": [{"slug": "latest"}]}, + ], + ) + self.assertEqual(len(results), 1) + self.assertEqual(resp.data["query"], "test") + + # Explicitly search on the latest version. + resp = self.client.get(self.url, data={"q": "subprojects:project/latest test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project-b", "versions": [{"slug": "latest"}]}, + ], + ) + self.assertEqual(len(results), 1) + self.assertEqual(resp.data["query"], "test") + + # Explicitly search on the public version. + resp = self.client.get(self.url, data={"q": "subprojects:project/public test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project", "versions": [{"slug": "public"}]}, + {"slug": "project-b", "versions": [{"slug": "public"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") + + def test_search_subprojects_with_anonymous_user(self): + self.client.logout() + self.project.add_subproject(self.project_b) + + # Search default version of the project and its subprojects. + resp = self.client.get(self.url, data={"q": "subprojects:project test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [], + ) + self.assertEqual(len(results), 0) + self.assertEqual(resp.data["query"], "test") + + # Explicitly search on the latest version. + resp = self.client.get(self.url, data={"q": "subprojects:project/latest test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [], + ) + self.assertEqual(len(results), 0) + self.assertEqual(resp.data["query"], "test") + + # Explicitly search on the public version. + resp = self.client.get(self.url, data={"q": "subprojects:project/public test"}) + projects = resp.data["projects"] + results = resp.data["results"] + self.assertEqual( + projects, + [ + {"slug": "project", "versions": [{"slug": "public"}]}, + {"slug": "project-b", "versions": [{"slug": "public"}]}, + ], + ) + self.assertEqual(len(results), 2) + self.assertEqual(resp.data["query"], "test") diff --git a/readthedocs/search/api/v3/tests/test_queryparser.py b/readthedocs/search/api/v3/tests/test_queryparser.py new file mode 100644 index 00000000000..f7d043affae --- /dev/null +++ b/readthedocs/search/api/v3/tests/test_queryparser.py @@ -0,0 +1,86 @@ +from django.test import TestCase + +from readthedocs.search.api.v3.queryparser import SearchQueryParser + + +class TestQueryParser(TestCase): + def test_no_arguments(self): + parser = SearchQueryParser("search query") + parser.parse() + arguments = parser.arguments + self.assertEqual(arguments["project"], []) + self.assertEqual(arguments["subprojects"], []) + self.assertEqual(arguments["user"], "") + self.assertEqual(parser.query, "search query") + + def test_project_arguments(self): + parser = SearchQueryParser("project:foo query") + parser.parse() + arguments = parser.arguments + self.assertEqual(arguments["project"], ["foo"]) + self.assertEqual(arguments["subprojects"], []) + self.assertEqual(arguments["user"], "") + self.assertEqual(parser.query, "query") + + def test_multiple_project_arguments(self): + parser = SearchQueryParser("project:foo query project:bar") + parser.parse() + arguments = parser.arguments + self.assertEqual(arguments["project"], ["foo", "bar"]) + self.assertEqual(arguments["subprojects"], []) + self.assertEqual(arguments["user"], "") + self.assertEqual(parser.query, "query") + + def test_user_argument(self): + parser = SearchQueryParser("query user:foo") + parser.parse() + arguments = parser.arguments + self.assertEqual(arguments["project"], []) + self.assertEqual(arguments["subprojects"], []) + self.assertEqual(arguments["user"], "foo") + self.assertEqual(parser.query, "query") + + def test_multiple_user_arguments(self): + parser = SearchQueryParser("search user:foo query user:bar") + parser.parse() + arguments = parser.arguments + self.assertEqual(arguments["project"], []) + self.assertEqual(arguments["subprojects"], []) + self.assertEqual(arguments["user"], "bar") + self.assertEqual(parser.query, "search query") + + def test_subprojects_argument(self): + parser = SearchQueryParser("search subprojects:foo query ") + parser.parse() + arguments = parser.arguments + self.assertEqual(arguments["project"], []) + self.assertEqual(arguments["subprojects"], ["foo"]) + self.assertEqual(arguments["user"], "") + self.assertEqual(parser.query, "search query") + + def test_multiple_subprojects_arguments(self): + parser = SearchQueryParser("search subprojects:foo query subprojects:bar") + parser.parse() + arguments = parser.arguments + self.assertEqual(arguments["project"], []) + self.assertEqual(arguments["subprojects"], ["foo", "bar"]) + self.assertEqual(arguments["user"], "") + self.assertEqual(parser.query, "search query") + + def test_escaped_argument(self): + parser = SearchQueryParser(r"project\:foo project:bar query") + parser.parse() + arguments = parser.arguments + self.assertEqual(arguments["project"], ["bar"]) + self.assertEqual(arguments["subprojects"], []) + self.assertEqual(arguments["user"], "") + self.assertEqual(parser.query, "project:foo query") + + def test_only_arguments(self): + parser = SearchQueryParser(r"project:foo user:bar") + parser.parse() + arguments = parser.arguments + self.assertEqual(arguments["project"], ["foo"]) + self.assertEqual(arguments["subprojects"], []) + self.assertEqual(arguments["user"], "bar") + self.assertEqual(parser.query, "") diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index 094081c51d5..55fe11f2a83 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -1,3 +1,5 @@ +from functools import cached_property + import structlog from django.utils import timezone from django.utils.translation import gettext as _ @@ -42,7 +44,7 @@ def _validate_query_params(self): if errors: raise ValidationError(errors) - @property + @cached_property def _backend(self): backend = self.search_backend_class( request=self.request, From efe56ed6979e4560c2a82ad4d0c0a162a183ff7b Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 3 Oct 2022 20:09:14 -0500 Subject: [PATCH 18/35] Linter --- readthedocs/search/api/v3/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/search/api/v3/backend.py b/readthedocs/search/api/v3/backend.py index 01f8f554c43..d13efe5d7bd 100644 --- a/readthedocs/search/api/v3/backend.py +++ b/readthedocs/search/api/v3/backend.py @@ -49,7 +49,7 @@ def projects(self): projects = islice(self._get_projects_to_search(), self.max_projects) # Make sure we are using just one version per-project, # searching multiple versions of the same projects isn't supported yet. - projects_dict = {project: version for project, version in projects} + projects_dict = dict(projects) return list(projects_dict.items()) def search(self, **kwargs): From 18f108b349df51a92f0aeb4d094d5877e2299cd7 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 6 Oct 2022 15:31:16 -0500 Subject: [PATCH 19/35] Format --- readthedocs/search/views.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 833c4639dcd..0ece6e9a331 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -1,12 +1,7 @@ """Search views.""" import collections from urllib.parse import urlencode -from readthedocs.projects.models import Project -from readthedocs.search.api.v2.serializers import ProjectSearchSerializer -from readthedocs.search.api.v3.backend import Backend -from readthedocs.search.api.v3.serializers import PageSearchSerializer -from readthedocs.search.api.v3.utils import should_use_advanced_query -from readthedocs.search.faceted_search import ProjectSearch + import structlog from django.conf import settings from django.http.response import HttpResponseRedirect @@ -14,6 +9,13 @@ from django.views import View from django.views.generic import TemplateView +from readthedocs.projects.models import Project +from readthedocs.search.api.v2.serializers import ProjectSearchSerializer +from readthedocs.search.api.v3.backend import Backend +from readthedocs.search.api.v3.serializers import PageSearchSerializer +from readthedocs.search.api.v3.utils import should_use_advanced_query +from readthedocs.search.faceted_search import ProjectSearch + log = structlog.get_logger(__name__) UserInput = collections.namedtuple( From b027de48d55a3e665c1d57471e9b3efc14bcc39a Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 6 Oct 2022 15:45:39 -0500 Subject: [PATCH 20/35] This shouldn't be here --- readthedocs/search/api/__init__.py | 102 ----------------------------- 1 file changed, 102 deletions(-) delete mode 100644 readthedocs/search/api/__init__.py diff --git a/readthedocs/search/api/__init__.py b/readthedocs/search/api/__init__.py deleted file mode 100644 index 2800efc4da0..00000000000 --- a/readthedocs/search/api/__init__.py +++ /dev/null @@ -1,102 +0,0 @@ -from collections import namedtuple -from math import ceil - -from django.utils.translation import gettext as _ -from rest_framework.exceptions import NotFound -from rest_framework.pagination import PageNumberPagination - - -class PaginatorPage: - - """ - Mimics the result from a paginator. - - By using this class, we avoid having to override a lot of methods - of `PageNumberPagination` to make it work with the ES DSL object. - """ - - def __init__(self, page_number, total_pages, count): - self.number = page_number - Paginator = namedtuple("Paginator", ["num_pages", "count"]) - self.paginator = Paginator(total_pages, count) - - def has_next(self): - return self.number < self.paginator.num_pages - - def has_previous(self): - return self.number > 1 - - def next_page_number(self): - return self.number + 1 - - def previous_page_number(self): - return self.number - 1 - - -class SearchPagination(PageNumberPagination): - - """Paginator for the results of PageSearch.""" - - page_size = 50 - page_size_query_param = "page_size" - max_page_size = 100 - - def _get_page_number(self, number): - try: - if isinstance(number, float) and not number.is_integer(): - raise ValueError - number = int(number) - except (TypeError, ValueError): - number = -1 - return number - - def paginate_queryset(self, queryset, request, view=None): - """ - Override to get the paginated result from the ES queryset. - - This makes use of our custom paginator and slicing support from the ES DSL object, - instead of the one used by django's ORM. - - Mostly inspired by https://github.com/encode/django-rest-framework/blob/acbd9d8222e763c7f9c7dc2de23c430c702e06d4/rest_framework/pagination.py#L191 # noqa - """ - # Needed for other methods of this class. - self.request = request - - page_size = self.get_page_size(request) - page_number = request.query_params.get(self.page_query_param, 1) - - original_page_number = page_number - page_number = self._get_page_number(page_number) - - if page_number <= 0: - msg = self.invalid_page_message.format( - page_number=original_page_number, - message=_("Invalid page"), - ) - raise NotFound(msg) - - start = (page_number - 1) * page_size - end = page_number * page_size - - result = [] - total_count = 0 - total_pages = 1 - - if queryset: - result = queryset[start:end].execute() - total_count = result.hits.total["value"] - hits = max(1, total_count) - total_pages = ceil(hits / page_size) - - if total_pages > 1 and self.template is not None: - # The browsable API should display pagination controls. - self.display_page_controls = True - - # Needed for other methods of this class. - self.page = PaginatorPage( - page_number=page_number, - total_pages=total_pages, - count=total_count, - ) - - return result From 1539db6ad6a54ccf436fc6f47dd17289629bd751 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Thu, 6 Oct 2022 15:58:24 -0500 Subject: [PATCH 21/35] Fix import --- readthedocs/search/api/v3/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index 55fe11f2a83..a7d6a81bdce 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -8,7 +8,7 @@ from readthedocs.core.utils.extend import SettingsOverrideObject from readthedocs.search import tasks -from readthedocs.search.api import SearchPagination +from readthedocs.search.api.pagination import SearchPagination from readthedocs.search.api.v3.backend import Backend from readthedocs.search.api.v3.serializers import PageSearchSerializer from readthedocs.search.api.v3.utils import should_use_advanced_query From ec30c8e62e95a6b52dba7bb8c06845f705219d8d Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 12 Oct 2022 15:55:41 -0500 Subject: [PATCH 22/35] Fix tests --- readthedocs/search/api/v3/backend.py | 3 +- readthedocs/search/tests/test_views.py | 134 ++++--------------------- 2 files changed, 23 insertions(+), 114 deletions(-) diff --git a/readthedocs/search/api/v3/backend.py b/readthedocs/search/api/v3/backend.py index d13efe5d7bd..ed9a0c01ce5 100644 --- a/readthedocs/search/api/v3/backend.py +++ b/readthedocs/search/api/v3/backend.py @@ -81,7 +81,8 @@ def _get_projects_to_search(self): if not self._has_arguments: if self.arguments_required: return None - return self._get_default_projects() + yield from self._get_default_projects() + return None for value in self.parser.arguments["project"]: project, version = self._get_project_and_version(value) diff --git a/readthedocs/search/tests/test_views.py b/readthedocs/search/tests/test_views.py index 563108f8faf..6252b544572 100644 --- a/readthedocs/search/tests/test_views.py +++ b/readthedocs/search/tests/test_views.py @@ -36,7 +36,7 @@ def test_search_by_project_name(self, client, project, all_projects): results, _ = self._get_search_result( url=self.url, client=client, - search_params={ 'q': project.name }, + search_params={ 'q': project.name, "type": "project"}, ) assert len(results) == 1 @@ -52,7 +52,7 @@ def test_search_project_have_correct_language_facets(self, client, project): results, facets = self._get_search_result( url=self.url, client=client, - search_params={ 'q': project.name }, + search_params={ 'q': project.name, "type": "project" }, ) lang_facets = facets['language'] @@ -67,7 +67,7 @@ def test_search_project_filter_language(self, client, project): """Test that searching project filtered according to language.""" # Create a project in bn and add it as a translation translate = get(Project, language='bn', name=project.name) - search_params = { 'q': project.name, 'language': 'bn' } + search_params = { 'q': project.name, 'language': 'bn', "type": "project"} results, facets = self._get_search_result( url=self.url, @@ -206,65 +206,6 @@ def test_file_search(self, client, project, data_type, page_num): # Make it lower because our search is case insensitive assert word.lower() in query.lower() - def test_file_search_have_correct_role_name_facets(self, client): - """Test that searching files should result all role_names.""" - - # searching for 'celery' to test that - # correct role_names are displayed - results, facets = self._get_search_result( - url=self.url, - client=client, - search_params={ 'q': 'celery', 'type': 'file' } - ) - assert len(results) >= 1 - role_name_facets = facets['role_name'] - role_name_facets_str = [facet[0] for facet in role_name_facets] - expected_role_names = ['py:class', 'py:function', 'py:method'] - assert sorted(expected_role_names) == sorted(role_name_facets_str) - for facet in role_name_facets: - assert facet[2] == False # because none of the facets are applied - - def test_file_search_filter_role_name(self, client): - """Test that searching files filtered according to role_names.""" - - search_params = { 'q': 'celery', 'type': 'file' } - # searching without the filter - results, facets = self._get_search_result( - url=self.url, - client=client, - search_params=search_params - ) - assert len(results) >= 2 # there are > 1 results without the filter - role_name_facets = facets['role_name'] - for facet in role_name_facets: - assert facet[2] == False # because none of the facets are applied - - confval_facet = 'py:class' - # checking if 'py:class' facet is present in results - assert confval_facet in [facet[0] for facet in role_name_facets] - - # filtering with role_name=py:class - search_params['role_name'] = confval_facet - new_results, new_facets = self._get_search_result( - url=self.url, - client=client, - search_params=search_params - ) - new_role_names_facets = new_facets['role_name'] - # All results from domains should have role_name='py:class'. - assert len(new_results) == 1 - first_result = new_results[0] - blocks = first_result['blocks'] - for block in blocks: - assert block['type'] == 'domain' - assert block['role'] == confval_facet - - for facet in new_role_names_facets: - if facet[0] == confval_facet: - assert facet[2] == True # because 'py:class' filter is active - else: - assert facet[2] == False - @pytest.mark.parametrize('data_type', DATA_TYPES_VALUES) @pytest.mark.parametrize('case', ['upper', 'lower', 'title']) def test_file_search_case_insensitive(self, client, project, case, data_type): @@ -319,13 +260,15 @@ def test_file_search_exact_match(self, client, project): client=client, search_params={ 'q': query, 'type': 'file' }) - # there must be only 1 result - # because the phrase is present in - # only one project - assert len(results) == 1 - assert results[0]['project'] == 'kuma' - assert results[0]['domain'] == 'http://readthedocs.org' - assert results[0]['path'] == '/docs/kuma/en/latest/documentation.html' + # There are two results, + # one from each version of the "kuma" project. + assert len(results) == 2 + assert results[0]["version"] == {"slug": "stable"} + assert results[1]["version"] == {"slug": "latest"} + for result in results: + assert result['project'] == {"alias": None, "slug": "kuma"} + assert result['domain'] == 'http://readthedocs.org' + assert result['path'].endswith('/documentation.html') blocks = results[0]['blocks'] assert len(blocks) == 1 @@ -337,35 +280,14 @@ def test_file_search_exact_match(self, client, project): for word in highlighted_words: assert word.lower() in query.lower() - def test_file_search_have_correct_project_facets(self, client, all_projects): - """Test that file search have correct project facets in results""" - - # `environment` word is present both in `kuma` and `docs` files - # so search with this phrase - query = 'environment' - results, facets = self._get_search_result( - url=self.url, - client=client, - search_params={ 'q': query, 'type': 'file' }, - ) - # There should be 2 search result - assert len(results) == 2 - project_facets = facets['project'] - project_facets_str = [facet[0] for facet in project_facets] - assert len(project_facets_str) == 2 - - # kuma and pipeline should be there - assert sorted(project_facets_str) == sorted(['kuma', 'docs']) - def test_file_search_filter_by_project(self, client): """Test that search result are filtered according to project.""" # `environment` word is present both in `kuma` and `docs` files # so search with this phrase but filter through `kuma` project search_params = { - 'q': 'environment', + 'q': 'project:kuma environment', 'type': 'file', - 'project': 'kuma' } results, facets = self._get_search_result( url=self.url, @@ -378,11 +300,10 @@ def test_file_search_filter_by_project(self, client): # There should be 1 search result as we have filtered assert len(results) == 1 # kuma should should be there only - assert 'kuma' == results[0]['project'] + assert {"alias": None, "slug": "kuma"} == results[0]['project'] - # But there should be 2 projects in the project facets - # as the query is present in both projects - assert sorted(resulted_project_facets) == sorted(['kuma', 'docs']) + # The projects we search is the only one included in the final results. + assert resulted_project_facets == ['kuma'] @pytest.mark.xfail(reason='Versions are not showing correctly! Fixme while rewrite!') def test_file_search_show_versions(self, client, all_projects, es_index, settings): @@ -412,30 +333,24 @@ def test_file_search_show_versions(self, client, all_projects, es_index, setting assert sorted(project_versions) == sorted(version_facets_str) def test_file_search_subprojects(self, client, all_projects, es_index): - """ - TODO: File search should return results from subprojects also. - - This is currently disabled because the UX around it is weird. - You filter by a project, and get results for multiple. - """ project = all_projects[0] subproject = all_projects[1] # Add another project as subproject of the project - project.add_subproject(subproject) + project.add_subproject(subproject, alias="subproject") # Now search with subproject content but explicitly filter by the parent project query = get_search_query_from_project_file(project_slug=subproject.slug) search_params = { - 'q': query, + 'q': f"subprojects:{project.slug} {query}", 'type': 'file', - 'project': project.slug, } results, _ = self._get_search_result( url=self.url, client=client, search_params=search_params, ) - assert len(results) == 0 + assert len(results) == 1 + assert results[0]["project"] == {"alias": "subproject", "slug": "pipeline"} @override_settings(ALLOW_PRIVATE_REPOS=True) def test_search_only_projects_owned_by_the_user(self, client, all_projects): @@ -451,15 +366,8 @@ def test_search_only_projects_owned_by_the_user(self, client, all_projects): ) assert len(results) > 0 - other_projects = [ - project.slug - for project in all_projects - if project.slug != 'docs' - ] - for result in results: - assert result['project'] == 'docs' - assert result['project'] not in other_projects + assert result['project'] == {"alias": None, "slug": 'docs'} @override_settings(ALLOW_PRIVATE_REPOS=True) def test_search_no_owned_projects(self, client, all_projects): From a27a6d45596911778987717b44fd97a463cd046a Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 12 Oct 2022 15:56:47 -0500 Subject: [PATCH 23/35] Black --- readthedocs/search/tests/test_views.py | 28 +++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/readthedocs/search/tests/test_views.py b/readthedocs/search/tests/test_views.py index 6252b544572..4327dbac005 100644 --- a/readthedocs/search/tests/test_views.py +++ b/readthedocs/search/tests/test_views.py @@ -36,7 +36,7 @@ def test_search_by_project_name(self, client, project, all_projects): results, _ = self._get_search_result( url=self.url, client=client, - search_params={ 'q': project.name, "type": "project"}, + search_params={"q": project.name, "type": "project"}, ) assert len(results) == 1 @@ -52,7 +52,7 @@ def test_search_project_have_correct_language_facets(self, client, project): results, facets = self._get_search_result( url=self.url, client=client, - search_params={ 'q': project.name, "type": "project" }, + search_params={"q": project.name, "type": "project"}, ) lang_facets = facets['language'] @@ -66,8 +66,8 @@ def test_search_project_have_correct_language_facets(self, client, project): def test_search_project_filter_language(self, client, project): """Test that searching project filtered according to language.""" # Create a project in bn and add it as a translation - translate = get(Project, language='bn', name=project.name) - search_params = { 'q': project.name, 'language': 'bn', "type": "project"} + translate = get(Project, language="bn", name=project.name) + search_params = {"q": project.name, "language": "bn", "type": "project"} results, facets = self._get_search_result( url=self.url, @@ -266,9 +266,9 @@ def test_file_search_exact_match(self, client, project): assert results[0]["version"] == {"slug": "stable"} assert results[1]["version"] == {"slug": "latest"} for result in results: - assert result['project'] == {"alias": None, "slug": "kuma"} - assert result['domain'] == 'http://readthedocs.org' - assert result['path'].endswith('/documentation.html') + assert result["project"] == {"alias": None, "slug": "kuma"} + assert result["domain"] == "http://readthedocs.org" + assert result["path"].endswith("/documentation.html") blocks = results[0]['blocks'] assert len(blocks) == 1 @@ -286,8 +286,8 @@ def test_file_search_filter_by_project(self, client): # `environment` word is present both in `kuma` and `docs` files # so search with this phrase but filter through `kuma` project search_params = { - 'q': 'project:kuma environment', - 'type': 'file', + "q": "project:kuma environment", + "type": "file", } results, facets = self._get_search_result( url=self.url, @@ -300,10 +300,10 @@ def test_file_search_filter_by_project(self, client): # There should be 1 search result as we have filtered assert len(results) == 1 # kuma should should be there only - assert {"alias": None, "slug": "kuma"} == results[0]['project'] + assert {"alias": None, "slug": "kuma"} == results[0]["project"] # The projects we search is the only one included in the final results. - assert resulted_project_facets == ['kuma'] + assert resulted_project_facets == ["kuma"] @pytest.mark.xfail(reason='Versions are not showing correctly! Fixme while rewrite!') def test_file_search_show_versions(self, client, all_projects, es_index, settings): @@ -341,8 +341,8 @@ def test_file_search_subprojects(self, client, all_projects, es_index): # Now search with subproject content but explicitly filter by the parent project query = get_search_query_from_project_file(project_slug=subproject.slug) search_params = { - 'q': f"subprojects:{project.slug} {query}", - 'type': 'file', + "q": f"subprojects:{project.slug} {query}", + "type": "file", } results, _ = self._get_search_result( url=self.url, @@ -367,7 +367,7 @@ def test_search_only_projects_owned_by_the_user(self, client, all_projects): assert len(results) > 0 for result in results: - assert result['project'] == {"alias": None, "slug": 'docs'} + assert result["project"] == {"alias": None, "slug": "docs"} @override_settings(ALLOW_PRIVATE_REPOS=True) def test_search_no_owned_projects(self, client, all_projects): From adec99886dafd5fccba498bcab707b3a5cb0b40a Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 12 Oct 2022 16:43:08 -0500 Subject: [PATCH 24/35] Fix test --- readthedocs/search/tests/test_views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/search/tests/test_views.py b/readthedocs/search/tests/test_views.py index 4327dbac005..57fc559b545 100644 --- a/readthedocs/search/tests/test_views.py +++ b/readthedocs/search/tests/test_views.py @@ -350,7 +350,7 @@ def test_file_search_subprojects(self, client, all_projects, es_index): search_params=search_params, ) assert len(results) == 1 - assert results[0]["project"] == {"alias": "subproject", "slug": "pipeline"} + assert results[0]["project"] == {"alias": "subproject", "slug": subproject.slug} @override_settings(ALLOW_PRIVATE_REPOS=True) def test_search_only_projects_owned_by_the_user(self, client, all_projects): From e6e16d96614bb036f0681fd95f91574219452c32 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 1 Nov 2022 17:14:48 -0500 Subject: [PATCH 25/35] Updates from review --- readthedocs/search/api/v3/backend.py | 10 +++++----- readthedocs/search/api/v3/views.py | 17 +++++++++++++++-- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/readthedocs/search/api/v3/backend.py b/readthedocs/search/api/v3/backend.py index ed9a0c01ce5..0d4505be77b 100644 --- a/readthedocs/search/api/v3/backend.py +++ b/readthedocs/search/api/v3/backend.py @@ -96,11 +96,11 @@ def _get_projects_to_search(self): if version and self._has_permission(self.request.user, version): yield project, version - # If the user didn't provide a version, version_slug will be `None`, - # and we add all subprojects with their default version, - # otherwise we will add all projects that match the given version. - _, version_slug = self._split_project_and_version(value) if project: + # If the user didn't provide a version, version_slug will be `None`, + # and we add all subprojects with their default version, + # otherwise we will add all projects that match the given version. + _, version_slug = self._split_project_and_version(value) yield from self._get_subprojects( project=project, version_slug=version_slug, @@ -122,7 +122,7 @@ def _get_projects_from_user(self): def _get_subprojects(self, project, version_slug=None): """ - Get a tuple project/version of all subprojects of `project`. + Get a tuple (project, version) of all subprojects of `project`. If `version_slug` doesn't match a version of the subproject, the default version will be used. diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index a7d6a81bdce..da08a05e7bc 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -116,17 +116,30 @@ def list(self): return response def _add_extra_fields(self, response): - """Add additional fields to the response.""" + """ + Add additional fields to the top level response. + + These are fields that aren't part of the serializers, + and are related to the whole list, rather than each element. + """ + # Add all projects that were used in the final search. response.data["projects"] = [ {"slug": project.slug, "versions": [{"slug": version.slug}]} for project, version in self._get_projects_to_search() ] + # Add the query used in the final search, + # this doesn't include arguments. response.data["query"] = self._get_search_query() class BaseProxiedSearchAPI(SearchAPI): - pass + """ + Use a separate class for the proxied version of this view. + + This is so we can override it in .com, + where we need to make use of our auth backends. + """ class ProxiedSearchAPI(SettingsOverrideObject): From 3eded3eb591f6fe2b148cbe6b463f622f31c51ef Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 2 Nov 2022 12:13:05 -0500 Subject: [PATCH 26/35] Inherit from API v3 settings --- readthedocs/search/api/v3/views.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index da08a05e7bc..b0f6a66c983 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -5,7 +5,9 @@ from django.utils.translation import gettext as _ from rest_framework.exceptions import ValidationError from rest_framework.generics import GenericAPIView +from rest_framework.permissions import AllowAny +from readthedocs.api.v3.views import APIv3Settings from readthedocs.core.utils.extend import SettingsOverrideObject from readthedocs.search import tasks from readthedocs.search.api.pagination import SearchPagination @@ -16,7 +18,7 @@ log = structlog.get_logger(__name__) -class SearchAPI(GenericAPIView): +class SearchAPI(APIv3Settings, GenericAPIView): """ Server side search API V3. @@ -32,6 +34,7 @@ class SearchAPI(GenericAPIView): pagination_class = SearchPagination serializer_class = PageSearchSerializer search_backend_class = Backend + permission_classes = [AllowAny] def get_view_name(self): return "Search API V3" From 7210e6cf189b26996c4cbd46a082b4ba0e94ed99 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 2 Nov 2022 18:49:08 -0500 Subject: [PATCH 27/35] Add a note about using yield --- readthedocs/search/api/v3/backend.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/readthedocs/search/api/v3/backend.py b/readthedocs/search/api/v3/backend.py index 0d4505be77b..89b8d9a4ac9 100644 --- a/readthedocs/search/api/v3/backend.py +++ b/readthedocs/search/api/v3/backend.py @@ -78,6 +78,13 @@ def search(self, **kwargs): return search def _get_projects_to_search(self): + """ + Return an iterator of (project, version) used in this search. + + An iterator (yield syntax) is used so we can stop at + ``self.max_projects``, this way we avoid fetching projects + that we won't use. + """ if not self._has_arguments: if self.arguments_required: return None From 2b02c36a8dec89488ed931793f25202b262de575 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 2 Nov 2022 19:16:29 -0500 Subject: [PATCH 28/35] Rename Backend -> SearchExecutor --- readthedocs/search/api/v3/{backend.py => executor.py} | 2 +- readthedocs/search/api/v3/views.py | 6 +++--- readthedocs/search/views.py | 1 - 3 files changed, 4 insertions(+), 5 deletions(-) rename readthedocs/search/api/v3/{backend.py => executor.py} (99%) diff --git a/readthedocs/search/api/v3/backend.py b/readthedocs/search/api/v3/executor.py similarity index 99% rename from readthedocs/search/api/v3/backend.py rename to readthedocs/search/api/v3/executor.py index 89b8d9a4ac9..f5a7ff67123 100644 --- a/readthedocs/search/api/v3/backend.py +++ b/readthedocs/search/api/v3/executor.py @@ -7,7 +7,7 @@ from readthedocs.search.faceted_search import PageSearch -class Backend: +class SearchExecutor: """ Parse the query, search, and return the projects used in the search. diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index b0f6a66c983..8218a5dc8db 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -11,7 +11,7 @@ from readthedocs.core.utils.extend import SettingsOverrideObject from readthedocs.search import tasks from readthedocs.search.api.pagination import SearchPagination -from readthedocs.search.api.v3.backend import Backend +from readthedocs.search.api.v3.executor import SearchExecutor from readthedocs.search.api.v3.serializers import PageSearchSerializer from readthedocs.search.api.v3.utils import should_use_advanced_query @@ -33,7 +33,7 @@ class SearchAPI(APIv3Settings, GenericAPIView): http_method_names = ["get"] pagination_class = SearchPagination serializer_class = PageSearchSerializer - search_backend_class = Backend + search_executor_class = SearchExecutor permission_classes = [AllowAny] def get_view_name(self): @@ -49,7 +49,7 @@ def _validate_query_params(self): @cached_property def _backend(self): - backend = self.search_backend_class( + backend = self.search_executor_class( request=self.request, query=self.request.GET["q"], ) diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 0ece6e9a331..de3f9f3291a 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -11,7 +11,6 @@ from readthedocs.projects.models import Project from readthedocs.search.api.v2.serializers import ProjectSearchSerializer -from readthedocs.search.api.v3.backend import Backend from readthedocs.search.api.v3.serializers import PageSearchSerializer from readthedocs.search.api.v3.utils import should_use_advanced_query from readthedocs.search.faceted_search import ProjectSearch From 5948cd275998406c1417f6ee4dc3c37ef4a17a1c Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Wed, 2 Nov 2022 19:26:13 -0500 Subject: [PATCH 29/35] More renaming --- readthedocs/search/api/v3/views.py | 12 ++++++------ readthedocs/search/views.py | 11 ++++++----- 2 files changed, 12 insertions(+), 11 deletions(-) diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index 8218a5dc8db..a959fbb64aa 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -48,18 +48,18 @@ def _validate_query_params(self): raise ValidationError(errors) @cached_property - def _backend(self): - backend = self.search_executor_class( + def _search_executor(self): + search_executor = self.search_executor_class( request=self.request, query=self.request.GET["q"], ) - return backend + return search_executor def _get_search_query(self): - return self._backend.parser.query + return self._search_executor.parser.query def _get_projects_to_search(self): - return self._backend.projects + return self._search_executor.projects def get_queryset(self): """ @@ -72,7 +72,7 @@ def get_queryset(self): is compatible with DRF's paginator. """ use_advanced_query = should_use_advanced_query(self._get_projects_to_search()) - search = self._backend.search( + search = self._search_executor.search( use_advanced_query=use_advanced_query, aggregate_results=False, ) diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index de3f9f3291a..aba3201d54a 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -11,6 +11,7 @@ from readthedocs.projects.models import Project from readthedocs.search.api.v2.serializers import ProjectSearchSerializer +from readthedocs.search.api.v3.executor import SearchExecutor from readthedocs.search.api.v3.serializers import PageSearchSerializer from readthedocs.search.api.v3.utils import should_use_advanced_query from readthedocs.search.faceted_search import ProjectSearch @@ -87,22 +88,22 @@ def _searh_files(self): total_count = 0 query = self.request.GET.get("q") if query: - backend = Backend( + search_executor = SearchExecutor( request=self.request, query=query, arguments_required=False, default_all=not settings.ALLOW_PRIVATE_REPOS, ) - search_query = backend.parser.query - use_advanced_query = should_use_advanced_query(backend.projects) - search = backend.search(use_advanced_query=use_advanced_query) + search_query = search_executor.parser.query + use_advanced_query = should_use_advanced_query(search_executor.projects) + search = search_executor.search(use_advanced_query=use_advanced_query) if search: results = search[: self.max_search_results].execute() facets = results.facets total_count = results.hits.total["value"] results = PageSearchSerializer( results, - projects=backend.projects, + projects=search_executor.projects, many=True, ).data From af54960fcc6b876514c539cf03745c0ce512abc2 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 14 Nov 2022 18:43:29 -0500 Subject: [PATCH 30/35] Don't depend on the order --- readthedocs/search/api/v3/tests/test_api.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/readthedocs/search/api/v3/tests/test_api.py b/readthedocs/search/api/v3/tests/test_api.py index a2546e53fca..894a3741739 100644 --- a/readthedocs/search/api/v3/tests/test_api.py +++ b/readthedocs/search/api/v3/tests/test_api.py @@ -586,11 +586,11 @@ def test_search_user_and_project(self): projects = resp.data["projects"] results = resp.data["results"] self.assertEqual( - projects, - [ + set(projects), + { {"slug": "another-project", "versions": [{"slug": "public"}]}, {"slug": "project-b", "versions": [{"slug": "latest"}]}, - ], + }, ) self.assertEqual(len(results), 2) self.assertEqual(resp.data["query"], "test") From 52d1cfefb7c028f58af704534934add5fd3b57ea Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 14 Nov 2022 19:13:49 -0500 Subject: [PATCH 31/35] We can't use dicts in a set :( --- readthedocs/search/api/v3/tests/test_api.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/readthedocs/search/api/v3/tests/test_api.py b/readthedocs/search/api/v3/tests/test_api.py index 894a3741739..26aa60828d1 100644 --- a/readthedocs/search/api/v3/tests/test_api.py +++ b/readthedocs/search/api/v3/tests/test_api.py @@ -585,13 +585,15 @@ def test_search_user_and_project(self): ) projects = resp.data["projects"] results = resp.data["results"] - self.assertEqual( - set(projects), - { - {"slug": "another-project", "versions": [{"slug": "public"}]}, - {"slug": "project-b", "versions": [{"slug": "latest"}]}, - }, - ) + expected_projects = [ + {"slug": "another-project", "versions": [{"slug": "public"}]}, + {"slug": "project-b", "versions": [{"slug": "latest"}]}, + ] + # The order of the projects is different on .com. + # So we test each element separately. + self.assertEqual(len(projects), len(expected_projects)) + for expected_project in expected_projects: + self.assertIn(expected_project, projects) self.assertEqual(len(results), 2) self.assertEqual(resp.data["query"], "test") From ceadd855cb854c8f2a9fa53e680537f042ff5071 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 15 Nov 2022 11:54:10 -0500 Subject: [PATCH 32/35] False alarm, we just needed to set the version public --- readthedocs/search/api/v3/tests/test_api.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/readthedocs/search/api/v3/tests/test_api.py b/readthedocs/search/api/v3/tests/test_api.py index 26aa60828d1..04f69308acc 100644 --- a/readthedocs/search/api/v3/tests/test_api.py +++ b/readthedocs/search/api/v3/tests/test_api.py @@ -412,6 +412,7 @@ def setUp(self): Version, slug="public", project=self.another_project, + privacy_level=PUBLIC, built=True, active=True, ) @@ -585,15 +586,13 @@ def test_search_user_and_project(self): ) projects = resp.data["projects"] results = resp.data["results"] - expected_projects = [ - {"slug": "another-project", "versions": [{"slug": "public"}]}, - {"slug": "project-b", "versions": [{"slug": "latest"}]}, - ] - # The order of the projects is different on .com. - # So we test each element separately. - self.assertEqual(len(projects), len(expected_projects)) - for expected_project in expected_projects: - self.assertIn(expected_project, projects) + self.assertEqual( + projects, + [ + {"slug": "another-project", "versions": [{"slug": "public"}]}, + {"slug": "project-b", "versions": [{"slug": "latest"}]}, + ], + ) self.assertEqual(len(results), 2) self.assertEqual(resp.data["query"], "test") From 60324a6a1133322e3b3011af5fbccf09bfab8996 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 15 Nov 2022 13:31:58 -0500 Subject: [PATCH 33/35] Set privacy level explicitly --- readthedocs/search/api/v3/tests/test_api.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/readthedocs/search/api/v3/tests/test_api.py b/readthedocs/search/api/v3/tests/test_api.py index 04f69308acc..780c9c1eafc 100644 --- a/readthedocs/search/api/v3/tests/test_api.py +++ b/readthedocs/search/api/v3/tests/test_api.py @@ -144,7 +144,12 @@ def test_search_project_explicit_version(self): self.assertEqual(resp.data["query"], "test") new_version = get( - Version, project=self.project, slug="v2", built=True, active=True + Version, + project=self.project, + slug="v2", + privacy_level=PUBLIC, + built=True, + active=True, ) self.create_index(new_version) resp = self.get(self.url, data={"q": "project:project/v2 test"}) @@ -266,11 +271,12 @@ def test_search_subprojects(self): Project, slug="subproject", users=[self.user], privacy_level=PUBLIC ) self.project.add_subproject(subproject) - subproject.versions.update(built=True, active=True, privacy_level=PUBLIC) get(Version, slug="v2", project=self.project, active=True, built=True) get(Version, slug="v3", project=self.project, active=True, built=True) get(Version, slug="v2", project=subproject, active=True, built=True) get(Version, slug="v4", project=subproject, active=True, built=True) + subproject.versions.update(built=True, active=True, privacy_level=PUBLIC) + self.project.versions.update(built=True, active=True, privacy_level=PUBLIC) for version in itertools.chain( subproject.versions.all(), self.project.versions.all() From 119ece292b4410fd781649d8f1d5497fea011075 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 15 Nov 2022 14:47:16 -0500 Subject: [PATCH 34/35] Override global rate limit --- readthedocs/search/api/v3/views.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index a959fbb64aa..6e46472a94c 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -6,6 +6,7 @@ from rest_framework.exceptions import ValidationError from rest_framework.generics import GenericAPIView from rest_framework.permissions import AllowAny +from rest_framework.throttling import AnonRateThrottle, UserRateThrottle from readthedocs.api.v3.views import APIv3Settings from readthedocs.core.utils.extend import SettingsOverrideObject @@ -18,6 +19,19 @@ log = structlog.get_logger(__name__) +RATE_LIMIT = "100/minute" + + +class SearchAnonRateThrottle(AnonRateThrottle): + + rate = RATE_LIMIT + + +class SearchUserRateThrottle(UserRateThrottle): + + rate = RATE_LIMIT + + class SearchAPI(APIv3Settings, GenericAPIView): """ @@ -35,6 +49,7 @@ class SearchAPI(APIv3Settings, GenericAPIView): serializer_class = PageSearchSerializer search_executor_class = SearchExecutor permission_classes = [AllowAny] + throttle_classes = (SearchUserRateThrottle, SearchAnonRateThrottle) def get_view_name(self): return "Search API V3" From 3da874806ee2479387fa1b009b35bdbed0e3b8d5 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 15 Nov 2022 15:46:35 -0500 Subject: [PATCH 35/35] Comment --- readthedocs/search/api/v3/views.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/readthedocs/search/api/v3/views.py b/readthedocs/search/api/v3/views.py index 6e46472a94c..a7dafa11649 100644 --- a/readthedocs/search/api/v3/views.py +++ b/readthedocs/search/api/v3/views.py @@ -24,11 +24,15 @@ class SearchAnonRateThrottle(AnonRateThrottle): + """Rate limit for the search API for anonymous users.""" + rate = RATE_LIMIT class SearchUserRateThrottle(UserRateThrottle): + """Rate limit for the search API for authenticated users.""" + rate = RATE_LIMIT @@ -49,6 +53,9 @@ class SearchAPI(APIv3Settings, GenericAPIView): serializer_class = PageSearchSerializer search_executor_class = SearchExecutor permission_classes = [AllowAny] + # The search API would be used by anonymous users, + # and with our search-as-you-type extension. + # So we need to increase the rate limit. throttle_classes = (SearchUserRateThrottle, SearchAnonRateThrottle) def get_view_name(self):