-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Refactor search code #5197
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor search code #5197
Changes from 50 commits
47e3c1a
a84bb15
6f36acd
0bfa998
936465d
0496cf6
ab28491
617f47f
d0f6082
af8a28f
5f197ec
465628e
e3e245c
e909759
49098d7
07dc26b
e9f3f3f
d6d02da
336e674
eb65817
144ed0e
d314f58
03ad482
5dfc17a
fcc9620
701ecdb
f636737
7583f9a
747da90
6c317f1
e2655de
f457ff7
7b2013c
0741a25
85956e0
6ac2f35
e062191
8e4cc2b
8c7bda4
5b9f460
e2e271b
a993f08
d52b968
fc277fa
417ea45
80c58c7
72d867f
5f118ff
e263e69
1a3e146
fab9f42
0a06726
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,16 +1,54 @@ | ||
import logging | ||
from pprint import pformat | ||
|
||
from rest_framework import generics | ||
from rest_framework import serializers | ||
from rest_framework.exceptions import ValidationError | ||
from rest_framework.pagination import PageNumberPagination | ||
|
||
from readthedocs.search.documents import PageDocument | ||
from readthedocs.search.filters import SearchFilterBackend | ||
from readthedocs.search.pagination import SearchPagination | ||
from readthedocs.search.serializers import PageSearchSerializer | ||
from readthedocs.search.utils import get_project_list_or_404 | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
class SearchPagination(PageNumberPagination): | ||
page_size = 25 | ||
page_size_query_param = 'page_size' | ||
max_page_size = 100 | ||
|
||
|
||
class PageSearchSerializer(serializers.Serializer): | ||
project = serializers.CharField() | ||
version = serializers.CharField() | ||
title = serializers.CharField() | ||
path = serializers.CharField() | ||
link = serializers.SerializerMethodField() | ||
highlight = serializers.SerializerMethodField() | ||
|
||
def get_link(self, obj): | ||
projects_url = self.context.get('projects_url') | ||
if projects_url: | ||
docs_url = projects_url[obj.project] | ||
return docs_url + obj.path | ||
|
||
def get_highlight(self, obj): | ||
highlight = getattr(obj.meta, 'highlight', None) | ||
if highlight: | ||
if hasattr(highlight, 'content'): | ||
# Change results to turn newlines in highlight into periods | ||
# https://github.com/rtfd/readthedocs.org/issues/5168 | ||
highlight.content = [result.replace('\n', '. ') for result in highlight.content] | ||
ret = highlight.to_dict() | ||
log.debug('API Search highlight: %s', pformat(ret)) | ||
return ret | ||
|
||
|
||
class PageSearchAPIView(generics.ListAPIView): | ||
|
||
"""Main entry point to perform a search using Elasticsearch.""" | ||
|
||
pagination_class = SearchPagination | ||
filter_backends = [SearchFilterBackend] | ||
serializer_class = PageSearchSerializer | ||
|
||
def get_queryset(self): | ||
|
@@ -24,10 +62,25 @@ def get_queryset(self): | |
# Validate all the required params are there | ||
self.validate_query_params() | ||
query = self.request.query_params.get('q', '') | ||
queryset = PageDocument.simple_search(query=query) | ||
kwargs = {'filter_user': False} | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. nitpick: I'd like to come up with a better name for this. I'm thinking about What we want to communicate here is "filter versions by users permissions" I suppose, but I didn't find a good name for that :( |
||
kwargs['projects_list'] = [p.slug for p in self.get_all_projects()] | ||
kwargs['versions_list'] = self.request.query_params.get('version') | ||
user = self.request.user | ||
queryset = PageDocument.faceted_search( | ||
query=query, user=user, **kwargs | ||
) | ||
return queryset | ||
|
||
def validate_query_params(self): | ||
""" | ||
Validate all required query params are passed on the request. | ||
|
||
Query params required are: ``q``, ``project`` and ``version``. | ||
|
||
:rtype: None | ||
|
||
:raises: ValidationError if one of them is missing. | ||
""" | ||
required_query_params = {'q', 'project', 'version'} # python `set` literal is `{}` | ||
request_params = set(self.request.query_params.keys()) | ||
missing_params = required_query_params - request_params | ||
|
@@ -39,17 +92,47 @@ def validate_query_params(self): | |
raise ValidationError(errors) | ||
|
||
def get_serializer_context(self): | ||
context = super(PageSearchAPIView, self).get_serializer_context() | ||
context = super().get_serializer_context() | ||
context['projects_url'] = self.get_all_projects_url() | ||
return context | ||
|
||
def get_all_projects(self): | ||
""" | ||
Return a list containing the project itself and all its subprojects. | ||
|
||
The project slug is retrieved from ``project`` query param. | ||
|
||
:rtype: list | ||
|
||
:raises: Http404 if project is not found | ||
""" | ||
project_slug = self.request.query_params.get('project') | ||
version_slug = self.request.query_params.get('version') | ||
all_projects = get_project_list_or_404( | ||
project_slug=project_slug, user=self.request.user, version_slug=version_slug, | ||
) | ||
return all_projects | ||
|
||
def get_all_projects_url(self): | ||
""" | ||
Return a dict containing the project slug and its version URL. | ||
|
||
The dictionary contains the project and its subprojects . Each project's | ||
slug is used as a key and the documentation URL for that project and | ||
version as the value. | ||
|
||
Example: | ||
|
||
{ | ||
"requests": "https://requests.readthedocs.io/en/latest/", | ||
"requests-oauth": "https://requests-oauth.readthedocs.io/en/latest/", | ||
} | ||
|
||
:rtype: dict | ||
""" | ||
all_projects = self.get_all_projects() | ||
version_slug = self.request.query_params.get('version') | ||
project_slug = self.request.query_params.get('project') | ||
all_projects = get_project_list_or_404(project_slug=project_slug, user=self.request.user) | ||
projects_url = {} | ||
|
||
for project in all_projects: | ||
projects_url[project.slug] = project.get_docs_url(version_slug=version_slug) | ||
|
||
return projects_url |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,10 +1,8 @@ | ||
"""Project app config""" | ||
|
||
from django.apps import AppConfig | ||
|
||
|
||
class SearchConfig(AppConfig): | ||
name = 'readthedocs.search' | ||
|
||
def ready(self): | ||
from .signals import index_html_file, remove_html_file | ||
import readthedocs.search.signals # noqa |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Can we take the serializers to
serializers.py
?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I found it a lot of overhead to keep moving around between files when they were only used in one place. I find it easier this way to have all the things we need for the API in one place.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I also think we'll need to change our implementation here to support faceting. If we want to support facets, the current approach won't work, since the serializer never sees the facet objects.