-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
[fix #4265] Port Document search API for Elasticsearch 6.x #4309
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 1 commit
bb4e5aa
4dc3e35
fe2aef1
41a67a3
85b4686
b01981f
75de7f6
e2e8cbb
733b030
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
from rest_framework import generics | ||
|
||
from readthedocs.search.documents import PageDocument | ||
from readthedocs.search.filters import SearchFilterBackend | ||
from readthedocs.search.pagination import SearchPagination | ||
from readthedocs.search.serializers import PageSearchSerializer | ||
|
||
|
||
class PageSearchAPIView(generics.ListAPIView): | ||
pagination_class = SearchPagination | ||
filter_backends = [SearchFilterBackend] | ||
serializer_class = PageSearchSerializer | ||
|
||
def get_queryset(self): | ||
query = self.request.query_params.get('query') | ||
queryset = PageDocument.search(query=query) | ||
return queryset |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,6 @@ | ||
from django.conf import settings | ||
from django_elasticsearch_dsl import DocType, Index, fields | ||
from elasticsearch_dsl.query import SimpleQueryString, Bool | ||
|
||
from readthedocs.projects.models import Project, HTMLFile | ||
from .conf import SEARCH_EXCLUDED_FILE | ||
|
@@ -60,14 +61,19 @@ class Meta(object): | |
content = fields.TextField(attr='processed_json.content') | ||
path = fields.TextField(attr='processed_json.path') | ||
|
||
# Fields to perform search with weight | ||
search_fields = ['title^10', 'headers^5', 'content'] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this used by the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Its not actually used by There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Hrm, good point. I guess it's fine for now then. |
||
|
||
@classmethod | ||
def faceted_search(cls, query, projects_list=None, versions_list=None, using=None, index=None): | ||
es_query = cls.get_es_query(query=query) | ||
kwargs = { | ||
'using': using or cls._doc_type.using, | ||
'index': index or cls._doc_type.index, | ||
'doc_types': [cls], | ||
'model': cls._doc_type.model, | ||
'query': query | ||
'query': es_query, | ||
'fields': cls.search_fields | ||
} | ||
filters = {} | ||
|
||
|
@@ -80,6 +86,32 @@ def faceted_search(cls, query, projects_list=None, versions_list=None, using=Non | |
|
||
return FileSearch(**kwargs) | ||
|
||
@classmethod | ||
def search(cls, using=None, index=None, **kwargs): | ||
es_search = super(PageDocument, cls).search(using=using, index=index) | ||
query = kwargs.pop('query') | ||
es_query = cls.get_es_query(query=query) | ||
|
||
es_search = es_search.query(es_query) | ||
return es_search | ||
|
||
@classmethod | ||
def get_es_query(cls, query): | ||
"""Return the Elasticsearch query generated from the query string""" | ||
all_queries = [] | ||
|
||
# Need to search for both 'AND' and 'OR' operations | ||
# The score of AND should be higher as it satisfies both OR and AND | ||
for operator in ['AND', 'OR']: | ||
query_string = SimpleQueryString(query=query, fields=cls.search_fields, | ||
default_operator=operator) | ||
all_queries.append(query_string) | ||
|
||
# Run bool query with should, so it returns result where either of the query matches | ||
bool_query = Bool(should=all_queries) | ||
|
||
return bool_query | ||
|
||
def get_queryset(self): | ||
"""Overwrite default queryset to filter certain files to index""" | ||
queryset = super(PageDocument, self).get_queryset() | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -9,11 +9,13 @@ class RTDFacetedSearch(FacetedSearch): | |
# TODO: Remove the overwrite when the elastic/elasticsearch-dsl-py#916 | ||
# See more: https://github.com/elastic/elasticsearch-dsl-py/issues/916 | ||
|
||
def __init__(self, using, index, doc_types, model, **kwargs): | ||
def __init__(self, using, index, doc_types, model, fields=None, **kwargs): | ||
self.using = using | ||
self.index = index | ||
self.doc_types = doc_types | ||
self._model = model | ||
if fields: | ||
self.fields = fields | ||
super(RTDFacetedSearch, self).__init__(**kwargs) | ||
|
||
|
||
|
@@ -25,7 +27,6 @@ class ProjectSearch(RTDFacetedSearch): | |
|
||
|
||
class FileSearch(RTDFacetedSearch): | ||
fields = ['title^10', 'headers^5', 'content'] | ||
facets = { | ||
'project': TermsFacet(field='project'), | ||
'version': TermsFacet(field='version') | ||
|
@@ -34,17 +35,6 @@ class FileSearch(RTDFacetedSearch): | |
def query(self, search, query): | ||
"""Add query part to ``search``""" | ||
if query: | ||
all_queries = [] | ||
|
||
# Need to search for both 'AND' and 'OR' operations | ||
# The score of AND should be higher as it comes first | ||
for operator in ['AND', 'OR']: | ||
query_string = SimpleQueryString(query=query, fields=self.fields, | ||
default_operator=operator) | ||
all_queries.append(query_string) | ||
|
||
# Run bool query with should, so it returns result where either of the query matches | ||
bool_query = Bool(should=all_queries) | ||
search = search.query(bool_query) | ||
search = search.query(query) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Feels like we shouldn't be overwriting the object here. Will we return something invalid if there is no There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have actually a mixed feeling about this. Do you have any idea about how to do this withtout overriding the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I mean mostly just the name of the object. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah. |
||
|
||
return search |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,15 @@ | ||
from rest_framework import filters | ||
|
||
from readthedocs.search.utils import get_project_slug_list_or_404 | ||
|
||
|
||
class SearchFilterBackend(filters.BaseFilterBackend): | ||
""" | ||
Filter search result with project | ||
""" | ||
|
||
def filter_queryset(self, request, queryset, view): | ||
project_slug = request.query_params.get('project') | ||
project_slug_list = get_project_slug_list_or_404(project_slug=project_slug, | ||
user=request.user) | ||
return queryset.filter('terms', project=project_slug_list) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Feels like we're conflating querysets and Search queries here again. It feels a bit odd. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah! I understand. Its not queryset, but a Search object but it is similar to queryset. |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,7 @@ | ||
from rest_framework.pagination import PageNumberPagination | ||
|
||
|
||
class SearchPagination(PageNumberPagination): | ||
page_size = 10 | ||
page_size_query_param = 'page_size' | ||
max_page_size = 100 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
from rest_framework import serializers | ||
|
||
|
||
class PageSearchSerializer(serializers.Serializer): | ||
title = serializers.CharField() | ||
headers = serializers.ListField() | ||
content = serializers.CharField() | ||
path = serializers.CharField() |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,7 +22,7 @@ | |
do_not_track, | ||
) | ||
from readthedocs.search import views as search_views | ||
|
||
from readthedocs.search.api import PageSearchAPIView | ||
|
||
v1_api = Api(api_name='v1') | ||
v1_api.register(UserResource()) | ||
|
@@ -67,6 +67,7 @@ | |
url(r'^api/', include(v1_api.urls)), | ||
url(r'^api/v2/', include('readthedocs.restapi.urls')), | ||
url(r'^api-auth/', include('rest_framework.urls', namespace='rest_framework')), | ||
url(r'^api/search/', PageSearchAPIView.as_view()), | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This should likely continue to live in There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah! I added it for testing purpose actually! |
||
] | ||
|
||
i18n_urls = [ | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Does
search()
return a queryset? Seems like it would return a search object or similar.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah. it returns a Search object.