-
-
Notifications
You must be signed in to change notification settings - Fork 3.6k
Refactor search code #5197
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactor search code #5197
Changes from 10 commits
47e3c1a
a84bb15
6f36acd
0bfa998
936465d
0496cf6
ab28491
617f47f
d0f6082
af8a28f
5f197ec
465628e
e3e245c
e909759
49098d7
07dc26b
e9f3f3f
d6d02da
336e674
eb65817
144ed0e
d314f58
03ad482
5dfc17a
fcc9620
701ecdb
f636737
7583f9a
747da90
6c317f1
e2655de
f457ff7
7b2013c
0741a25
85956e0
6ac2f35
e062191
8e4cc2b
8c7bda4
5b9f460
e2e271b
a993f08
d52b968
fc277fa
417ea45
80c58c7
72d867f
5f118ff
e263e69
1a3e146
fab9f42
0a06726
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -2,15 +2,13 @@ | |
from rest_framework.exceptions import ValidationError | ||
|
||
from readthedocs.search.documents import PageDocument | ||
from readthedocs.search.filters import SearchFilterBackend | ||
from readthedocs.search.pagination import SearchPagination | ||
from readthedocs.search.serializers import PageSearchSerializer | ||
from readthedocs.search.utils import get_project_list_or_404 | ||
|
||
|
||
class PageSearchAPIView(generics.ListAPIView): | ||
pagination_class = SearchPagination | ||
filter_backends = [SearchFilterBackend] | ||
serializer_class = PageSearchSerializer | ||
|
||
def get_queryset(self): | ||
|
@@ -24,7 +22,15 @@ def get_queryset(self): | |
# Validate all the required params are there | ||
self.validate_query_params() | ||
query = self.request.query_params.get('q', '') | ||
queryset = PageDocument.simple_search(query=query) | ||
kwargs = {} | ||
kwargs['projects_list'] = [p.slug for p in self.get_all_projects()] | ||
kwargs['versions_list'] = self.request.query_params.get('version') | ||
user = '' | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think it's a better pattern to default to the I think you can pass There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I agree here, unless there is some significance to ES handling the user as an empty string. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nope, was just the old way we were doing it. Fixed it now. |
||
if self.request.user.is_authenticated: | ||
user = self.request.user | ||
queryset = PageDocument.faceted_search( | ||
query=query, user=user, **kwargs | ||
) | ||
return queryset | ||
|
||
def validate_query_params(self): | ||
|
@@ -43,13 +49,15 @@ def get_serializer_context(self): | |
context['projects_url'] = self.get_all_projects_url() | ||
return context | ||
|
||
def get_all_projects_url(self): | ||
version_slug = self.request.query_params.get('version') | ||
def get_all_projects(self): | ||
project_slug = self.request.query_params.get('project') | ||
all_projects = get_project_list_or_404(project_slug=project_slug, user=self.request.user) | ||
projects_url = {} | ||
return all_projects | ||
|
||
def get_all_projects_url(self): | ||
all_projects = self.get_all_projects() | ||
version_slug = self.request.query_params.get('version') | ||
projects_url = {} | ||
for project in all_projects: | ||
projects_url[project.slug] = project.get_docs_url(version_slug=version_slug) | ||
|
||
return projects_url |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,14 +30,19 @@ class Meta(object): | |
}) | ||
language = fields.KeywordField() | ||
|
||
# Fields to perform search with weight | ||
search_fields = ['name^5', 'description'] | ||
|
||
@classmethod | ||
def faceted_search(cls, query, language=None, using=None, index=None): | ||
def faceted_search(cls, query, user, language=None, using=None, index=None): | ||
kwargs = { | ||
'user': user, | ||
'using': using or cls._doc_type.using, | ||
'index': index or cls._doc_type.index, | ||
'doc_types': [cls], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @ericholscher I think we can pass the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Will take a peek at that in a refactor, I think it's OK for now. |
||
'model': cls._doc_type.model, | ||
'query': query | ||
'query': query, | ||
'fields': cls.search_fields | ||
} | ||
|
||
if language: | ||
|
@@ -69,9 +74,12 @@ class Meta(object): | |
'search/index.html', 'genindex/index.html', 'py-modindex/index.html'] | ||
|
||
@classmethod | ||
def faceted_search(cls, query, projects_list=None, versions_list=None, using=None, index=None): | ||
def faceted_search( | ||
cls, query, user, projects_list=None, versions_list=None, using=None, index=None | ||
): | ||
es_query = cls.get_es_query(query=query) | ||
kwargs = { | ||
'user': user, | ||
'using': using or cls._doc_type.using, | ||
'index': index or cls._doc_type.index, | ||
'doc_types': [cls], | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same here, pass the |
||
|
@@ -90,26 +98,6 @@ def faceted_search(cls, query, projects_list=None, versions_list=None, using=Non | |
|
||
return FileSearch(**kwargs) | ||
|
||
@classmethod | ||
def simple_search(cls, query, using=None, index=None): | ||
""" | ||
Do a search without facets. | ||
|
||
This is used in: | ||
|
||
* The Docsearch API | ||
* The Project Admin Search page | ||
""" | ||
|
||
es_search = cls.search(using=using, index=index) | ||
es_search = es_search.highlight_options(encoder='html') | ||
|
||
es_query = cls.get_es_query(query=query) | ||
highlighted_fields = [f.split('^', 1)[0] for f in cls.search_fields] | ||
es_search = es_search.query(es_query).highlight(*highlighted_fields) | ||
|
||
return es_search | ||
|
||
@classmethod | ||
def get_es_query(cls, query): | ||
"""Return the Elasticsearch query generated from the query string""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,9 @@ | ||
import logging | ||
|
||
from elasticsearch_dsl import FacetedSearch, TermsFacet | ||
from readthedocs.search.signals import before_file_search, before_project_search | ||
|
||
log = logging.getLogger(__name__) | ||
|
||
|
||
class RTDFacetedSearch(FacetedSearch): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This class if it's not used here, should probably lives on corporate repo. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Eh? It's definitely used on the .org. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed the comment. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Nevermind. Got confused by the comment. |
||
|
@@ -8,7 +13,8 @@ class RTDFacetedSearch(FacetedSearch): | |
# TODO: Remove the overwrite when the elastic/elasticsearch-dsl-py#916 | ||
# See more: https://github.com/elastic/elasticsearch-dsl-py/issues/916 | ||
|
||
def __init__(self, using, index, doc_types, model, fields=None, **kwargs): | ||
def __init__(self, user, using, index, doc_types, model, fields=None, **kwargs): | ||
self.user = user | ||
self.using = using | ||
self.index = index | ||
self.doc_types = doc_types | ||
|
@@ -17,28 +23,47 @@ def __init__(self, using, index, doc_types, model, fields=None, **kwargs): | |
self.fields = fields | ||
super(RTDFacetedSearch, self).__init__(**kwargs) | ||
|
||
def search(self): | ||
""" | ||
Filter out full content on search results | ||
|
||
This was causing all of the indexed content to be returned, | ||
which was never used on the client side. | ||
""" | ||
s = super().search() | ||
s = s.source(exclude=['content', 'headers']) | ||
resp = self.signal.send(sender=self, user=self.user, search=s) | ||
if resp: | ||
# Signal return a search object | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think its not appropiate approach to fire a signal and use the returned object. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are some docs about this: https://docs.djangoproject.com/en/1.11/topics/signals/#django.dispatch.Signal.send We should be careful about how we manipulate the result considering that it's a list. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yea, I tried to manipulate the object that was passed, but it didn't work in my testing. I'm not sure another good way to implement this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think the line Why are we using a signal for this? I suppose that it's because we need to override this on .com. In that case, isn't it better to make it a method and override the class using our SettingsOverride system?
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yea, having a class override on the .com is probably a cleaner solution here. I will work on this. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Pushed up a new version with this approach. |
||
try: | ||
s = resp[0][1] | ||
except AttributeError: | ||
log.exception('Failed to return a search object from search signals') | ||
return s | ||
|
||
def query(self, search, query): | ||
""" | ||
Add query part to ``search`` when needed | ||
|
||
Also does HTML encoding of results to avoid XSS issues. | ||
|
||
""" | ||
search = search.highlight_options(encoder='html', number_of_fragments=3) | ||
if not isinstance(query, str): | ||
search = search.query(query) | ||
return search | ||
|
||
|
||
class ProjectSearch(RTDFacetedSearch): | ||
fields = ['name^5', 'description'] | ||
facets = { | ||
'language': TermsFacet(field='language') | ||
} | ||
signal = before_project_search | ||
|
||
|
||
class FileSearch(RTDFacetedSearch): | ||
facets = { | ||
'project': TermsFacet(field='project'), | ||
'version': TermsFacet(field='version') | ||
} | ||
|
||
def query(self, search, query): | ||
""" | ||
Add query part to ``search`` | ||
|
||
Overriding because we pass ES Query object instead of string | ||
""" | ||
search = search.highlight_options(encoder='html') | ||
if query: | ||
search = search.query(query) | ||
|
||
return search | ||
signal = before_file_search |
This file was deleted.
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,8 +12,8 @@ | |
@pytest.mark.search | ||
class TestDocumentSearch(object): | ||
|
||
def __init__(self): | ||
# This reverse needs to be inside the ``__init__`` method because from | ||
def setUp(self): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
|
||
# This reverse needs to be inside the ``setUp`` method because from | ||
# the Corporate site we don't define this URL if ``-ext`` module is not | ||
# installed | ||
self.url = reverse('doc_search') | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
So, JS quirk here.
for ... in
isn't actually for iterables, it's for properties on an object. It's better to use the oldfor (var i; ...)
approach for arrays:https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for...in
There is
for ... of
, which loops over iterables, but browser support is still new:https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Statements/for...of#Browser_compatibility