Skip to content

Commit 05b7c3f

Browse files
authored
Merge pull request #5197 from rtfd/readd-search-signals
Refactor search code
2 parents 5e2c781 + 0a06726 commit 05b7c3f

28 files changed

+498
-769
lines changed

docs/development/search.rst

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,13 +37,14 @@ By default, Auto Indexing is turned off in development mode. To turn it on, chan
3737
After that, whenever documentation builds successfully, or a project gets added,
3838
the search index will update automatically.
3939

40-
4140
Architecture
4241
------------
4342
The search architecture is divided into 2 parts.
44-
One part is responsible for **indexing** the documents and projects and
45-
the other part is responsible for querying the Index to show the proper results to users.
46-
We use the `django-elasticsearch-dsl`_ package mostly to the keep the search working.
43+
44+
* One part is responsible for **indexing** the documents and projects (``documents.py``)
45+
* The other part is responsible for **querying** the Index to show the proper results to users (``faceted_search.py``)
46+
47+
We use the `django-elasticsearch-dsl`_ package for our Document abstraction.
4748
`django-elasticsearch-dsl`_ is a wrapper around `elasticsearch-dsl`_ for easy configuration
4849
with Django.
4950

@@ -72,11 +73,11 @@ and index/delete the documentation content from the `HTMLFile` instances.
7273

7374
How we index projects
7475
~~~~~~~~~~~~~~~~~~~~~
76+
7577
We also index project information in our search index so that the user can search for projects
76-
from the main site. `django-elasticsearch-dsl`_ listen `post_create` and `post_delete` signals of
78+
from the main site. We listen to the `post_create` and `post_delete` signals of
7779
`Project` model and index/delete into Elasticsearch accordingly.
7880

79-
8081
Elasticsearch Document
8182
~~~~~~~~~~~~~~~~~~~~~~
8283

@@ -88,9 +89,7 @@ As per requirements of `django-elasticsearch-dsl`_, it is stored in the
8889
`django-elasticsearch-dsl`_ listens to the `post_save` signal of `Project` model and
8990
then indexes/deletes into Elasticsearch.
9091

91-
**PageDocument**: It is used for indexing documentation of projects. By default, the auto
92-
indexing is turned off by `ignore_signals = settings.ES_PAGE_IGNORE_SIGNALS`.
93-
`settings.ES_PAGE_IGNORE_SIGNALS` is `False` both in development and production.
92+
**PageDocument**: It is used for indexing documentation of projects.
9493
As mentioned above, our `Search` app listens to the `bulk_post_create` and `bulk_post_delete`
9594
signals and indexes/deletes documentation into Elasticsearch. The signal listeners are in
9695
the `readthedocs/search/signals.py` file. Both of the signals are dispatched

readthedocs/core/static-src/core/js/doc-embed/search.js

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,18 @@ function attach_elastic_search_query(data) {
5555

5656
// Show highlighted texts
5757
if (highlight.content) {
58-
var content_text = xss(highlight.content[0]);
59-
var contents = $('<div class="context">');
60-
61-
contents.html(content_text);
62-
contents.find('em').addClass('highlighted');
63-
list_item.append(contents);
58+
for (var index = 0; index < highlight.content.length; index += 1) {
59+
if (index < 3) {
60+
// Show up to 3 results for search
61+
var content = highlight.content[index];
62+
var content_text = xss(content);
63+
var contents = $('<div class="context">');
64+
65+
contents.html("..." + content_text + "...");
66+
contents.find('em').addClass('highlighted');
67+
list_item.append(contents);
68+
}
69+
}
6470
}
6571

6672
Search.output.append(list_item);
@@ -71,10 +77,11 @@ function attach_elastic_search_query(data) {
7177
if (!hit_list.length) {
7278
// Fallback to Sphinx's indexes
7379
Search.query_fallback(query);
80+
console.log('Read the Docs search failed. Falling back to Sphinx search.');
7481
}
7582
else {
7683
Search.status.text(
77-
_('Search finished, found %s page(s) matching the search query.').replace('%s', total_count)
84+
_('Search finished, found %s page(s) matching the search query.').replace('%s', hit_list.length)
7885
);
7986
}
8087
})

readthedocs/core/static/core/js/readthedocs-doc-embed.js

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

readthedocs/projects/models.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,11 +1145,15 @@ def get_processed_json(self):
11451145
return process_file(file_path)
11461146
except Exception:
11471147
log.warning(
1148-
'Unhandled exception during search processing file: %s' % file_path
1148+
'Unhandled exception during search processing file: %s',
1149+
file_path,
11491150
)
11501151
return {
1151-
'headers': [], 'content': '', 'path': file_path, 'title': '',
1152-
'sections': []
1152+
'headers': [],
1153+
'content': '',
1154+
'path': file_path,
1155+
'title': '',
1156+
'sections': [],
11531157
}
11541158

11551159
@cached_property

readthedocs/projects/urls/public.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from readthedocs.constants import pattern_opts
99
from readthedocs.projects.views import public
1010
from readthedocs.projects.views.public import ProjectDetailView, ProjectIndex
11+
from readthedocs.search import views as search_views
1112

1213

1314
urlpatterns = [
@@ -50,7 +51,7 @@
5051
),
5152
url(
5253
r'^(?P<project_slug>{project_slug})/search/$'.format(**pattern_opts),
53-
public.elastic_project_search,
54+
search_views.elastic_project_search,
5455
name='elastic_project_search',
5556
),
5657
url(

readthedocs/projects/views/public.py

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,7 @@
2525
from readthedocs.builds.models import Version
2626
from readthedocs.builds.views import BuildTriggerMixin
2727
from readthedocs.projects.models import Project
28-
from readthedocs.search.documents import PageDocument
2928
from readthedocs.projects.templatetags.projects_tags import sort_version_aware
30-
from readthedocs.search.views import LOG_TEMPLATE
3129

3230
from .base import ProjectOnboardMixin
3331

@@ -240,48 +238,6 @@ def project_download_media(request, project_slug, type_, version_slug):
240238
return response
241239

242240

243-
def elastic_project_search(request, project_slug):
244-
"""Use elastic search to search in a project."""
245-
queryset = Project.objects.protected(request.user)
246-
project = get_object_or_404(queryset, slug=project_slug)
247-
version_slug = request.GET.get('version', LATEST)
248-
query = request.GET.get('q', None)
249-
results = None
250-
if query:
251-
user = ''
252-
if request.user.is_authenticated:
253-
user = request.user
254-
log.info(
255-
LOG_TEMPLATE.format(
256-
user=user,
257-
project=project or '',
258-
type='inproject',
259-
version=version_slug or '',
260-
language='',
261-
msg=query or '',
262-
),
263-
)
264-
265-
if query:
266-
req = PageDocument.simple_search(query=query)
267-
filtered_query = (
268-
req.filter('term', project=project.slug)
269-
.filter('term', version=version_slug)
270-
)
271-
paginated_query = filtered_query[:50]
272-
results = paginated_query.execute()
273-
274-
return render(
275-
request,
276-
'search/elastic_project_search.html',
277-
{
278-
'project': project,
279-
'query': query,
280-
'results': results,
281-
},
282-
)
283-
284-
285241
def project_versions(request, project_slug):
286242
"""
287243
Project version list view.

readthedocs/search/api.py

Lines changed: 93 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,54 @@
1+
import logging
2+
from pprint import pformat
3+
14
from rest_framework import generics
5+
from rest_framework import serializers
26
from rest_framework.exceptions import ValidationError
7+
from rest_framework.pagination import PageNumberPagination
38

49
from readthedocs.search.documents import PageDocument
5-
from readthedocs.search.filters import SearchFilterBackend
6-
from readthedocs.search.pagination import SearchPagination
7-
from readthedocs.search.serializers import PageSearchSerializer
810
from readthedocs.search.utils import get_project_list_or_404
911

12+
log = logging.getLogger(__name__)
13+
14+
15+
class SearchPagination(PageNumberPagination):
16+
page_size = 25
17+
page_size_query_param = 'page_size'
18+
max_page_size = 100
19+
20+
21+
class PageSearchSerializer(serializers.Serializer):
22+
project = serializers.CharField()
23+
version = serializers.CharField()
24+
title = serializers.CharField()
25+
path = serializers.CharField()
26+
link = serializers.SerializerMethodField()
27+
highlight = serializers.SerializerMethodField()
28+
29+
def get_link(self, obj):
30+
projects_url = self.context.get('projects_url')
31+
if projects_url:
32+
docs_url = projects_url[obj.project]
33+
return docs_url + obj.path
34+
35+
def get_highlight(self, obj):
36+
highlight = getattr(obj.meta, 'highlight', None)
37+
if highlight:
38+
if hasattr(highlight, 'content'):
39+
# Change results to turn newlines in highlight into periods
40+
# https://github.com/rtfd/readthedocs.org/issues/5168
41+
highlight.content = [result.replace('\n', '. ') for result in highlight.content]
42+
ret = highlight.to_dict()
43+
log.debug('API Search highlight: %s', pformat(ret))
44+
return ret
45+
1046

1147
class PageSearchAPIView(generics.ListAPIView):
48+
49+
"""Main entry point to perform a search using Elasticsearch."""
50+
1251
pagination_class = SearchPagination
13-
filter_backends = [SearchFilterBackend]
1452
serializer_class = PageSearchSerializer
1553

1654
def get_queryset(self):
@@ -24,10 +62,25 @@ def get_queryset(self):
2462
# Validate all the required params are there
2563
self.validate_query_params()
2664
query = self.request.query_params.get('q', '')
27-
queryset = PageDocument.simple_search(query=query)
65+
kwargs = {'filter_by_user': False}
66+
kwargs['projects_list'] = [p.slug for p in self.get_all_projects()]
67+
kwargs['versions_list'] = self.request.query_params.get('version')
68+
user = self.request.user
69+
queryset = PageDocument.faceted_search(
70+
query=query, user=user, **kwargs
71+
)
2872
return queryset
2973

3074
def validate_query_params(self):
75+
"""
76+
Validate all required query params are passed on the request.
77+
78+
Query params required are: ``q``, ``project`` and ``version``.
79+
80+
:rtype: None
81+
82+
:raises: ValidationError if one of them is missing.
83+
"""
3184
required_query_params = {'q', 'project', 'version'} # python `set` literal is `{}`
3285
request_params = set(self.request.query_params.keys())
3386
missing_params = required_query_params - request_params
@@ -39,17 +92,47 @@ def validate_query_params(self):
3992
raise ValidationError(errors)
4093

4194
def get_serializer_context(self):
42-
context = super(PageSearchAPIView, self).get_serializer_context()
95+
context = super().get_serializer_context()
4396
context['projects_url'] = self.get_all_projects_url()
4497
return context
4598

99+
def get_all_projects(self):
100+
"""
101+
Return a list containing the project itself and all its subprojects.
102+
103+
The project slug is retrieved from ``project`` query param.
104+
105+
:rtype: list
106+
107+
:raises: Http404 if project is not found
108+
"""
109+
project_slug = self.request.query_params.get('project')
110+
version_slug = self.request.query_params.get('version')
111+
all_projects = get_project_list_or_404(
112+
project_slug=project_slug, user=self.request.user, version_slug=version_slug,
113+
)
114+
return all_projects
115+
46116
def get_all_projects_url(self):
117+
"""
118+
Return a dict containing the project slug and its version URL.
119+
120+
The dictionary contains the project and its subprojects. Each project's
121+
slug is used as a key and the documentation URL for that project and
122+
version as the value.
123+
124+
Example:
125+
126+
{
127+
"requests": "https://requests.readthedocs.io/en/latest/",
128+
"requests-oauth": "https://requests-oauth.readthedocs.io/en/latest/",
129+
}
130+
131+
:rtype: dict
132+
"""
133+
all_projects = self.get_all_projects()
47134
version_slug = self.request.query_params.get('version')
48-
project_slug = self.request.query_params.get('project')
49-
all_projects = get_project_list_or_404(project_slug=project_slug, user=self.request.user)
50135
projects_url = {}
51-
52136
for project in all_projects:
53137
projects_url[project.slug] = project.get_docs_url(version_slug=version_slug)
54-
55138
return projects_url

readthedocs/search/apps.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,8 @@
1-
"""Project app config"""
2-
31
from django.apps import AppConfig
42

53

64
class SearchConfig(AppConfig):
75
name = 'readthedocs.search'
86

97
def ready(self):
10-
from .signals import index_html_file, remove_html_file
8+
import readthedocs.search.signals # noqa

0 commit comments

Comments
 (0)