Skip to content

Commit f2e355f

Browse files
committed
Re-add Domain search
1 parent 98aae46 commit f2e355f

File tree

8 files changed

+1176
-306
lines changed

8 files changed

+1176
-306
lines changed

readthedocs/search/documents.py

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,8 @@
44
from django.conf import settings
55
from django_elasticsearch_dsl import DocType, Index, fields
66

7-
from readthedocs.projects.models import HTMLFile, Project
8-
7+
from readthedocs.projects.models import Project, HTMLFile
8+
from readthedocs.domaindata.models import DomainData
99

1010
project_conf = settings.ES_INDEXES['project']
1111
project_index = Index(project_conf['name'])
@@ -15,9 +15,52 @@
1515
page_index = Index(page_conf['name'])
1616
page_index.settings(**page_conf['settings'])
1717

18+
# Index for domain data; its name and settings are read from
# ``settings.ES_INDEXES['domain']``.
domain_conf = settings.ES_INDEXES['domain']
domain_index = Index(domain_conf['name'])
domain_index.settings(**domain_conf['settings'])
21+
1822
log = logging.getLogger(__name__)
1923

2024

25+
@domain_index.doc_type
class DomainDocument(DocType):

    """Search document built from ``DomainData`` records."""

    project = fields.KeywordField(attr='project.slug')
    version = fields.KeywordField(attr='version.slug')
    doc_type = fields.KeywordField(attr='doc_type')
    anchor = fields.KeywordField(attr='anchor')

    class Meta(object):
        model = DomainData
        fields = ('name', 'display_name', 'doc_name')
        ignore_signals = True

    @classmethod
    def faceted_search(cls, query, user, doc_type=None):
        """
        Build a ``DomainSearch`` for ``query`` on behalf of ``user``.

        :param query: query passed through to ``DomainSearch``
        :param user: user passed through to ``DomainSearch``
        :param doc_type: when truthy, passed as a ``doc_type`` filter
        :returns: the constructed ``DomainSearch`` instance
        """
        # Imported locally: ``faceted_search`` imports this module, so a
        # module-level import here would be circular.
        from readthedocs.search.faceted_search import DomainSearch

        kwargs = {
            'user': user,
            'query': query,
        }

        if doc_type:
            kwargs['filters'] = {'doc_type': doc_type}

        return DomainSearch(**kwargs)

    def get_queryset(self):
        """Return the default queryset without the types we do not index."""
        # Domain types that should never be indexed.
        excluded_types = ['std:doc', 'std:label']

        # One ``__in`` exclusion replaces a chain of per-type ``exclude``
        # calls and selects the same rows.
        return super().get_queryset().exclude(type__in=excluded_types)
62+
63+
2164
@project_index.doc_type
2265
class ProjectDocument(DocType):
2366

readthedocs/search/faceted_search.py

Lines changed: 87 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,44 @@
1+
# -*- coding: utf-8 -*-
12
import logging
23

34
from elasticsearch_dsl import FacetedSearch, TermsFacet
45
from elasticsearch_dsl.query import Bool, SimpleQueryString
56

7+
from readthedocs.search.documents import (
8+
DomainDocument,
9+
PageDocument,
10+
ProjectDocument,
11+
)
12+
from readthedocs.search.signals import (
13+
before_domain_search,
14+
before_file_search,
15+
before_project_search,
16+
)
17+
618
from readthedocs.core.utils.extend import SettingsOverrideObject
7-
from readthedocs.search.documents import PageDocument, ProjectDocument
819

920
log = logging.getLogger(__name__)
1021

1122

23+
# Facet names that ``RTDFacetedSearch.__init__`` strips from its keyword
# arguments before calling the parent constructor.
ALL_FACETS = [
    'project',
    'version',
    'doc_type',
    'language',
    'index',
]
24+
25+
1226
class RTDFacetedSearch(FacetedSearch):
1327

1428
def __init__(self, user, **kwargs):
    """
    Store the user and normalise facet keyword arguments.

    Keyword arguments named after one of this class's facets are moved
    into ``kwargs['filters']``. Any remaining ``ALL_FACETS`` names are
    dropped before the parent constructor is called.
    """
    self.user = user
    self.filter_by_user = kwargs.pop('filter_by_user', None)

    # Turn facet-named keyword arguments into filters.
    for name in self.facets:
        if name not in kwargs:
            continue
        value = kwargs.pop(name)
        kwargs.setdefault('filters', {})[name] = value

    # Don't pass along unnecessary filters
    for name in ALL_FACETS:
        kwargs.pop(name, None)

    super().__init__(**kwargs)
40+
41+
def search(self):
1542
"""
1643
Pass in a user in order to filter search results by privacy.
1744
@@ -20,48 +47,36 @@ def __init__(self, user, **kwargs):
2047
The `self.user` attribute isn't currently used on the .org,
2148
but is used on the .com
2249
"""
23-
self.user = user
24-
self.filter_by_user = kwargs.pop('filter_by_user', None)
25-
super().__init__(**kwargs)
50+
s = super().search()
51+
s = s.source(exclude=['content', 'headers'])
52+
resp = self.signal.send(sender=self, user=self.user, search=s)
53+
if resp:
54+
# Signal return a search object
55+
try:
56+
s = resp[0][1]
57+
except AttributeError:
58+
log.exception(
59+
'Failed to return a search object from search signals'
60+
)
61+
# Return 25 results
62+
return s[:25]
2663

2764
def query(self, search, query):
2865
"""
2966
Add query part to ``search`` when needed.
3067
31-
Also does HTML encoding of results to avoid XSS issues.
68+
Also:
69+
70+
* Adds SimpleQueryString instead of default query.
71+
* Adds HTML encoding of results to avoid XSS issues.
3272
"""
33-
search = super().query(search, query)
3473
search = search.highlight_options(encoder='html', number_of_fragments=3)
35-
search = search.source(exclude=['content', 'headers'])
36-
return search
37-
38-
39-
class ProjectSearchBase(RTDFacetedSearch):
40-
facets = {'language': TermsFacet(field='language')}
41-
doc_types = [ProjectDocument]
42-
index = ProjectDocument._doc_type.index
43-
fields = ('name^10', 'slug^5', 'description')
44-
45-
46-
class PageSearchBase(RTDFacetedSearch):
47-
facets = {
48-
'project': TermsFacet(field='project'),
49-
'version': TermsFacet(field='version')
50-
}
51-
doc_types = [PageDocument]
52-
index = PageDocument._doc_type.index
53-
fields = ['title^10', 'headers^5', 'content']
54-
55-
def query(self, search, query):
56-
"""Use a custom SimpleQueryString instead of default query."""
57-
58-
search = super().query(search, query)
5974

6075
all_queries = []
6176

6277
# need to search for both 'and' and 'or' operations
6378
# the score of and should be higher as it satisfies both or and and
64-
for operator in ['AND', 'OR']:
79+
for operator in ['and', 'or']:
6580
query_string = SimpleQueryString(
6681
query=query, fields=self.fields, default_operator=operator
6782
)
@@ -74,23 +89,50 @@ def query(self, search, query):
7489
return search
7590

7691

77-
class PageSearch(SettingsOverrideObject):
78-
79-
"""
80-
Allow this class to be overridden based on CLASS_OVERRIDES setting.
81-
82-
This is primary used on the .com to adjust how we filter our search queries
83-
"""
92+
class DomainSearch(RTDFacetedSearch):

    """Faceted search over the ``DomainDocument`` index."""

    doc_types = [DomainDocument]
    index = DomainDocument._doc_type.index
    fields = ('display_name^5', 'name')
    signal = before_domain_search

    # Each facet aggregates on the document field of the same name.
    facets = {
        name: TermsFacet(field=name)
        for name in ('project', 'version', 'doc_type')
    }
84102

85-
_default_class = PageSearchBase
86103

104+
class ProjectSearch(RTDFacetedSearch):

    """Faceted search over the ``ProjectDocument`` index."""

    doc_types = [ProjectDocument]
    index = ProjectDocument._doc_type.index
    fields = ('name^10', 'slug^5', 'description')
    signal = before_project_search
    facets = {'language': TermsFacet(field='language')}
87112

88-
class ProjectSearch(SettingsOverrideObject):
89113

90-
"""
91-
Allow this class to be overridden based on CLASS_OVERRIDES setting.
114+
class PageSearchBase(RTDFacetedSearch):

    """Faceted search over the ``PageDocument`` index."""

    facets = {
        'project': TermsFacet(field='project'),
        'version': TermsFacet(field='version'),
    }
    # ``RTDFacetedSearch.search()`` sends ``self.signal``; without this
    # attribute it would raise ``AttributeError``.
    signal = before_file_search
    doc_types = [PageDocument]
    index = PageDocument._doc_type.index
    fields = ['title^10', 'headers^5', 'content']
92122

93-
This is primary used on the .com to adjust how we filter our search queries
94-
"""
95123

96-
_default_class = ProjectSearchBase
124+
class AllSearch(RTDFacetedSearch):

    """Faceted search across the domain, page and project indexes."""

    signal = before_file_search
    doc_types = [DomainDocument, PageDocument, ProjectDocument]
    # One index per document type, in the same order as ``doc_types``.
    index = [document._doc_type.index for document in doc_types]
    fields = (
        'title^10',
        'headers^5',
        'content',
        'name^20',
        'slug^5',
        'description',
        'display_name^5',
    )

    facets = {
        name: TermsFacet(field=name)
        for name in ('project', 'version', 'language', 'doc_type')
    }
    # The ``index`` facet aggregates on the ``_index`` field.
    facets['index'] = TermsFacet(field='_index')

0 commit comments

Comments
 (0)