Skip to content

Commit 5234f41

Browse files
authored
Merge pull request #7634 from readthedocs/search-subprojects
Search: allow to search on different versions of subprojects
2 parents aa92a07 + 2f4af17 commit 5234f41

File tree

4 files changed

+152
-29
lines changed

4 files changed

+152
-29
lines changed

readthedocs/projects/models.py

+24-10
Original file line numberDiff line numberDiff line change
@@ -1568,8 +1568,14 @@ def add_features(sender, **kwargs):
15681568
SKIP_SYNC_VERSIONS = 'skip_sync_versions'
15691569
CACHED_ENVIRONMENT = 'cached_environment'
15701570
LIMIT_CONCURRENT_BUILDS = 'limit_concurrent_builds'
1571+
1572+
# Search related features
15711573
DISABLE_SERVER_SIDE_SEARCH = 'disable_server_side_search'
15721574
ENABLE_MKDOCS_SERVER_SIDE_SEARCH = 'enable_mkdocs_server_side_search'
1575+
DEFAULT_TO_FUZZY_SEARCH = 'default_to_fuzzy_search'
1576+
INDEX_FROM_HTML_FILES = 'index_from_html_files'
1577+
SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION = 'search_subprojects_on_default_version'
1578+
15731579
FORCE_SPHINX_FROM_VENV = 'force_sphinx_from_venv'
15741580
LIST_PACKAGES_INSTALLED_ENV = 'list_packages_installed_env'
15751581
VCS_REMOTE_LISTING = 'vcs_remote_listing'
@@ -1578,8 +1584,6 @@ def add_features(sender, **kwargs):
15781584
USE_SPHINX_BUILDERS = 'use_sphinx_builders'
15791585
DEDUPLICATE_BUILDS = 'deduplicate_builds'
15801586
USE_SPHINX_RTD_EXT_LATEST = 'rtd_sphinx_ext_latest'
1581-
DEFAULT_TO_FUZZY_SEARCH = 'default_to_fuzzy_search'
1582-
INDEX_FROM_HTML_FILES = 'index_from_html_files'
15831587
DONT_CREATE_INDEX = 'dont_create_index'
15841588
USE_NEW_PIP_RESOLVER = 'use_new_pip_resolver'
15851589
DONT_INSTALL_LATEST_PIP = 'dont_install_latest_pip'
@@ -1667,6 +1671,8 @@ def add_features(sender, **kwargs):
16671671
LIMIT_CONCURRENT_BUILDS,
16681672
_('Limit the amount of concurrent builds'),
16691673
),
1674+
1675+
# Search related features.
16701676
(
16711677
DISABLE_SERVER_SIDE_SEARCH,
16721678
_('Disable server side search'),
@@ -1675,6 +1681,22 @@ def add_features(sender, **kwargs):
16751681
ENABLE_MKDOCS_SERVER_SIDE_SEARCH,
16761682
_('Enable server side search for MkDocs projects'),
16771683
),
1684+
(
1685+
DEFAULT_TO_FUZZY_SEARCH,
1686+
_('Default to fuzzy search for simple search queries'),
1687+
),
1688+
(
1689+
INDEX_FROM_HTML_FILES,
1690+
_('Index content directly from html files instead or relying in other sources'),
1691+
),
1692+
(
1693+
SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION,
1694+
_(
1695+
'When searching subprojects default to its default version if it doesn\'t '
1696+
'have the same version as the main project'
1697+
),
1698+
),
1699+
16781700
(
16791701
FORCE_SPHINX_FROM_VENV,
16801702
_('Force to use Sphinx from the current virtual environment'),
@@ -1710,14 +1732,6 @@ def add_features(sender, **kwargs):
17101732
USE_SPHINX_RTD_EXT_LATEST,
17111733
_('Use latest version of the Read the Docs Sphinx extension'),
17121734
),
1713-
(
1714-
DEFAULT_TO_FUZZY_SEARCH,
1715-
_('Default to fuzzy search for simple search queries'),
1716-
),
1717-
(
1718-
INDEX_FROM_HTML_FILES,
1719-
_('Index content directly from html files instead or relying in other sources'),
1720-
),
17211735
(
17221736
DONT_CREATE_INDEX,
17231737
_('Do not create index.md or README.rst if the project does not have one.'),

readthedocs/search/api.py

+40-14
Original file line numberDiff line numberDiff line change
@@ -227,14 +227,26 @@ def _get_all_projects_data(self):
227227
subprojects = Project.objects.filter(
228228
superprojects__parent_id=main_project.id,
229229
)
230-
for project in subprojects:
230+
for subproject in subprojects:
231231
version = self._get_subproject_version(
232232
version_slug=main_version.slug,
233-
subproject=project,
233+
subproject=subproject,
234234
)
235+
236+
# Fallback to the default version of the subproject.
237+
if (
238+
not version
239+
and main_project.has_feature(Feature.SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION)
240+
and subproject.default_version
241+
):
242+
version = self._get_subproject_version(
243+
version_slug=subproject.default_version,
244+
subproject=subproject,
245+
)
246+
235247
if version and self._has_permission(self.request.user, version):
236-
url = project.get_docs_url(version_slug=version.slug)
237-
projects_data[project.slug] = VersionData(
248+
url = subproject.get_docs_url(version_slug=version.slug)
249+
projects_data[subproject.slug] = VersionData(
238250
slug=version.slug,
239251
doctype=version.documentation_type,
240252
docs_url=url,
@@ -290,26 +302,40 @@ def get_queryset(self):
290302
calling ``search.execute().hits``. This is why a DSL search object
291303
is compatible with DRF's paginator.
292304
"""
305+
main_project = self._get_project()
306+
main_version = self._get_version()
307+
projects = {}
293308
filters = {}
294-
filters['project'] = list(self._get_all_projects_data().keys())
295-
filters['version'] = self._get_version().slug
296309

297-
# Check to avoid searching all projects in case these filters are empty.
298-
if not filters['project']:
299-
log.info('Unable to find a project to search')
300-
return []
301-
if not filters['version']:
302-
log.info('Unable to find a version to search')
303-
return []
310+
if main_project.has_feature(Feature.SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION):
311+
projects = {
312+
project: version.slug
313+
for project, version in self._get_all_projects_data().items()
314+
}
315+
# Check to avoid searching all projects in case it's empty.
316+
if not projects:
317+
log.info('Unable to find a version to search')
318+
return []
319+
else:
320+
filters['project'] = list(self._get_all_projects_data().keys())
321+
filters['version'] = main_version.slug
322+
# Check to avoid searching all projects in case these filters are empty.
323+
if not filters['project']:
324+
log.info('Unable to find a project to search')
325+
return []
326+
if not filters['version']:
327+
log.info('Unable to find a version to search')
328+
return []
304329

305330
query = self.request.query_params['q']
306331
queryset = PageSearch(
307332
query=query,
333+
projects=projects,
308334
filters=filters,
309335
user=self.request.user,
310336
# We use a permission class to control authorization
311337
filter_by_user=False,
312-
use_advanced_query=not self._get_project().has_feature(Feature.DEFAULT_TO_FUZZY_SEARCH),
338+
use_advanced_query=not main_project.has_feature(Feature.DEFAULT_TO_FUZZY_SEARCH),
313339
)
314340
return queryset
315341

readthedocs/search/faceted_search.py

+37-5
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
MultiMatch,
1212
Nested,
1313
SimpleQueryString,
14+
Term,
1415
Wildcard,
1516
)
1617

@@ -38,12 +39,23 @@ class RTDFacetedSearch(FacetedSearch):
3839
'post_tags': ['</span>'],
3940
}
4041

41-
def __init__(self, query=None, filters=None, user=None, use_advanced_query=True, **kwargs):
42+
def __init__(
43+
self,
44+
query=None,
45+
filters=None,
46+
projects=None,
47+
user=None,
48+
use_advanced_query=True,
49+
**kwargs,
50+
):
4251
"""
4352
Pass in a user in order to filter search results by privacy.
4453
45-
If `use_advanced_query` is `True`,
46-
force to always use `SimpleQueryString` for the text query object.
54+
:param projects: A dictionary of project slugs mapped to a version slug.
55+
Results are filtered with these values.
56+
57+
:param use_advanced_query: If `True` forces to always use
58+
`SimpleQueryString` for the text query object.
4759
4860
.. warning::
4961
@@ -53,6 +65,7 @@ def __init__(self, query=None, filters=None, user=None, use_advanced_query=True,
5365
self.user = user
5466
self.filter_by_user = kwargs.pop('filter_by_user', True)
5567
self.use_advanced_query = use_advanced_query
68+
self.projects = projects or {}
5669

5770
# Hack a fix to our broken connection pooling
5871
# This creates a new connection on every request,
@@ -265,7 +278,12 @@ def total_count(self):
265278
return s.hits.total
266279

267280
def query(self, search, query):
268-
"""Manipulates the query to support nested queries and a custom rank for pages."""
281+
"""
282+
Manipulates the query to support nested queries and a custom rank for pages.
283+
284+
If `self.projects` was given, we use it to filter the documents that
285+
match the same project and version.
286+
"""
269287
search = search.highlight_options(**self._highlight_options)
270288
search = search.source(excludes=self.excludes)
271289

@@ -287,8 +305,22 @@ def query(self, search, query):
287305
)
288306

289307
queries.extend([sections_nested_query, domains_nested_query])
308+
bool_query = Bool(should=queries)
309+
310+
if self.projects:
311+
versions_query = [
312+
Bool(
313+
must=[
314+
Term(project={'value': project}),
315+
Term(version={'value': version}),
316+
]
317+
)
318+
for project, version in self.projects.items()
319+
]
320+
bool_query = Bool(must=[bool_query, Bool(should=versions_query)])
321+
290322
final_query = FunctionScore(
291-
query=Bool(should=queries),
323+
query=bool_query,
292324
script_score=self._get_script_score(),
293325
)
294326
search = search.query(final_query)

readthedocs/search/tests/test_api.py

+51
Original file line numberDiff line numberDiff line change
@@ -264,11 +264,62 @@ def test_doc_search_subprojects(self, api_client, all_projects):
264264
# First result should be the subproject
265265
first_result = data[0]
266266
assert first_result['project'] == subproject.slug
267+
# The result is from the same version as the main project.
268+
assert first_result['version'] == version.slug
267269
# Check the link is the subproject document link
268270
document_link = subproject.get_docs_url(version_slug=version.slug)
269271
link = first_result['domain'] + first_result['path']
270272
assert document_link in link
271273

274+
def test_doc_search_subprojects_default_version(self, api_client, all_projects):
275+
"""Return results from subprojects that match the version from the main project or fallback to its default version."""
276+
project = all_projects[0]
277+
version = project.versions.all()[0]
278+
feature, _ = Feature.objects.get_or_create(
279+
feature_id=Feature.SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION,
280+
)
281+
project.feature_set.add(feature)
282+
283+
subproject = all_projects[1]
284+
subproject_version = subproject.versions.all()[0]
285+
286+
# Change the name of the version, and make it default.
287+
subproject_version.slug = 'different'
288+
subproject_version.save()
289+
subproject.default_version = subproject_version.slug
290+
subproject.save()
291+
subproject.versions.filter(slug=version.slug).delete()
292+
293+
# Refresh index
294+
version_files = HTMLFile.objects.all().filter(version=subproject_version)
295+
for f in version_files:
296+
PageDocument().update(f)
297+
298+
# Add another project as subproject of the project
299+
project.add_subproject(subproject)
300+
301+
# Now search with subproject content but explicitly filter by the parent project
302+
query = get_search_query_from_project_file(project_slug=subproject.slug)
303+
search_params = {
304+
'q': query,
305+
'project': project.slug,
306+
'version': version.slug
307+
}
308+
resp = self.get_search(api_client, search_params)
309+
assert resp.status_code == 200
310+
311+
data = resp.data['results']
312+
assert len(data) >= 1 # there may be results from other projects
313+
314+
# First result should be the subproject
315+
first_result = data[0]
316+
assert first_result['project'] == subproject.slug
317+
assert first_result['version'] == 'different'
318+
# Check the link is the subproject document link
319+
document_link = subproject.get_docs_url(version_slug=subproject_version.slug)
320+
link = first_result['domain'] + first_result['path']
321+
assert document_link in link
322+
272323
def test_doc_search_unexisting_project(self, api_client):
273324
project = 'notfound'
274325
assert not Project.objects.filter(slug=project).exists()

0 commit comments

Comments
 (0)