From 9cacb2cf35852c8f6d0652f44e6aaeb78b58a511 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 2 Nov 2020 17:11:02 -0500 Subject: [PATCH 1/5] Search: refactor api view Mainly this was done to allow us support searching on different versions of subprojects. Currently, we assume we search the same version for all subprojects. - _get_all_projects_data and _get_all_projects were merged into just one method. And instead of returning a list of projects we return a dictionary of project slugs mapped to a VersionData object. - There is some duplication in .com. `_has_permission` and `_get_subproject_versions_queryset` are overridden in .com. - ProjectData was renamed to VersionData since it has more attributes related to a version than a project. --- readthedocs/search/api.py | 113 +++++++++++++++------------ readthedocs/search/serializers.py | 18 +++-- readthedocs/search/tests/test_api.py | 2 +- readthedocs/search/views.py | 8 +- 4 files changed, 82 insertions(+), 59 deletions(-) diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index 9ec7346a86b..812b5ad5949 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -15,7 +15,7 @@ from readthedocs.search import tasks from readthedocs.search.faceted_search import PageSearch -from .serializers import PageSearchSerializer +from .serializers import PageSearchSerializer, VersionData log = logging.getLogger(__name__) @@ -185,67 +185,82 @@ def _validate_query_params(self): def _get_all_projects_data(self): """ - Return a dict containing the project slug and its version URL and version's doctype. - - The dictionary contains the project and its subprojects. Each project's - slug is used as a key and a tuple with the documentation URL and doctype - from the version. Example: - - { - "requests": ( - "https://requests.readthedocs.io/en/latest/", - "sphinx", - ), - "requests-oauth": ( - "https://requests-oauth.readthedocs.io/en/latest/", - "sphinx_htmldir", - ), - } + Return a dictionary of the project itself and all its subprojects. - :rtype: dict - """ - all_projects = self._get_all_projects() - version_slug = self._get_version().slug - project_urls = {} - for project in all_projects: - project_urls[project.slug] = project.get_docs_url(version_slug=version_slug) - - versions_doctype = ( - Version.objects - .filter(project__slug__in=project_urls.keys(), slug=version_slug) - .values_list('project__slug', 'documentation_type') - ) + Example: - projects_data = { - project_slug: (project_urls[project_slug], doctype) - for project_slug, doctype in versions_doctype - } - return projects_data + .. code:: - def _get_all_projects(self): - """ - Returns a list of the project itself and all its subprojects the user has permissions over. + { + "requests": VersionData( + "latest", + "sphinx", + "https://requests.readthedocs.io/en/latest/", + ), + "requests-oauth": VersionData( + "latest", + "sphinx_htmldir", + "https://requests-oauth.readthedocs.io/en/latest/", + ), + } - :rtype: list + .. note:: The response is cached into the instance. + + :rtype: A dictionary of project slugs mapped to a `VersionData` object. """ + cache_key = '__cached_projects_data' + projects_data = getattr(self, cache_key, None) + if projects_data is not None: + return projects_data + main_version = self._get_version() main_project = self._get_project() - all_projects = [main_project] + projects_data = { + main_project.slug: VersionData( + slug=main_version.slug, + doctype=main_version.documentation_type, + docs_url=main_project.get_docs_url(version_slug=main_version.slug), + ) + } subprojects = Project.objects.filter( superprojects__parent_id=main_project.id, ) for project in subprojects: - version = ( - Version.internal - .public(user=self.request.user, project=project, include_hidden=False) - .filter(slug=main_version.slug) - .first() + version = self._get_subproject_version( + version_slug=main_version, + subproject=project, ) - if version: - all_projects.append(version.project) - return all_projects + if version and self._has_permission(self.request.user, version): + url = project.get_docs_url(version_slug=version.slug) + projects_data[project.slug] = VersionData( + slug=version.slug, + doctype=version.documentation_type, + docs_url=url, + ) + + setattr(self, cache_key, projects_data) + return projects_data + + def _get_subproject_version(self, version_slug, subproject): + """Get a version from the subproject.""" + return ( + Version.internal + .public(user=self.request.user, project=subproject, include_hidden=False) + .filter(slug=version_slug) + .first() + ) + + def _has_permission(self, user, version): + """ + Check if `user` is authorized to access `version`. + + The queryset from `_get_subproject_version` already filters public + projects. This is mainly to be overriden in .com to make use of + the auth backends in the proxied API. + """ + return True def _record_query(self, response): project_slug = self._get_project().slug @@ -276,7 +291,7 @@ def get_queryset(self): is compatible with DRF's paginator. """ filters = {} - filters['project'] = [p.slug for p in self._get_all_projects()] + filters['project'] = list(self._get_all_projects_data().keys()) filters['version'] = self._get_version().slug # Check to avoid searching all projects in case these filters are empty. diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py index 01b04eca56e..0e0dd2f45fe 100644 --- a/readthedocs/search/serializers.py +++ b/readthedocs/search/serializers.py @@ -20,8 +20,8 @@ from readthedocs.projects.models import Project -# Structure used for storing cached data of a project mostly. -ProjectData = namedtuple('ProjectData', ['docs_url', 'version_doctype']) +# Structure used for storing cached data of a version mostly. +VersionData = namedtuple('VersionData', ['slug', 'docs_url', 'doctype']) class ProjectHighlightSerializer(serializers.Serializer): @@ -88,14 +88,14 @@ def _get_full_path(self, obj): it's cached into ``project_data``. """ # First try to build the URL from the context. - project_data = self.context.get('projects_data', {}).get(obj.project) - if project_data: - docs_url, doctype = project_data + version_data = self.context.get('projects_data', {}).get(obj.project) + if version_data: + docs_url = version_data.docs_url path = obj.full_path # Generate an appropriate link for the doctypes that use htmldir, # and always end it with / so it goes directly to proxito. - if doctype in {SPHINX_HTMLDIR, MKDOCS}: + if version_data.doctype in {SPHINX_HTMLDIR, MKDOCS}: path = re.sub('(^|/)index.html$', '/', path) return docs_url.rstrip('/') + '/' + path.lstrip('/') @@ -106,7 +106,11 @@ def _get_full_path(self, obj): docs_url = project.get_docs_url(version_slug=obj.version) # cache the project URL projects_data = self.context.setdefault('projects_data', {}) - projects_data[obj.project] = ProjectData(docs_url, '') + projects_data[obj.project] = VersionData( + slug=obj.version, + docs_url=docs_url, + doctype=None, + ) return docs_url + obj.full_path return None diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py index 990e193472f..0eaecf96d2b 100644 --- a/readthedocs/search/tests/test_api.py +++ b/readthedocs/search/tests/test_api.py @@ -293,7 +293,7 @@ def test_doc_search_unexisting_version(self, api_client, project): resp = self.get_search(api_client, search_params) assert resp.status_code == 404 - @mock.patch.object(PageSearchAPIView, '_get_all_projects', list) + @mock.patch.object(PageSearchAPIView, '_get_all_projects_data', dict) def test_get_all_projects_returns_empty_results(self, api_client, project): """If there is a case where `_get_all_projects` returns empty, we could be querying all projects.""" diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 1ebe46f056a..275e4998d34 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -15,8 +15,8 @@ from .serializers import ( PageSearchSerializer, - ProjectData, ProjectSearchSerializer, + VersionData, ) log = logging.getLogger(__name__) @@ -63,7 +63,11 @@ def _get_project_data(self, project, version_slug): ) docs_url = project.get_docs_url(version_slug=version_slug) project_data = { - project.slug: ProjectData(docs_url, version_doctype) + project.slug: VersionData( + slug=version_slug, + docs_url=docs_url, + doctype=version_doctype, + ) } return project_data From 765ebecfc78773df2725147aba183cead4fcaa3d Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 2 Nov 2020 17:11:02 -0500 Subject: [PATCH 2/5] Search: refactor api view Mainly this was done to allow us support searching on different versions of subprojects. Currently, we assume we search the same version for all subprojects. - _get_all_projects_data and _get_all_projects were merged into just one method. And instead of returning a list of projects we return a dictionary of project slugs mapped to a VersionData object. - There is some duplication in .com. `_has_permission` and `_get_subproject_versions_queryset` are overridden in .com. - ProjectData was renamed to VersionData since it has more attributes related to a version than a project. --- readthedocs/search/api.py | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index 812b5ad5949..fe24fb1b6e2 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -229,9 +229,16 @@ def _get_all_projects_data(self): ) for project in subprojects: version = self._get_subproject_version( - version_slug=main_version, + version_slug=main_version.slug, subproject=project, ) + # Fallback to the default version of the subproject. + if not version and project.default_version: + version = self._get_subproject_version( + version_slug=project.default_version, + subproject=project, + ) + if version and self._has_permission(self.request.user, version): url = project.get_docs_url(version_slug=version.slug) projects_data[project.slug] = VersionData( @@ -290,22 +297,20 @@ def get_queryset(self): calling ``search.execute().hits``. This is why an DSL search object is compatible with DRF's paginator. """ - filters = {} - filters['project'] = list(self._get_all_projects_data().keys()) - filters['version'] = self._get_version().slug + projects = { + project: version.slug + for project, version in self._get_all_projects_data().items() + } - # Check to avoid searching all projects in case these filters are empty. - if not filters['project']: - log.info('Unable to find a project to search') - return [] - if not filters['version']: + # Check to avoid searching all projects in case it's empty. + if not projects: log.info('Unable to find a version to search') return [] query = self.request.query_params['q'] queryset = PageSearch( query=query, - filters=filters, + projects=projects, user=self.request.user, # We use a permission class to control authorization filter_by_user=False, From c908174c184c9bc5c4dcd67a8c64cf8b3fe10cfa Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 2 Nov 2020 17:51:41 -0500 Subject: [PATCH 3/5] Search: allow to search on different versions of subprojects --- readthedocs/search/faceted_search.py | 42 ++++++++++++++++++++++++---- 1 file changed, 37 insertions(+), 5 deletions(-) diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py index c7305ae71cd..6c6e7c2a850 100644 --- a/readthedocs/search/faceted_search.py +++ b/readthedocs/search/faceted_search.py @@ -11,6 +11,7 @@ MultiMatch, Nested, SimpleQueryString, + Term, Wildcard, ) @@ -35,12 +36,23 @@ class RTDFacetedSearch(FacetedSearch): 'post_tags': [''], } - def __init__(self, query=None, filters=None, user=None, use_advanced_query=True, **kwargs): + def __init__( + self, + query=None, + filters=None, + projects=None, + user=None, + use_advanced_query=True, + **kwargs, + ): """ Pass in a user in order to filter search results by privacy. - If `use_advanced_query` is `True`, - force to always use `SimpleQueryString` for the text query object. + :param projects: A dictionary of project slugs mapped to a `VersionData` object. + Results are filter with these values. + + :param use_advanced_query: If `True` forces to always use + `SimpleQueryString` for the text query object. .. warning:: @@ -50,6 +62,7 @@ def __init__(self, query=None, filters=None, user=None, use_advanced_query=True, self.user = user self.filter_by_user = kwargs.pop('filter_by_user', True) self.use_advanced_query = use_advanced_query + self.projects = projects or {} # Hack a fix to our broken connection pooling # This creates a new connection on every request, @@ -259,7 +272,12 @@ def total_count(self): return s.hits.total def query(self, search, query): - """Manipulates the query to support nested queries and a custom rank for pages.""" + """ + Manipulates the query to support nested queries and a custom rank for pages. + + If `self.projects` was given, we use it to filter the documents that + match the same project and version. + """ search = search.highlight_options(**self._highlight_options) queries = self._get_queries( @@ -280,8 +298,22 @@ def query(self, search, query): ) queries.extend([sections_nested_query, domains_nested_query]) + bool_query = Bool(should=queries) + + if self.projects: + versions_query = [ + Bool( + must=[ + Term(project={'value': project}), + Term(version={'value': version}), + ] + ) + for project, version in self.projects.items() + ] + bool_query = Bool(must=[bool_query, Bool(should=versions_query)]) + final_query = FunctionScore( - query=Bool(should=queries), + query=bool_query, script_score=self._get_script_score(), ) search = search.query(final_query) From e5c0337b08a165c9f60cb4ede7ffacd2a5f2431c Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Mon, 2 Nov 2020 17:11:02 -0500 Subject: [PATCH 4/5] Search: refactor api view Mainly this was done to allow us support searching on different versions of subprojects. Currently, we assume we search the same version for all subprojects. - _get_all_projects_data and _get_all_projects were merged into just one method. And instead of returning a list of projects we return a dictionary of project slugs mapped to a VersionData object. - There is some duplication in .com. `_has_permission` and `_get_subproject_versions_queryset` are overridden in .com. - ProjectData was renamed to VersionData since it has more attributes related to a version than a project. --- readthedocs/search/api.py | 113 +++++++++++++++------------ readthedocs/search/serializers.py | 18 +++-- readthedocs/search/tests/test_api.py | 2 +- readthedocs/search/views.py | 8 +- 4 files changed, 82 insertions(+), 59 deletions(-) diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index 9ec7346a86b..332375249c9 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -15,7 +15,7 @@ from readthedocs.search import tasks from readthedocs.search.faceted_search import PageSearch -from .serializers import PageSearchSerializer +from .serializers import PageSearchSerializer, VersionData log = logging.getLogger(__name__) @@ -185,67 +185,82 @@ def _validate_query_params(self): def _get_all_projects_data(self): """ - Return a dict containing the project slug and its version URL and version's doctype. - - The dictionary contains the project and its subprojects. Each project's - slug is used as a key and a tuple with the documentation URL and doctype - from the version. Example: - - { - "requests": ( - "https://requests.readthedocs.io/en/latest/", - "sphinx", - ), - "requests-oauth": ( - "https://requests-oauth.readthedocs.io/en/latest/", - "sphinx_htmldir", - ), - } + Return a dictionary of the project itself and all its subprojects. - :rtype: dict - """ - all_projects = self._get_all_projects() - version_slug = self._get_version().slug - project_urls = {} - for project in all_projects: - project_urls[project.slug] = project.get_docs_url(version_slug=version_slug) - - versions_doctype = ( - Version.objects - .filter(project__slug__in=project_urls.keys(), slug=version_slug) - .values_list('project__slug', 'documentation_type') - ) + Example: - projects_data = { - project_slug: (project_urls[project_slug], doctype) - for project_slug, doctype in versions_doctype - } - return projects_data + .. code:: - def _get_all_projects(self): - """ - Returns a list of the project itself and all its subprojects the user has permissions over. + { + "requests": VersionData( + "latest", + "sphinx", + "https://requests.readthedocs.io/en/latest/", + ), + "requests-oauth": VersionData( + "latest", + "sphinx_htmldir", + "https://requests-oauth.readthedocs.io/en/latest/", + ), + } - :rtype: list + .. note:: The response is cached into the instance. + + :rtype: A dictionary of project slugs mapped to a `VersionData` object. """ + cache_key = '__cached_projects_data' + projects_data = getattr(self, cache_key, None) + if projects_data is not None: + return projects_data + main_version = self._get_version() main_project = self._get_project() - all_projects = [main_project] + projects_data = { + main_project.slug: VersionData( + slug=main_version.slug, + doctype=main_version.documentation_type, + docs_url=main_project.get_docs_url(version_slug=main_version.slug), + ) + } subprojects = Project.objects.filter( superprojects__parent_id=main_project.id, ) for project in subprojects: - version = ( - Version.internal - .public(user=self.request.user, project=project, include_hidden=False) - .filter(slug=main_version.slug) - .first() + version = self._get_subproject_version( + version_slug=main_version.slug, + subproject=project, ) - if version: - all_projects.append(version.project) - return all_projects + if version and self._has_permission(self.request.user, version): + url = project.get_docs_url(version_slug=version.slug) + projects_data[project.slug] = VersionData( + slug=version.slug, + doctype=version.documentation_type, + docs_url=url, + ) + + setattr(self, cache_key, projects_data) + return projects_data + + def _get_subproject_version(self, version_slug, subproject): + """Get a version from the subproject.""" + return ( + Version.internal + .public(user=self.request.user, project=subproject, include_hidden=False) + .filter(slug=version_slug) + .first() + ) + + def _has_permission(self, user, version): + """ + Check if `user` is authorized to access `version`. + + The queryset from `_get_subproject_version` already filters public + projects. This is mainly to be overriden in .com to make use of + the auth backends in the proxied API. + """ + return True def _record_query(self, response): project_slug = self._get_project().slug @@ -276,7 +291,7 @@ def get_queryset(self): is compatible with DRF's paginator. """ filters = {} - filters['project'] = [p.slug for p in self._get_all_projects()] + filters['project'] = list(self._get_all_projects_data().keys()) filters['version'] = self._get_version().slug # Check to avoid searching all projects in case these filters are empty. diff --git a/readthedocs/search/serializers.py b/readthedocs/search/serializers.py index 01b04eca56e..0e0dd2f45fe 100644 --- a/readthedocs/search/serializers.py +++ b/readthedocs/search/serializers.py @@ -20,8 +20,8 @@ from readthedocs.projects.models import Project -# Structure used for storing cached data of a project mostly. -ProjectData = namedtuple('ProjectData', ['docs_url', 'version_doctype']) +# Structure used for storing cached data of a version mostly. +VersionData = namedtuple('VersionData', ['slug', 'docs_url', 'doctype']) class ProjectHighlightSerializer(serializers.Serializer): @@ -88,14 +88,14 @@ def _get_full_path(self, obj): it's cached into ``project_data``. """ # First try to build the URL from the context. - project_data = self.context.get('projects_data', {}).get(obj.project) - if project_data: - docs_url, doctype = project_data + version_data = self.context.get('projects_data', {}).get(obj.project) + if version_data: + docs_url = version_data.docs_url path = obj.full_path # Generate an appropriate link for the doctypes that use htmldir, # and always end it with / so it goes directly to proxito. - if doctype in {SPHINX_HTMLDIR, MKDOCS}: + if version_data.doctype in {SPHINX_HTMLDIR, MKDOCS}: path = re.sub('(^|/)index.html$', '/', path) return docs_url.rstrip('/') + '/' + path.lstrip('/') @@ -106,7 +106,11 @@ def _get_full_path(self, obj): docs_url = project.get_docs_url(version_slug=obj.version) # cache the project URL projects_data = self.context.setdefault('projects_data', {}) - projects_data[obj.project] = ProjectData(docs_url, '') + projects_data[obj.project] = VersionData( + slug=obj.version, + docs_url=docs_url, + doctype=None, + ) return docs_url + obj.full_path return None diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py index 990e193472f..0eaecf96d2b 100644 --- a/readthedocs/search/tests/test_api.py +++ b/readthedocs/search/tests/test_api.py @@ -293,7 +293,7 @@ def test_doc_search_unexisting_version(self, api_client, project): resp = self.get_search(api_client, search_params) assert resp.status_code == 404 - @mock.patch.object(PageSearchAPIView, '_get_all_projects', list) + @mock.patch.object(PageSearchAPIView, '_get_all_projects_data', dict) def test_get_all_projects_returns_empty_results(self, api_client, project): """If there is a case where `_get_all_projects` returns empty, we could be querying all projects.""" diff --git a/readthedocs/search/views.py b/readthedocs/search/views.py index 1ebe46f056a..275e4998d34 100644 --- a/readthedocs/search/views.py +++ b/readthedocs/search/views.py @@ -15,8 +15,8 @@ from .serializers import ( PageSearchSerializer, - ProjectData, ProjectSearchSerializer, + VersionData, ) log = logging.getLogger(__name__) @@ -63,7 +63,11 @@ def _get_project_data(self, project, version_slug): ) docs_url = project.get_docs_url(version_slug=version_slug) project_data = { - project.slug: ProjectData(docs_url, version_doctype) + project.slug: VersionData( + slug=version_slug, + docs_url=docs_url, + doctype=version_doctype, + ) } return project_data From 628edad68eaa93a3ab69741d914cb8b5a6ad9960 Mon Sep 17 00:00:00 2001 From: Santos Gallegos Date: Tue, 3 Nov 2020 14:00:43 -0500 Subject: [PATCH 5/5] Put under feature flag --- readthedocs/projects/models.py | 34 +++++++++++++------ readthedocs/search/api.py | 43 +++++++++++++++++------ readthedocs/search/tests/test_api.py | 51 ++++++++++++++++++++++++++++ 3 files changed, 107 insertions(+), 21 deletions(-) diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 71f7679b1a4..839f758b2ad 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -1568,8 +1568,14 @@ def add_features(sender, **kwargs): SKIP_SYNC_VERSIONS = 'skip_sync_versions' CACHED_ENVIRONMENT = 'cached_environment' LIMIT_CONCURRENT_BUILDS = 'limit_concurrent_builds' + + # Search related features DISABLE_SERVER_SIDE_SEARCH = 'disable_server_side_search' ENABLE_MKDOCS_SERVER_SIDE_SEARCH = 'enable_mkdocs_server_side_search' + DEFAULT_TO_FUZZY_SEARCH = 'default_to_fuzzy_search' + INDEX_FROM_HTML_FILES = 'index_from_html_files' + SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION = 'search_subprojects_on_default_version' + FORCE_SPHINX_FROM_VENV = 'force_sphinx_from_venv' LIST_PACKAGES_INSTALLED_ENV = 'list_packages_installed_env' VCS_REMOTE_LISTING = 'vcs_remote_listing' @@ -1578,8 +1584,6 @@ def add_features(sender, **kwargs): USE_SPHINX_BUILDERS = 'use_sphinx_builders' DEDUPLICATE_BUILDS = 'deduplicate_builds' USE_SPHINX_RTD_EXT_LATEST = 'rtd_sphinx_ext_latest' - DEFAULT_TO_FUZZY_SEARCH = 'default_to_fuzzy_search' - INDEX_FROM_HTML_FILES = 'index_from_html_files' DONT_CREATE_INDEX = 'dont_create_index' USE_NEW_PIP_RESOLVER = 'use_new_pip_resolver' DONT_INSTALL_LATEST_PIP = 'dont_install_latest_pip' @@ -1667,6 +1671,8 @@ def add_features(sender, **kwargs): LIMIT_CONCURRENT_BUILDS, _('Limit the amount of concurrent builds'), ), + + # Search related features. ( DISABLE_SERVER_SIDE_SEARCH, _('Disable server side search'), @@ -1675,6 +1681,22 @@ def add_features(sender, **kwargs): ENABLE_MKDOCS_SERVER_SIDE_SEARCH, _('Enable server side search for MkDocs projects'), ), + ( + DEFAULT_TO_FUZZY_SEARCH, + _('Default to fuzzy search for simple search queries'), + ), + ( + INDEX_FROM_HTML_FILES, + _('Index content directly from html files instead or relying in other sources'), + ), + ( + SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION, + _( + 'When searching subprojects default to its default version if it doesn\'t ' + 'have the same version as the main project' + ), + ), + ( FORCE_SPHINX_FROM_VENV, _('Force to use Sphinx from the current virtual environment'), @@ -1710,14 +1732,6 @@ def add_features(sender, **kwargs): USE_SPHINX_RTD_EXT_LATEST, _('Use latest version of the Read the Docs Sphinx extension'), ), - ( - DEFAULT_TO_FUZZY_SEARCH, - _('Default to fuzzy search for simple search queries'), - ), - ( - INDEX_FROM_HTML_FILES, - _('Index content directly from html files instead or relying in other sources'), - ), ( DONT_CREATE_INDEX, _('Do not create index.md or README.rst if the project does not have one.'), diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index fe24fb1b6e2..de326a2b1c0 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -232,8 +232,13 @@ def _get_all_projects_data(self): version_slug=main_version.slug, subproject=project, ) + # Fallback to the default version of the subproject. - if not version and project.default_version: + if ( + not version + and main_project.has_feature(Feature.SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION) + and project.default_version + ): version = self._get_subproject_version( version_slug=project.default_version, subproject=project, @@ -297,24 +302,40 @@ def get_queryset(self): calling ``search.execute().hits``. This is why an DSL search object is compatible with DRF's paginator. """ - projects = { - project: version.slug - for project, version in self._get_all_projects_data().items() - } - - # Check to avoid searching all projects in case it's empty. - if not projects: - log.info('Unable to find a version to search') - return [] + main_project = self._get_project() + main_version = self._get_version() + projects = {} + filters = {} + + if main_project.has_feature(Feature.SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION): + projects = { + project: version.slug + for project, version in self._get_all_projects_data().items() + } + # Check to avoid searching all projects in case it's empty. + if not projects: + log.info('Unable to find a version to search') + return [] + else: + filters['project'] = list(self._get_all_projects_data().keys()) + filters['version'] = main_version.slug + # Check to avoid searching all projects in case these filters are empty. + if not filters['project']: + log.info('Unable to find a project to search') + return [] + if not filters['version']: + log.info('Unable to find a version to search') + return [] query = self.request.query_params['q'] queryset = PageSearch( query=query, projects=projects, + filters=filters, user=self.request.user, # We use a permission class to control authorization filter_by_user=False, - use_advanced_query=not self._get_project().has_feature(Feature.DEFAULT_TO_FUZZY_SEARCH), + use_advanced_query=not main_project.has_feature(Feature.DEFAULT_TO_FUZZY_SEARCH), ) return queryset diff --git a/readthedocs/search/tests/test_api.py b/readthedocs/search/tests/test_api.py index 0eaecf96d2b..9e7efa9ff57 100644 --- a/readthedocs/search/tests/test_api.py +++ b/readthedocs/search/tests/test_api.py @@ -264,11 +264,62 @@ def test_doc_search_subprojects(self, api_client, all_projects): # First result should be the subproject first_result = data[0] assert first_result['project'] == subproject.slug + # The result is from the same version as the main project. + assert first_result['version'] == version.slug # Check the link is the subproject document link document_link = subproject.get_docs_url(version_slug=version.slug) link = first_result['domain'] + first_result['path'] assert document_link in link + def test_doc_search_subprojects_default_version(self, api_client, all_projects): + """Return results from subprojects that match the version from the main project or fallback to its default version.""" + project = all_projects[0] + version = project.versions.all()[0] + feature, _ = Feature.objects.get_or_create( + feature_id=Feature.SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION, + ) + project.feature_set.add(feature) + + subproject = all_projects[1] + subproject_version = subproject.versions.all()[0] + + # Change the name of the version, and make it default. + subproject_version.slug = 'different' + subproject_version.save() + subproject.default_version = subproject_version.slug + subproject.save() + subproject.versions.filter(slug=version.slug).delete() + + # Refresh index + version_files = HTMLFile.objects.all().filter(version=subproject_version) + for f in version_files: + PageDocument().update(f) + + # Add another project as subproject of the project + project.add_subproject(subproject) + + # Now search with subproject content but explicitly filter by the parent project + query = get_search_query_from_project_file(project_slug=subproject.slug) + search_params = { + 'q': query, + 'project': project.slug, + 'version': version.slug + } + resp = self.get_search(api_client, search_params) + assert resp.status_code == 200 + + data = resp.data['results'] + assert len(data) >= 1 # there may be results from another projects + + # First result should be the subproject + first_result = data[0] + assert first_result['project'] == subproject.slug + assert first_result['version'] == 'different' + # Check the link is the subproject document link + document_link = subproject.get_docs_url(version_slug=subproject_version.slug) + link = first_result['domain'] + first_result['path'] + assert document_link in link + def test_doc_search_unexisting_project(self, api_client): project = 'notfound' assert not Project.objects.filter(slug=project).exists()