Skip to content

Search: allow to search on different versions of subprojects #7634

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 7 commits into from
Nov 19, 2020
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 24 additions & 10 deletions readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1568,8 +1568,14 @@ def add_features(sender, **kwargs):
SKIP_SYNC_VERSIONS = 'skip_sync_versions'
CACHED_ENVIRONMENT = 'cached_environment'
LIMIT_CONCURRENT_BUILDS = 'limit_concurrent_builds'

# Search related features
DISABLE_SERVER_SIDE_SEARCH = 'disable_server_side_search'
ENABLE_MKDOCS_SERVER_SIDE_SEARCH = 'enable_mkdocs_server_side_search'
DEFAULT_TO_FUZZY_SEARCH = 'default_to_fuzzy_search'
INDEX_FROM_HTML_FILES = 'index_from_html_files'
SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION = 'search_subprojects_on_default_version'

FORCE_SPHINX_FROM_VENV = 'force_sphinx_from_venv'
LIST_PACKAGES_INSTALLED_ENV = 'list_packages_installed_env'
VCS_REMOTE_LISTING = 'vcs_remote_listing'
Expand All @@ -1578,8 +1584,6 @@ def add_features(sender, **kwargs):
USE_SPHINX_BUILDERS = 'use_sphinx_builders'
DEDUPLICATE_BUILDS = 'deduplicate_builds'
USE_SPHINX_RTD_EXT_LATEST = 'rtd_sphinx_ext_latest'
DEFAULT_TO_FUZZY_SEARCH = 'default_to_fuzzy_search'
INDEX_FROM_HTML_FILES = 'index_from_html_files'
DONT_CREATE_INDEX = 'dont_create_index'
USE_NEW_PIP_RESOLVER = 'use_new_pip_resolver'
DONT_INSTALL_LATEST_PIP = 'dont_install_latest_pip'
Expand Down Expand Up @@ -1667,6 +1671,8 @@ def add_features(sender, **kwargs):
LIMIT_CONCURRENT_BUILDS,
_('Limit the amount of concurrent builds'),
),

# Search related features.
(
DISABLE_SERVER_SIDE_SEARCH,
_('Disable server side search'),
Expand All @@ -1675,6 +1681,22 @@ def add_features(sender, **kwargs):
ENABLE_MKDOCS_SERVER_SIDE_SEARCH,
_('Enable server side search for MkDocs projects'),
),
(
DEFAULT_TO_FUZZY_SEARCH,
_('Default to fuzzy search for simple search queries'),
),
(
INDEX_FROM_HTML_FILES,
_('Index content directly from html files instead of relying on other sources'),
),
(
SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION,
_(
'When searching subprojects, default to their default version if they '
'don\'t have the same version as the main project'
),
),

(
FORCE_SPHINX_FROM_VENV,
_('Force to use Sphinx from the current virtual environment'),
Expand Down Expand Up @@ -1710,14 +1732,6 @@ def add_features(sender, **kwargs):
USE_SPHINX_RTD_EXT_LATEST,
_('Use latest version of the Read the Docs Sphinx extension'),
),
(
DEFAULT_TO_FUZZY_SEARCH,
_('Default to fuzzy search for simple search queries'),
),
(
INDEX_FROM_HTML_FILES,
_('Index content directly from html files instead of relying on other sources'),
),
(
DONT_CREATE_INDEX,
_('Do not create index.md or README.rst if the project does not have one.'),
Expand Down
157 changes: 99 additions & 58 deletions readthedocs/search/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
from readthedocs.search import tasks
from readthedocs.search.faceted_search import PageSearch

from .serializers import PageSearchSerializer
from .serializers import PageSearchSerializer, VersionData

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -185,67 +185,94 @@ def _validate_query_params(self):

def _get_all_projects_data(self):
"""
Return a dict containing the project slug and its version URL and version's doctype.

The dictionary contains the project and its subprojects. Each project's
slug is used as a key and a tuple with the documentation URL and doctype
from the version. Example:

{
"requests": (
"https://requests.readthedocs.io/en/latest/",
"sphinx",
),
"requests-oauth": (
"https://requests-oauth.readthedocs.io/en/latest/",
"sphinx_htmldir",
),
}
Return a dictionary of the project itself and all its subprojects.

:rtype: dict
"""
all_projects = self._get_all_projects()
version_slug = self._get_version().slug
project_urls = {}
for project in all_projects:
project_urls[project.slug] = project.get_docs_url(version_slug=version_slug)

versions_doctype = (
Version.objects
.filter(project__slug__in=project_urls.keys(), slug=version_slug)
.values_list('project__slug', 'documentation_type')
)
Example:

projects_data = {
project_slug: (project_urls[project_slug], doctype)
for project_slug, doctype in versions_doctype
}
return projects_data
.. code::

def _get_all_projects(self):
"""
Returns a list of the project itself and all its subprojects the user has permissions over.
{
"requests": VersionData(
"latest",
"sphinx",
"https://requests.readthedocs.io/en/latest/",
),
"requests-oauth": VersionData(
"latest",
"sphinx_htmldir",
"https://requests-oauth.readthedocs.io/en/latest/",
),
}

.. note:: The response is cached into the instance.

:rtype: list
:rtype: A dictionary of project slugs mapped to a `VersionData` object.
"""
cache_key = '__cached_projects_data'
projects_data = getattr(self, cache_key, None)
if projects_data is not None:
return projects_data

main_version = self._get_version()
main_project = self._get_project()

all_projects = [main_project]
projects_data = {
main_project.slug: VersionData(
slug=main_version.slug,
doctype=main_version.documentation_type,
docs_url=main_project.get_docs_url(version_slug=main_version.slug),
)
}

subprojects = Project.objects.filter(
superprojects__parent_id=main_project.id,
)
for project in subprojects:
version = (
Version.internal
.public(user=self.request.user, project=project, include_hidden=False)
.filter(slug=main_version.slug)
.first()
version = self._get_subproject_version(
version_slug=main_version.slug,
subproject=project,
)
if version:
all_projects.append(version.project)
return all_projects

# Fallback to the default version of the subproject.
if (
not version
and main_project.has_feature(Feature.SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION)
and project.default_version
):
version = self._get_subproject_version(
version_slug=project.default_version,
subproject=project,
)

if version and self._has_permission(self.request.user, version):
url = project.get_docs_url(version_slug=version.slug)
projects_data[project.slug] = VersionData(
slug=version.slug,
doctype=version.documentation_type,
docs_url=url,
)

setattr(self, cache_key, projects_data)
return projects_data

def _get_subproject_version(self, version_slug, subproject):
"""Get a version from the subproject."""
return (
Version.internal
.public(user=self.request.user, project=subproject, include_hidden=False)
.filter(slug=version_slug)
.first()
)

def _has_permission(self, user, version):
"""
Check if `user` is authorized to access `version`.

The queryset from `_get_subproject_version` already filters public
projects. This is mainly to be overridden in .com to make use of
the auth backends in the proxied API.
"""
return True

def _record_query(self, response):
project_slug = self._get_project().slug
Expand Down Expand Up @@ -275,26 +302,40 @@ def get_queryset(self):
calling ``search.execute().hits``. This is why an DSL search object
is compatible with DRF's paginator.
"""
main_project = self._get_project()
main_version = self._get_version()
projects = {}
filters = {}
filters['project'] = [p.slug for p in self._get_all_projects()]
filters['version'] = self._get_version().slug

# Check to avoid searching all projects in case these filters are empty.
if not filters['project']:
log.info('Unable to find a project to search')
return []
if not filters['version']:
log.info('Unable to find a version to search')
return []
if main_project.has_feature(Feature.SEARCH_SUBPROJECTS_ON_DEFAULT_VERSION):
projects = {
project: version.slug
for project, version in self._get_all_projects_data().items()
}
# Check to avoid searching all projects in case it's empty.
if not projects:
log.info('Unable to find a version to search')
return []
else:
filters['project'] = list(self._get_all_projects_data().keys())
filters['version'] = main_version.slug
# Check to avoid searching all projects in case these filters are empty.
if not filters['project']:
log.info('Unable to find a project to search')
return []
if not filters['version']:
log.info('Unable to find a version to search')
return []

query = self.request.query_params['q']
queryset = PageSearch(
query=query,
projects=projects,
filters=filters,
user=self.request.user,
# We use a permission class to control authorization
filter_by_user=False,
use_advanced_query=not self._get_project().has_feature(Feature.DEFAULT_TO_FUZZY_SEARCH),
use_advanced_query=not main_project.has_feature(Feature.DEFAULT_TO_FUZZY_SEARCH),
)
return queryset

Expand Down
42 changes: 37 additions & 5 deletions readthedocs/search/faceted_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
MultiMatch,
Nested,
SimpleQueryString,
Term,
Wildcard,
)

Expand All @@ -35,12 +36,23 @@ class RTDFacetedSearch(FacetedSearch):
'post_tags': ['</span>'],
}

def __init__(self, query=None, filters=None, user=None, use_advanced_query=True, **kwargs):
def __init__(
self,
query=None,
filters=None,
projects=None,
user=None,
use_advanced_query=True,
**kwargs,
):
"""
Pass in a user in order to filter search results by privacy.

If `use_advanced_query` is `True`,
force to always use `SimpleQueryString` for the text query object.
:param projects: A dictionary of project slugs mapped to a `VersionData` object.
Results are filtered by these values.

:param use_advanced_query: If `True` forces to always use
`SimpleQueryString` for the text query object.

.. warning::

Expand All @@ -50,6 +62,7 @@ def __init__(self, query=None, filters=None, user=None, use_advanced_query=True,
self.user = user
self.filter_by_user = kwargs.pop('filter_by_user', True)
self.use_advanced_query = use_advanced_query
self.projects = projects or {}

# Hack a fix to our broken connection pooling
# This creates a new connection on every request,
Expand Down Expand Up @@ -259,7 +272,12 @@ def total_count(self):
return s.hits.total

def query(self, search, query):
"""Manipulates the query to support nested queries and a custom rank for pages."""
"""
Manipulates the query to support nested queries and a custom rank for pages.

If `self.projects` was given, we use it to filter the documents that
match the same project and version.
"""
search = search.highlight_options(**self._highlight_options)

queries = self._get_queries(
Expand All @@ -280,8 +298,22 @@ def query(self, search, query):
)

queries.extend([sections_nested_query, domains_nested_query])
bool_query = Bool(should=queries)

if self.projects:
versions_query = [
Bool(
must=[
Term(project={'value': project}),
Term(version={'value': version}),
]
)
for project, version in self.projects.items()
]
bool_query = Bool(must=[bool_query, Bool(should=versions_query)])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm a little worried about this as you noted, but let's see how it performs in prod.


final_query = FunctionScore(
query=Bool(should=queries),
query=bool_query,
script_score=self._get_script_score(),
)
search = search.query(final_query)
Expand Down
18 changes: 11 additions & 7 deletions readthedocs/search/serializers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
from readthedocs.projects.models import Project


# Structure used for storing cached data of a project mostly.
ProjectData = namedtuple('ProjectData', ['docs_url', 'version_doctype'])
# Structure used for storing cached data of a version mostly.
VersionData = namedtuple('VersionData', ['slug', 'docs_url', 'doctype'])


class ProjectHighlightSerializer(serializers.Serializer):
Expand Down Expand Up @@ -88,14 +88,14 @@ def _get_full_path(self, obj):
it's cached into ``project_data``.
"""
# First try to build the URL from the context.
project_data = self.context.get('projects_data', {}).get(obj.project)
if project_data:
docs_url, doctype = project_data
version_data = self.context.get('projects_data', {}).get(obj.project)
if version_data:
docs_url = version_data.docs_url
path = obj.full_path

# Generate an appropriate link for the doctypes that use htmldir,
# and always end it with / so it goes directly to proxito.
if doctype in {SPHINX_HTMLDIR, MKDOCS}:
if version_data.doctype in {SPHINX_HTMLDIR, MKDOCS}:
path = re.sub('(^|/)index.html$', '/', path)

return docs_url.rstrip('/') + '/' + path.lstrip('/')
Expand All @@ -106,7 +106,11 @@ def _get_full_path(self, obj):
docs_url = project.get_docs_url(version_slug=obj.version)
# cache the project URL
projects_data = self.context.setdefault('projects_data', {})
projects_data[obj.project] = ProjectData(docs_url, '')
projects_data[obj.project] = VersionData(
slug=obj.version,
docs_url=docs_url,
doctype=None,
)
return docs_url + obj.full_path

return None
Expand Down
Loading