Add section linking for the search result #5829


Merged: 70 commits, merged Jul 12, 2019.

Commits
All 70 commits below are by dojutsu-user.

ee1ba1a - add sections field (Jun 19, 2019)
4b05f8a - index each section as separate document in ES (Jun 21, 2019)
79d2459 - Merge branch 'master' into search-section-linking (Jun 21, 2019)
54ceb5c - few refactoring (Jun 21, 2019)
b11e357 - revert all (Jun 24, 2019)
5b81471 - Merge branch 'master' into search-section-linking (Jun 24, 2019)
762a79d - update document mapping (nested fields) (Jun 24, 2019)
7a61dbd - format text (Jun 24, 2019)
644565b - get results from inner_hits (Jun 25, 2019)
fa51a1c - Merge branch 'master' into search-section-linking (Jun 25, 2019)
0bc6be5 - correct the none (Jun 25, 2019)
0139993 - add field for PageDocument. (Jun 26, 2019)
53a02e8 - remove domain_index settings (Jun 26, 2019)
11ba9e7 - Merge branch 'htmlfile-sphinx-domain-integration' into search-section… (Jun 26, 2019)
6207f4e - remove SphinxDomainDocument and DomainSearch (Jun 26, 2019)
a251a98 - generate correct query (Jun 26, 2019)
b6847b9 - remove boosting and allsearch (Jun 26, 2019)
af2d69f - remove allsearch import (Jun 26, 2019)
32d0bed - recursively remove newline characters from highlight dict (Jun 26, 2019)
d472f29 - Merge branch 'master' into search-section-linking (Jun 27, 2019)
878343d - lint fix (Jun 27, 2019)
f98d91c - Merge branch 'master' into search-section-linking (Jun 28, 2019)
7c1c641 - set number_of_fragments to 1 (Jun 28, 2019)
fd8e8f7 - use nested facet (Jun 28, 2019)
f6221ec - get sorted results (Jul 2, 2019)
8840606 - Merge branch 'master' into search-section-linking (Jul 2, 2019)
60e229c - fix search.html (Jul 2, 2019)
3835e2e - remove unused imports and add logging (Jul 2, 2019)
ae5033c - show more data on domain objects (Jul 3, 2019)
28e7cbf - fix main site search (Jul 3, 2019)
1e2a40b - mark as safe and change log to debug (Jul 3, 2019)
7b7a3c9 - add transpiled files -- js (Jul 3, 2019)
3931bc0 - remove log (Jul 3, 2019)
84a2494 - small improvements in template (Jul 3, 2019)
5cae508 - change variable name (Jul 3, 2019)
adb74ed - fix template (Jul 4, 2019)
d500d98 - fix lint (Jul 4, 2019)
9461d4f - use python datatypes (Jul 4, 2019)
75dcc2f - remove highlight url param from sections and domains (Jul 4, 2019)
ea36138 - fix clashing css classes (Jul 4, 2019)
451c0f4 - Merge branch 'master' into search-section-linking (Jul 8, 2019)
0817d43 - use underscore.js template (Jul 8, 2019)
5305458 - add _ with variables (Jul 8, 2019)
68cb7af - add comment in template (Jul 8, 2019)
d62bf3e - use .iterator() (Jul 8, 2019)
ed16e56 - show multiple results per section, if present (Jul 9, 2019)
0ed64f7 - fix sphinx indexing (Jul 9, 2019)
f988302 - don't index '-' value of domain.display_name (Jul 9, 2019)
429b3e9 - fix eslint (Jul 9, 2019)
897e09f - Merge branch 'master' into search-section-linking (Jul 9, 2019)
aeaba6f - reduce complexity in search.js (Jul 10, 2019)
6f9b2bc - refactor tasks.py file (Jul 10, 2019)
6135cde - fix logic in search.views (Jul 10, 2019)
d3566ac - make 100 a constant (Jul 10, 2019)
992c72e - Add checkbox for searching in current section (Jul 10, 2019)
f0babf1 - remove checkbox code for now (Jul 10, 2019)
4527839 - Merge branch 'master' into search-section-linking (Jul 10, 2019)
7e75d7e - fix test_imported_file (Jul 10, 2019)
1e6721d - fix test_search_json_parsing (Jul 10, 2019)
2a4c070 - fix test_search_json_parsing (Jul 10, 2019)
4beec39 - update test_search_json_parsing (Jul 10, 2019)
01346a0 - Merge branch 'master' into search-section-linking (Jul 11, 2019)
91282de - refactor parse_json and its test (Jul 11, 2019)
cfe8f5b - write initial tests (Jul 11, 2019)
7e99f6a - make 100 as constant (Jul 11, 2019)
b7ce777 - fix lint (Jul 12, 2019)
6701a4e - add test for domains and filter by version and project (Jul 12, 2019)
cee24ed - revert changes to python_environments.py (Jul 12, 2019)
685f6db - remove tests from this pr (Jul 12, 2019)
d7edeee - update template to make 100 as constant (Jul 12, 2019)
127 changes: 77 additions & 50 deletions readthedocs/core/static-src/core/js/doc-embed/search.js
@@ -4,6 +4,7 @@

var rtddata = require('./rtd-data');
var xss = require('xss/lib/index');
var MAX_RESULT_PER_SECTION = 3;


/*
@@ -48,10 +49,10 @@ function attach_elastic_search_query(data) {
}

// Creating the result from elements
var link = doc.link + DOCUMENTATION_OPTIONS.FILE_SUFFIX +
'?highlight=' + $.urlencode(query);
var link = doc.link + DOCUMENTATION_OPTIONS.FILE_SUFFIX;
var highlight_link = link + "?highlight=" + $.urlencode(query);

var item = $('<a>', {'href': link});
var item = $('<a>', {'href': highlight_link});
item.html(title);
item.find('em').addClass('highlighted');
list_item.append(item);
@@ -68,86 +69,112 @@ function attach_elastic_search_query(data) {

var contents = $('<div class="context">');

var section_template = '' +
'<div>' +
'<a href="<%= section_subtitle_link %>">' +
'<%= section_subtitle %>' +
'</a>' +
'</div>' +
'<% for (var i = 0; i < section_content.length; ++i) { %>' +
'<div>' +
'<%= section_content[i] %>' +
'</div>' +
'<% } %>';
Member: These strings are really cumbersome, is there not a good way to do multi-line strings in JS? :(

Member: @davidfischer thoughts here? Is it too fancy and new?
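For context on the multi-line-strings question: ES6 template literals would give native multi-line strings, but they are the newer syntax the thread is worried about, and the PR instead keeps ES5 concatenation and moves the markup into underscore.js templates. A minimal sketch of the two styles (the variable names here are illustrative, not from the PR):

```javascript
// Illustrative only: ES5 concatenation vs. an ES6 template literal.
var link = "page.html";
var anchor = "section-id";
var title = "Installation";

// ES5-style concatenation, as used throughout search.js:
var es5 = '<div>' +
    '<a href="' + link + '#' + anchor + '">' + title + '</a>' +
    '</div>';

// ES6 template literal equivalent (multi-line capable, but newer syntax):
var es6 = `<div><a href="${link}#${anchor}">${title}</a></div>`;

console.log(es5 === es6);  // true
```

Both produce identical markup; the trade-off is purely browser support versus readability.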


var domain_template = '' +
'<div>' +
'<a href="<%= domain_subtitle_link %>">' +
'<%= domain_subtitle %>' +
'</a>' +
'</div>' +
'<span>' +
'<%= domain_content %>' +
'</span>';

// if the result is a page section
if(inner_hits[j].type === "sections") {

var section = inner_hits[j];
var section_subtitle = $('<div class="rtd_search_subtitle">');
var section_subtitle_link = $('<a href="' + link + "#" + section._source.id + '">');
var section_content = $('<span>');
var section_subtitle = section._source.title;
var section_subtitle_link = link + "#" + section._source.id;
var section_content = [section._source.content.substring(0, 100) + " ..."];

if (section.highlight) {
if (section.highlight["sections.title"]) {
section_subtitle_link.html(
xss(section.highlight["sections.title"][0])
);
} else {
section_subtitle_link.html(
section._source.title
);
section_subtitle = xss(section.highlight["sections.title"][0]);
}

if (section.highlight["sections.content"]) {
section_content.html(
"... " +
xss(section.highlight["sections.content"][0]) +
" ..."
);
} else {
section_content.html(
section._source.content.substring(0, 100) +
" ..."
);
var content = section.highlight["sections.content"];
section_content = [];
for (
var k = 0;
k < content.length && k < MAX_RESULT_PER_SECTION;
k += 1
) {
section_content.push("... " + xss(content[k]) + " ...");
}
}
}

section_subtitle.html(section_subtitle_link);
contents.append(section_subtitle);
contents.append(section_content);
contents.find('em').addClass('highlighted');
contents.append(
$u.template(
section_template,
{
section_subtitle_link: section_subtitle_link,
section_subtitle: section_subtitle,
section_content: section_content
}
)
);
}

// if the result is a sphinx domain object
if (inner_hits[j].type === "domains") {

var domain = inner_hits[j];
var domain_subtitle = $('<div class="rtd_search_subtitle">');
var domain_subtitle_link = $('<a href="' + link + "#" + domain._source.anchor + '">');
var domain_content = $('<span>');
var domain_subtitle = domain._source.role_name;
var domain_subtitle_link = link + "#" + domain._source.anchor;
var domain_content = "";

if (
typeof domain._source.display_name === "string" &&
domain._source.display_name.length >= 2
domain._source.display_name.length >= 1
) {
domain_subtitle_link.html(
"(" +
domain._source.role_name +
")" +
domain._source.display_name
);
} else {
domain_subtitle_link.html(domain._source.role_name);
domain_subtitle = "(" + domain._source.role_name + ") " + domain._source.display_name;
}

// preparing domain_content
domain_content.append(domain._source.type_display + " -- ");
// domain_content = type_display --
domain_content = domain._source.type_display + " -- ";
Member: This feels weird. Should this be a template also?

Member Author: I have reduced the ifs here. It is less complicated and more readable now.

if (domain.highlight) {
if (domain.highlight["domains.name"]) {
domain_content.append(
xss(domain.highlight["domains.name"][0])
);
// domain_content = type_display -- name
domain_content += xss(domain.highlight["domains.name"][0]);
} else {
domain_content.append(domain._source.name);
// domain_content = type_display -- name
domain_content += domain._source.name;
}
} else {
// domain_content = type_display -- name
domain_content += domain._source.name;
}
domain_content.append(" -- in " + domain._source.doc_display);

domain_subtitle.append(domain_subtitle_link);
contents.append(domain_subtitle);
contents.append(domain_content);
contents.find('em').addClass('highlighted');
// domain_content = type_display -- name -- in doc_display
domain_content += " -- in " + domain._source.doc_display;

contents.append(
$u.template(
domain_template,
{
domain_subtitle_link: domain_subtitle_link,
domain_subtitle: domain_subtitle,
domain_content: domain_content
}
)
);
}

contents.find('em').addClass('highlighted');
list_item.append(contents);
list_item.append($("<br>"));
}
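The MAX_RESULT_PER_SECTION loop in the diff above can be sketched in isolation. This hypothetical helper mirrors the capping logic; the real code also runs each fragment through xss() before pushing it:

```javascript
// Hypothetical helper mirroring the diff's loop: keep at most
// MAX_RESULT_PER_SECTION highlighted fragments per section result.
var MAX_RESULT_PER_SECTION = 3;

function collectFragments(highlighted) {
    var section_content = [];
    for (
        var k = 0;
        k < highlighted.length && k < MAX_RESULT_PER_SECTION;
        k += 1
    ) {
        section_content.push("... " + highlighted[k] + " ...");
    }
    return section_content;
}

console.log(collectFragments(["a", "b", "c", "d"]));  // three fragments, "d" is dropped
```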
2 changes: 1 addition & 1 deletion readthedocs/core/static/core/js/readthedocs-doc-embed.js

Large diffs are not rendered by default.

31 changes: 7 additions & 24 deletions readthedocs/projects/tasks.py
@@ -1282,13 +1282,8 @@ def fileify(version_pk, commit, build):
except Exception:
log.exception('Failed during ImportedFile creation')

try:
_update_intersphinx_data(version, path, commit, build)
Member: I think we should keep this as 3 functions called from here, instead of one. That will make it more robust to issues of exceptions. So we should have:

  • _create_imported_files
  • _create_intersphinx_data
  • _sync_imported_files

And refactor the code to match.

except Exception:
log.exception('Failed during SphinxDomain creation')


def _update_intersphinx_data(version, path, commit, build):
def _create_intersphinx_data(version, path, commit, build):
"""
Update intersphinx data for this version.

@@ -1376,24 +1371,6 @@ def warn(self, msg):
build=build,
)

# Index new SphinxDomain objects to elasticsearch
index_new_files(model=SphinxDomain, version=version, build=build)

# Remove old SphinxDomain from elasticsearch
remove_indexed_files(
model=SphinxDomain,
version=version,
build=build,
)

# Delete SphinxDomain objects from the previous build of the version.
(
SphinxDomain.objects
.filter(project=version.project, version=version)
.exclude(build=build)
.delete()
)


def clean_build(version_pk):
"""Clean the files used in the build of the given version."""
@@ -1481,6 +1458,12 @@ def _manage_imported_files(version, path, commit, build):
build=build,
)

# create SphinxDomain objects
try:
_create_intersphinx_data(version, path, commit, build)
except Exception:
log.exception('Failed during SphinxDomain objects creation')

# Index new HTMLFiles to elasticsearch
Member: This is where the new _sync_imported_files function should start.

index_new_files(model=HTMLFile, version=version, build=build)

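The review suggestion above (three functions called from fileify, each guarded) comes down to isolating failures per step, so one broken step cannot abort the others. A runnable sketch of that pattern with placeholder bodies; the helper names follow the reviewer's proposal, not necessarily the final code:

```python
import logging

log = logging.getLogger(__name__)

def _create_imported_files(version, path, commit, build):
    pass  # placeholder for ImportedFile creation

def _create_intersphinx_data(version, path, commit, build):
    raise RuntimeError('boom')  # simulate a failure in one step

def fileify(version, path, commit, build):
    # Each step gets its own try/except, so a failure is logged
    # and the steps that follow it still run.
    try:
        _create_imported_files(version, path, commit, build)
    except Exception:
        log.exception('Failed during ImportedFile creation')
    try:
        _create_intersphinx_data(version, path, commit, build)
    except Exception:
        log.exception('Failed during SphinxDomain objects creation')
    return 'reached the end despite the failure'

print(fileify('stable', '/tmp/docs', 'abc123', build=42))
```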
11 changes: 6 additions & 5 deletions readthedocs/search/api.py
@@ -1,3 +1,4 @@
import itertools
import logging
from pprint import pformat

@@ -43,18 +44,18 @@ def get_highlight(self, obj):
def get_inner_hits(self, obj):
inner_hits = getattr(obj.meta, 'inner_hits', None)
if inner_hits:
sections = inner_hits.sections
domains = inner_hits.domains
all_results = list(sections) + list(domains)
sections = inner_hits.sections or []
domains = inner_hits.domains or []
all_results = itertools.chain(sections, domains)

sorted_results = [
sorted_results = (
{
'type': hit._nested.field,
'_source': hit._source.to_dict(),
'highlight': self._get_inner_hits_highlights(hit),
}
for hit in sorted(all_results, key=utils._get_hit_score, reverse=True)
]
)

return sorted_results

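The api.py change above merges the two nested-hit lists lazily with itertools.chain and orders them by Elasticsearch score. A standalone sketch with plain dicts standing in for elasticsearch-dsl hit objects (_get_hit_score is simplified here):

```python
import itertools

def _get_hit_score(hit):
    # Stand-in for readthedocs.search.utils._get_hit_score.
    return hit['_score']

sections = [{'type': 'sections', '_score': 1.2},
            {'type': 'sections', '_score': 3.4}]
domains = [{'type': 'domains', '_score': 2.5}]

# chain() avoids building an intermediate list; sorted() then orders
# every inner hit, regardless of type, by descending score.
all_results = itertools.chain(sections, domains)
sorted_results = sorted(all_results, key=_get_hit_score, reverse=True)

print([(hit['type'], hit['_score']) for hit in sorted_results])
# [('sections', 3.4), ('domains', 2.5), ('sections', 1.2)]
```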
29 changes: 17 additions & 12 deletions readthedocs/search/documents.py
@@ -111,18 +111,23 @@ class Meta:

def prepare_domains(self, html_file):
"""Prepares and returns the values for domains field."""
domains_qs = html_file.sphinx_domains.all()
domains_qs = domains_qs.exclude(domain='std', type__in=['doc', 'label'])

all_domains = [{
'role_name': domain.role_name,
'doc_name': domain.doc_name,
'anchor': domain.anchor,
'type_display': domain.type_display,
'doc_display': domain.doc_display,
'name': domain.name,
'display_name': domain.display_name,
} for domain in domains_qs]
domains_qs = html_file.sphinx_domains.exclude(
domain='std',
type__in=['doc', 'label']
).iterator()

all_domains = [
{
'role_name': domain.role_name,
'doc_name': domain.doc_name,
'anchor': domain.anchor,
'type_display': domain.type_display,
'doc_display': domain.doc_display,
'name': domain.name,
'display_name': domain.display_name if domain.display_name != '-' else '',
}
for domain in domains_qs
]

return all_domains

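The prepare_domains change above does two things: streams the queryset with .iterator() and refuses to index a literal '-' display_name (per commit f988302). The second rule in isolation, with a stand-in class instead of the SphinxDomain model:

```python
class Domain:
    # Stand-in for the SphinxDomain model; only the fields used here.
    def __init__(self, name, display_name):
        self.name = name
        self.display_name = display_name

domains_qs = [Domain('os.path.join', '-'), Domain('open', 'open a file')]

all_domains = [
    {
        'name': domain.name,
        # A literal '-' (meaning "no display name") is indexed as ''
        # so it can never match a search query.
        'display_name': domain.display_name if domain.display_name != '-' else '',
    }
    for domain in domains_qs
]

print(all_domains)
# [{'name': 'os.path.join', 'display_name': ''},
#  {'name': 'open', 'display_name': 'open a file'}]
```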
12 changes: 6 additions & 6 deletions readthedocs/search/faceted_search.py
@@ -101,14 +101,14 @@ class PageSearchBase(RTDFacetedSearch):
doc_types = [PageDocument]
index = PageDocument._doc_type.index

outer_fields = ['title']
section_fields = ['sections.title', 'sections.content']
domain_fields = [
_outer_fields = ['title']
_section_fields = ['sections.title', 'sections.content']
_domain_fields = [
'domains.type_display',
'domains.name',
'domains.display_name',
Member Author: Not sure which fields are to be included.

]
fields = outer_fields
fields = _outer_fields

# need to search for both 'and' and 'or' operations
# the score of and should be higher as it satisfies both or and and
@@ -125,7 +125,7 @@ def query(self, search, query):
sections_nested_query = self.generate_nested_query(
query=query,
path='sections',
fields=self.section_fields,
fields=self._section_fields,
inner_hits={
Member: Can we not use the DSL highlight function here, because it's a nested query?

Member Author: Yes. Here's an example of it -- elastic/elasticsearch-dsl-py#960

'highlight': {
'number_of_fragments': 1,
@@ -141,7 +141,7 @@ def query(self, search, query):
domains_nested_query = self.generate_nested_query(
query=query,
path='domains',
fields=self.domain_fields,
fields=self._domain_fields,
inner_hits={
'highlight': {
'number_of_fragments': 1,
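As the thread above notes, highlighting for nested documents has to be requested through inner_hits rather than the top-level DSL highlight call. A sketch of the query body generate_nested_query presumably produces (field names are taken from the diff; the exact structure is an assumption):

```python
import json

query_text = 'install'

# Hypothetical reconstruction of the nested query body for the
# "sections" path; a top-level highlight block cannot reach nested
# documents, so the highlight request lives inside inner_hits.
nested_query = {
    'nested': {
        'path': 'sections',
        'query': {
            'multi_match': {
                'query': query_text,
                'fields': ['sections.title', 'sections.content'],
            }
        },
        'inner_hits': {
            'highlight': {
                'number_of_fragments': 1,
                'fields': {
                    'sections.title': {},
                    'sections.content': {},
                },
            }
        },
    }
}

print(json.dumps(nested_query, indent=2))
```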
2 changes: 1 addition & 1 deletion readthedocs/search/utils.py
@@ -178,7 +178,7 @@ def _remove_newlines_from_dict(highlight):
"""
for k, v in highlight.items():
if isinstance(v, dict):
highlight[k] = self._remove_newlines_from_dict(v)
highlight[k] = _remove_newlines_from_dict(v)
else:
# elastic returns the contents of the
# highlighted field in a list.
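The one-line utils.py fix above matters because _remove_newlines_from_dict is a module-level function, so recursing through `self` would raise a NameError at runtime. A runnable sketch; the list-handling branch is an assumption, since the diff truncates before it:

```python
def _remove_newlines_from_dict(highlight):
    for k, v in highlight.items():
        if isinstance(v, dict):
            # Module-level function: recurse directly, not via `self`.
            highlight[k] = _remove_newlines_from_dict(v)
        else:
            # Elasticsearch returns highlighted fragments as a list of
            # strings; strip newlines from each one (assumed branch).
            highlight[k] = [fragment.replace('\n', ' ') for fragment in v]
    return highlight

cleaned = _remove_newlines_from_dict({
    'sections.title': ['Install\nguide'],
    'inner_hits': {'domains.name': ['foo\nbar']},
})
print(cleaned)
# {'sections.title': ['Install guide'], 'inner_hits': {'domains.name': ['foo bar']}}
```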
23 changes: 12 additions & 11 deletions readthedocs/search/views.py
@@ -1,5 +1,6 @@
"""Search views."""
import collections
import itertools
import logging
from pprint import pformat

@@ -107,12 +108,14 @@ def elastic_search(request, project_slug=None):

# sorting inner_hits (if present)
try:
for hit in results.hits.hits:
sections = hit['inner_hits'].get('sections', [])
domains = hit['inner_hits'].get('domains', [])
all_results = list(sections) + list(domains)
for result in results:

sorted_results = [
inner_hits = result.meta.inner_hits
sections = inner_hits.sections or []
domains = inner_hits.domains or []
all_results = itertools.chain(sections, domains)

sorted_results = (
{
'type': hit._nested.field,

@@ -126,15 +129,13 @@ def elastic_search(request, project_slug=None):
),
}
for hit in sorted(all_results, key=utils._get_hit_score, reverse=True)
]
)

hit['inner_hits'].pop('sections', None)
hit['inner_hits'].pop('domains', None)
hit['inner_hits'] = sorted_results
result.meta.inner_hits = sorted_results

except:
except Exception as e:
# if the control comes in this block,
# that implies that there was PageSearch
# that implies that there was a PageSearch
pass
Member: We should log this with log.exception instead of pass, or fix this logic. We shouldn't hit this in a normal case.

Member Author: I have changed the logic. Turns out that a simple if was sufficient. 😃 I have still wrapped the code in a try...except block to log any exceptions (if they occur) to help with debugging later.


log.debug('Search results: %s', pformat(results.to_dict()))