readthedocs · ericholscher · Jul 12, 2019 · Jun 19, 2019 · Jun 21, 2019 · Jun 21, 2019
diff --git a/readthedocs/rtd_tests/tests/test_search_json_parsing.py b/readthedocs/rtd_tests/tests/test_search_json_parsing.py
@@ -23,3 +23,6 @@ def test_h2_parsing(self):
             'You can use Slumber'
         ))
         self.assertEqual(data['title'], 'Read the Docs Public API')
+
+        for section in data['sections']:
+            self.assertFalse('\n' in section['content'])
diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py
@@ -48,14 +48,14 @@ def get_inner_hits(self, obj):
             domains = inner_hits.domains or []
             all_results = itertools.chain(sections, domains)
 
-            sorted_results = (
+            sorted_results = [
                 {
                     'type': hit._nested.field,
                     '_source': hit._source.to_dict(),
                     'highlight': self._get_inner_hits_highlights(hit),
                 }
                 for hit in sorted(all_results, key=utils._get_hit_score, reverse=True)
-            )
+            ]
 
             return sorted_results
 

diff --git a/readthedocs/search/faceted_search.py b/readthedocs/search/faceted_search.py
@@ -101,11 +101,11 @@ class PageSearchBase(RTDFacetedSearch):
     doc_types = [PageDocument]
     index = PageDocument._doc_type.index
 
-    _outer_fields = ['title']
-    _section_fields = ['sections.title', 'sections.content']
+    _outer_fields = ['title^4']
+    _section_fields = ['sections.title^3', 'sections.content']
     _domain_fields = [
         'domains.type_display',
-        'domains.name',
+        'domains.name^2',
         'domains.display_name',
     ]
     fields = _outer_fields

diff --git a/readthedocs/search/parse_json.py b/readthedocs/search/parse_json.py
@@ -25,16 +25,13 @@ def generate_sections_from_pyquery(body):
                 if 'section' in next_p[0].attrib['class']:
                     break
 
-            h1_content += '\n%s\n' % next_p.text().replace('¶', '').strip()
-            h1_content = h1_content.split('\n')[1:]  # to remove the redundant text
-            h1_content = '\n'.join(h1_content)
-
+            h1_content += parse_content(next_p.text())
             next_p = next_p.next()
         if h1_content:
             yield {
                 'id': h1_id,
                 'title': h1_title,
-                'content': h1_content,
+                'content': h1_content.replace('\n', '. '),
             }
 
     # Capture text inside h2's
@@ -45,9 +42,8 @@ def generate_sections_from_pyquery(body):
         title = header.text().replace('¶', '').strip()
         section_id = div.attr('id')
 
-        content = div.text().replace('¶', '').strip()
-        content = content.split('\n')[1:]  # to remove the redundant text
-        content = '\n'.join(content)
+        content = div.text()
+        content = parse_content(content)
 
         yield {
             'id': section_id,
@@ -92,3 +88,23 @@ def process_file(fjson_filename):
         'title': title,
         'sections': sections,
     }
+
+
+def parse_content(content):
+    """
+    Remove the leading title line and ¶, then flatten newlines to ". ".
+
+    When the text spans several lines, the first line repeats the title
+    of that content and is dropped as redundant; single-line text is kept
+    as is. Blank lines are skipped so no empty ". . " fragments appear.
+    """
+    lines = content.replace('¶', '').strip().split('\n')
+
+    # Drop the first line only when other lines follow it; otherwise
+    # single-line content would be emptied out completely.
+    if len(lines) > 1:
+        lines = lines[1:]
+
+    # Trim every line and ignore whitespace-only ones before joining.
+    stripped = (line.strip() for line in lines)
+    return '. '.join(text for text in stripped if text)
diff --git a/readthedocs/search/tests/conftest.py b/readthedocs/search/tests/conftest.py
@@ -8,6 +8,8 @@
 
 from readthedocs.projects.models import Project, HTMLFile
 from readthedocs.search.documents import PageDocument
+from readthedocs.sphinx_domains.models import SphinxDomain
+
 from .dummy_data import ALL_PROJECTS, PROJECT_DATA_FILES
 
 
@@ -32,6 +34,28 @@ def all_projects(es_index, mock_processed_json, db, settings):
             file_name = file_basename + '.html'
             version = project.versions.all()[0]
             html_file = G(HTMLFile, project=project, version=version, name=file_name)
+
+            # creating sphinx domain test objects
+            file_path = get_json_file_path(project.slug, file_basename)
+            if os.path.exists(file_path):
+                with open (file_path) as f:
+                    data = json.load(f)
+                    domains = data['domains']
+
+                    for domain_data in domains:
+                        domain_role_name = domain_data.pop('role_name')
+                        domain, type_ = domain_role_name.split(':')
+
+                        G(
+                            SphinxDomain,
+                            project=project,
+                            version=version,
+                            html_file=html_file,
+                            domain=domain,
+                            type=type_,
+                            **domain_data
+                        )
+
             PageDocument().update(html_file)
 
         projects_list.append(project)
@@ -46,12 +70,17 @@ def project(all_projects):
     return all_projects[0]
 
 
+def get_json_file_path(project_slug, basename):
+    """Return the fixture path ``data/<project_slug>/<basename>.json``."""
+    # The ``data`` directory is resolved relative to this module's folder.
+    tests_dir = os.path.abspath(os.path.dirname(__file__))
+    return os.path.join(tests_dir, 'data', project_slug, f'{basename}.json')
+
+
 def get_dummy_processed_json(instance):
     project_slug = instance.project.slug
     basename = os.path.splitext(instance.name)[0]
-    file_name = basename + '.json'
-    current_path = os.path.abspath(os.path.dirname(__file__))
-    file_path = os.path.join(current_path, "data", project_slug, file_name)
+    file_path = get_json_file_path(project_slug, basename)
 
     if os.path.exists(file_path):
         with open(file_path) as f:

diff --git a/readthedocs/search/tests/data/docs/story.json b/readthedocs/search/tests/data/docs/story.json
diff --git a/readthedocs/search/tests/data/docs/support.json b/readthedocs/search/tests/data/docs/support.json
@@ -0,0 +1,41 @@
+{
+    "path": "support",
+    "title": "Support",
+    "sections": [
+        {
+            "id": "usage-questions",
+            "title": "Usage Questions",
+            "content": "If you have questions about how to use Read the Docs, or have an issue that isn’t related to a bug, Stack Overflow is the best place to ask. Tag questions with read-the-docs so other folks can find them easily.. Good questions for Stack Overflow would be:. “What is the best way to structure the table of contents across a project?”. “How do I structure translations inside of my project for easiest contribution from users?”. “How do I use Sphinx to use SVG images in HTML output but PNG in PDF output?”"
+        },
+        {
+            "id": "community-support",
+            "title": "Community Support",
+            "content": "Read the Docs is supported by community contributions and advertising. We hope to bring in enough money with our Gold and Ethical Ads programs to keep Read the Docs sustainable.. All people answering your questions are doing it with their own time, so please be kind and provide as much information as possible.. Bugs & Support Issues. You can file bug reports on our GitHub issue tracker, and they will be addressed as soon as possible. Support is a volunteer effort, and there is no guaranteed response time. If you need answers quickly, you can buy commercial support below.. Reporting Issues. When reporting a bug, please include as much information as possible that will help us solve this issue. This includes:. Project name. URL. Action taken. Expected result. Actual result. Specific Requests. If you need a specific request for your project or account, like more resources, change of the project’s slug or username. Send an email to [email protected]."
+        },
+        {
+            "id": "commercial-support",
+            "title": "Commercial Support",
+            "content": "We offer commercial support for Read the Docs, commercial hosting, as well as consulting around all documentation systems. You can contact us at [email protected] to learn more, or read more at https://readthedocs.com/services/#open-source-support."
+        }
+    ],
+    "domains": [
+        {
+            "role_name": "http:post",
+            "doc_name": "api/v3.html",
+            "anchor": "post--api-v3-projects-(string-project_slug)-versions-(string-version_slug)-builds-",
+            "type_display": "post",
+            "doc_display": "API v3",
+            "name": "/api/v3/projects/(string:project_slug)/versions/(string:version_slug)/builds/",
+            "display_name": ""
+        },
+        {
+            "role_name": "http:patch",
+            "doc_name": "api/v3.html",
+            "anchor": "patch--api-v3-projects-(string-project_slug)-version-(string-version_slug)-",
+            "type_display": "patch",
+            "doc_display": "API v3",
+            "name": "/api/v3/projects/(string:project_slug)/version/(string:version_slug)/",
+            "display_name": ""
+        }
+    ]
+}
diff --git a/readthedocs/search/tests/data/docs/wiping.json b/readthedocs/search/tests/data/docs/wiping.json
@@ -1,15 +1,49 @@
 {
-  "content": "ReadtheDocsWiping a Build Environment\nSometimes it happen that your Builds start failing because the build environment where the  is created is stale or broken. This could happen for a couple of different reasons like pip not upgrading a package properly or a corrupted cached Python package.\nIn any of these cases (and many others), the solution could be just wiping out the existing build environment files and allow Read the Docs to create a new fresh one.\nFollow these steps to wipe the build environment:\nGo to Versions\nClick on the Edit button of the version you want to wipe on the right side of the page\nGo to the bottom of the page and click the wipe link, next to the \u201cSave\u201d button\nNote\nBy wiping the  build environment, all the rst, md, and code files associated with it will be removed but not the  already built (HTML and PDF files). Your  will still online after wiping the build environment.\nNow you can re-build the version with a fresh build environment!",
-  "headers": [
-    "Wiping a Build Environment"
-  ],
-  "title": "Wiping a Build Environment",
-  "sections": [
-    {
-      "content": "\nSometimes it happen that your Builds start failing because the build\nenvironment where the  is created is stale or\nbroken. This could happen for a couple of different reasons like <code class=\"xref py py-obj docutils literal notranslate\"><span class=\"pre\">pip</span></code>\nnot upgrading a package properly or a corrupted cached Python package.\n\nIn any of these cases (and many others), the solution could be just\nwiping out the existing build environment files and allow Read the\nDocs to create a new fresh one.\n\nFollow these steps to wipe the build environment:\n\n\n<li>Go to <strong>Versions</strong></li>\n<li>Click on the <strong>Edit</strong> button of the version you want to wipe on the\nright side of the page</li>\n<li>Go to the bottom of the page and click the <strong>wipe</strong> link, next to\nthe \u201cSave\u201d button</li>\n\n\n\n<p class=\"first admonition-title\">Note</p>\n<p class=\"last\">By wiping the  build environment, all the <code class=\"xref py py-obj docutils literal notranslate\"><span class=\"pre\">rst</span></code>, <code class=\"xref py py-obj docutils literal notranslate\"><span class=\"pre\">md</span></code>,\nand code files associated with it will be removed but not the\n already built (<code class=\"xref py py-obj docutils literal notranslate\"><span class=\"pre\">HTML</span></code> and <code class=\"xref py py-obj docutils literal notranslate\"><span class=\"pre\">PDF</span></code> files). Your\n will still online after wiping the build environment.</p>\n\n\nNow you can re-build the version with a fresh build environment!\n",
-      "id": "wiping-a-build-environment",
-      "title": "Wiping a Build Environment"
-    }
-  ],
-  "path": "guides/wipe-environment"
+    "path": "guides/wipe-environment",
+    "title": "Wiping a Build Environment",
+    "sections": [
+        {
+            "id": "wiping-a-build-environment",
+            "title": "Wiping a Build Environment",
+            "content": "Sometimes it happen that your Builds start failing because the build environment where the documentation is created is stale or broken. This could happen for a couple of different reasons like pip not upgrading a package properly or a corrupted cached Python package.In any of these cases (and many others), the solution could be just wiping out the existing build environment files and allow Read the Docs to create a new fresh one.Follow these steps to wipe the build environment:Click on the Edit button of the version you want to wipe on the right side of the page. Go to the bottom of the page and click the wipe link, next to the “Save” buttonBy wiping the documentation build environment, all the rst, md, and code files associated with it will be removed but not the documentation already built (HTML and PDF files). Your documentation will still online after wiping the build environment.Now you can re-build the version with a fresh build environment!"
+        }
+    ],
+    "domains": [
+        {
+            "role_name": "http:get",
+            "doc_name": "api/v3.html",
+            "anchor": "get--api-v3-users-(str-username)",
+            "type_display": "get",
+            "doc_display": "API v3",
+            "name": "/api/v3/users/(str:username)",
+            "display_name": ""
+        },
+        {
+            "role_name": "http:get",
+            "doc_name": "api/v3.html",
+            "anchor": "get--api-v3-projects-(string-project_slug)-versions-(string-version_slug)-",
+            "type_display": "get",
+            "doc_display": "API v3",
+            "name": "/api/v3/projects/(string:project_slug)/versions/(string:version_slug)/",
+            "display_name": ""
+        },
+        {
+            "role_name": "http:get",
+            "doc_name": "api/v3.html",
+            "anchor": "get--api-v3-projects-(string-project_slug)-versions-",
+            "type_display": "get",
+            "doc_display": "API v3",
+            "name": "/api/v3/projects/(string:project_slug)/versions/",
+            "display_name": ""
+        },
+        {
+            "role_name": "http:get",
+            "doc_name": "api/v3.html",
+            "anchor": "get--api-v3-projects-(string-project_slug)-",
+            "type_display": "get",
+            "doc_display": "API v3",
+            "name": "/api/v3/projects/(string:project_slug)/",
+            "display_name": ""
+        }
+    ]
 }