readthedocs · ericholscher · Jul 12, 2019 · Jun 19, 2019 · Jun 21, 2019 · Jun 21, 2019
diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py
@@ -1252,8 +1252,6 @@ def get_processed_json(self):
                 file_path,
             )
         return {
-            'headers': [],
-            'content': '',
             'path': file_path,
             'title': '',
             'sections': [],

diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py
@@ -120,8 +120,14 @@ class PageDocument(RTDDocTypeMixin, DocType):
 
     # Searchable content
     title = fields.TextField(attr='processed_json.title')
-    headers = fields.TextField(attr='processed_json.headers')
-    content = fields.TextField(attr='processed_json.content')
+    sections = fields.NestedField(
+        attr='processed_json.sections',
+        properties={
+            'id': fields.KeywordField(),
+            'title': fields.TextField(),
+            'content': fields.TextField(),
+        }
+    )
 
     modified_model_field = 'modified_date'
 

diff --git a/readthedocs/search/parse_json.py b/readthedocs/search/parse_json.py
@@ -10,17 +10,6 @@
 log = logging.getLogger(__name__)
 
 
-def process_headers(data, filename):
-    """Read headers from toc data."""
-    headers = []
-    if data.get('toc', False):
-        for element in PyQuery(data['toc'])('a'):
-            headers.append(recurse_while_none(element))
-        if None in headers:
-            log.info('Unable to index file headers for: %s', filename)
-    return headers
-
-
 def generate_sections_from_pyquery(body):
     """Given a pyquery object, generate section dicts for each section."""
     # Capture text inside h1 before the first h2
@@ -35,7 +24,7 @@ def generate_sections_from_pyquery(body):
             if next_p[0].tag == 'div' and 'class' in next_p[0].attrib:
                 if 'section' in next_p[0].attrib['class']:
                     break
-            h1_content += '\n%s\n' % next_p.html()
+            h1_content += '\n%s\n' % next_p.text()
             next_p = next_p.next()
         if h1_content:
             yield {
@@ -51,7 +40,7 @@ def generate_sections_from_pyquery(body):
         header = section_list.eq(num)
         title = header.text().replace('¶', '').strip()
         section_id = div.attr('id')
-        content = div.html()
+        content = div.text()
         yield {
             'id': section_id,
             'title': title,
@@ -71,7 +60,6 @@ def process_file(fjson_filename):
     sections = []
     path = ''
     title = ''
-    body_content = ''
 
     if 'current_page_name' in data:
         path = data['current_page_name']
@@ -80,7 +68,6 @@ def process_file(fjson_filename):
 
     if data.get('body'):
         body = PyQuery(data['body'])
-        body_content = body.text().replace('¶', '')
         sections.extend(generate_sections_from_pyquery(body))
     else:
         log.info('Unable to index content for: %s', fjson_filename)
@@ -93,24 +80,7 @@ def process_file(fjson_filename):
         log.info('Unable to index title for: %s', fjson_filename)
 
     return {
-        'headers': process_headers(data, fjson_filename),
-        'content': body_content,
         'path': path,
         'title': title,
         'sections': sections,
     }
-
-
-def recurse_while_none(element):
-    """
-    Traverse the ``element`` until a non-None text is found.
-
-    :param element: element to traverse until get a non-None text.
-    :type element: pyquery.PyQuery
-
-    :returns: the first non-None value found
-    :rtype: str
-    """
-    if element.text is None:
-        return recurse_while_none(element.getchildren()[0])
-    return element.text
diff --git a/readthedocs/settings/base.py b/readthedocs/settings/base.py
@@ -446,9 +446,6 @@ def USE_PROMOS(self):  # noqa
             'settings': {
                 'number_of_shards': 2,
                 'number_of_replicas': 0,
-                "index": {
-                    "sort.field": ["project", "version"]
-                }
             }
         },
     }