diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index b636b348aa8..e4c90e1e72d 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -1231,19 +1231,19 @@ def get_processed_json(self): Both lead to `foo/index.html` https://github.com/rtfd/readthedocs.org/issues/5368 """ - paths = [] + fjson_paths = [] basename = os.path.splitext(self.path)[0] - paths.append(basename + '.fjson') + fjson_paths.append(basename + '.fjson') if basename.endswith('/index'): new_basename = re.sub(r'\/index$', '', basename) - paths.append(new_basename + '.fjson') + fjson_paths.append(new_basename + '.fjson') full_json_path = self.project.get_production_media_path( type_='json', version_slug=self.version.slug, include_file=False ) try: - for path in paths: - file_path = os.path.join(full_json_path, path) + for fjson_path in fjson_paths: + file_path = os.path.join(full_json_path, fjson_path) if os.path.exists(file_path): return process_file(file_path) except Exception: diff --git a/readthedocs/search/api.py b/readthedocs/search/api.py index 8084d793f74..57c8efb9186 100644 --- a/readthedocs/search/api.py +++ b/readthedocs/search/api.py @@ -1,14 +1,14 @@ import logging from pprint import pformat -from rest_framework import generics -from rest_framework import serializers +from rest_framework import generics, serializers from rest_framework.exceptions import ValidationError from rest_framework.pagination import PageNumberPagination from readthedocs.search.faceted_search import PageSearch from readthedocs.search.utils import get_project_list_or_404 + log = logging.getLogger(__name__) diff --git a/readthedocs/search/documents.py b/readthedocs/search/documents.py index 28a3a61f477..5e9d950f272 100644 --- a/readthedocs/search/documents.py +++ b/readthedocs/search/documents.py @@ -116,6 +116,7 @@ class PageDocument(RTDDocTypeMixin, DocType): project = fields.KeywordField(attr='project.slug') version = fields.KeywordField(attr='version.slug') path = fields.KeywordField(attr='processed_json.path') + full_path = fields.KeywordField(attr='path') # Searchable content title = fields.TextField(attr='processed_json.title') @@ -153,7 +154,7 @@ def faceted_search( def get_queryset(self): """Overwrite default queryset to filter certain files to index.""" - queryset = super(PageDocument, self).get_queryset() + queryset = super().get_queryset() # Do not index files that belong to non sphinx project # Also do not index certain files diff --git a/readthedocs/search/parse_json.py b/readthedocs/search/parse_json.py index a37be7dfc63..a3593056204 100644 --- a/readthedocs/search/parse_json.py +++ b/readthedocs/search/parse_json.py @@ -59,38 +59,41 @@ def generate_sections_from_pyquery(body): } -def process_file(filename): - """Read a file from disk and parse it into a structured dict.""" +def process_file(fjson_filename): + """Read the fjson file from disk and parse it into a structured dict.""" try: - with codecs.open(filename, encoding='utf-8', mode='r') as f: + with codecs.open(fjson_filename, encoding='utf-8', mode='r') as f: file_contents = f.read() except IOError: - log.info('Unable to read file: %s', filename) - return None + log.info('Unable to read file: %s', fjson_filename) + raise data = json.loads(file_contents) sections = [] + path = '' title = '' body_content = '' + if 'current_page_name' in data: path = data['current_page_name'] else: - log.info('Unable to index file due to no name %s', filename) - return None - if 'body' in data and data['body']: + log.info('Unable to index file due to no name %s', fjson_filename) + + if data.get('body'): body = PyQuery(data['body']) body_content = body.text().replace('ΒΆ', '') sections.extend(generate_sections_from_pyquery(body)) else: - log.info('Unable to index content for: %s', filename) + log.info('Unable to index content for: %s', fjson_filename) + if 'title' in data: title = data['title'] if title.startswith('<'): title = PyQuery(data['title']).text() else: - log.info('Unable to index title for: %s', filename) + log.info('Unable to index title for: %s', fjson_filename) return { - 'headers': process_headers(data, filename), + 'headers': process_headers(data, fjson_filename), 'content': body_content, 'path': path, 'title': title, diff --git a/readthedocs/templates/search/elastic_search.html b/readthedocs/templates/search/elastic_search.html index 149e0addbf6..a7e33862a30 100644 --- a/readthedocs/templates/search/elastic_search.html +++ b/readthedocs/templates/search/elastic_search.html @@ -210,7 +210,7 @@