diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 76dd2bfd4a0..8c1c338283f 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -51,7 +51,7 @@ validate_repository_url, ) from readthedocs.projects.version_handling import determine_stable_version -from readthedocs.search.parsers import GenericParser, MkDocsParser, SphinxParser +from readthedocs.search.parsers import GenericParser, SphinxParser from readthedocs.storage import build_media_storage from readthedocs.vcs_support.backends import backend_cls @@ -1527,13 +1527,12 @@ class Meta: def get_processed_json(self): if ( self.version.documentation_type == constants.GENERIC + or self.version.is_mkdocs_type or self.project.has_feature(Feature.INDEX_FROM_HTML_FILES) ): parser_class = GenericParser elif self.version.is_sphinx_type: parser_class = SphinxParser - elif self.version.is_mkdocs_type: - parser_class = MkDocsParser else: log.warning( "Invalid documentation type", diff --git a/readthedocs/search/parsers.py b/readthedocs/search/parsers.py index 199054ac240..28d6108815f 100644 --- a/readthedocs/search/parsers.py +++ b/readthedocs/search/parsers.py @@ -3,7 +3,6 @@ import itertools import os import re -from urllib.parse import urlparse import orjson as json import structlog @@ -543,87 +542,3 @@ def _clean_body(self, body): node.decompose() return body - - -class MkDocsParser(GenericParser): - - """ - MkDocs parser. - - Index using the json index file instead of the html content. - """ - - def parse(self, page): - storage_path = self.project.get_storage_path( - type_='html', - version_slug=self.version.slug, - include_file=False, - ) - try: - file_path = self.storage.join(storage_path, 'search/search_index.json') - if self.storage.exists(file_path): - index_data = self._process_index_file(file_path, page=page) - if index_data: - return index_data - except Exception: - log.warning( - 'Unhandled exception during search processing file.', - page=page, - ) - return { - 'path': page, - 'title': '', - 'sections': [], - } - - def _process_index_file(self, json_path, page): - """Reads the json index file and parses it into a structured dict.""" - try: - with self.storage.open(json_path, mode='r') as f: - file_contents = f.read() - except IOError: - log.info('Unable to read file.', path=json_path) - raise - - data = json.loads(file_contents) - page_data = {} - - for section in data.get('docs', []): - parsed_path = urlparse(section.get('location', '')) - fragment = parsed_path.fragment - path = parsed_path.path - - # Some old versions of mkdocs - # index the pages as ``/page.html`` instead of ``page.html``. - path = path.lstrip('/') - - if path == '' or path.endswith('/'): - path += 'index.html' - - if page != path: - continue - - title = self._parse_content( - HTMLParser(section.get('title')).text() - ) - content = self._parse_content( - HTMLParser(section.get('text')).text() - ) - - # If it doesn't have a fragment, - # it means is the page itself. - if not fragment: - page_data.update({ - 'path': path, - 'title': title, - }) - # Content without a fragment need to be indexed as well, - # this happens when the page doesn't start with a header, - # or if it doesn't contain any headers at all. - page_data.setdefault('sections', []).append({ - 'id': fragment, - 'title': title, - 'content': content, - }) - - return page_data diff --git a/readthedocs/search/tests/data/mkdocs/in/search_index.json b/readthedocs/search/tests/data/mkdocs/in/search_index.json deleted file mode 100644 index ef148f1e8ad..00000000000 --- a/readthedocs/search/tests/data/mkdocs/in/search_index.json +++ /dev/null @@ -1,36 +0,0 @@ -{ - "config": { - "lang": [ - "en" - ], - "prebuild_index": false, - "separator": "[\\s\\-]+" - }, - "docs": [ - { - "location": "", - "text": "Read the Docs MkDocs Test Project This is a test of MkDocs as it appears on Read the Docs.", - "title": "Read the Docs MkDocs Test Project" - }, - { - "location": "#read-the-docs-mkdocs-test-project", - "text": "Read the Docs MkDocs Test Project This is a test of MkDocs as it appears on Read the Docs.", - "title": "Read the Docs MkDocs Test Project" - }, - { - "location": "versions/", - "text": "Versions & Themes There are a number of versions and themes for mkdocs.", - "title": "Versions & Themes" - }, - { - "location": "versions/#versions-themes", - "text": "Versions & Themes There are a number of versions and themes for mkdocs.", - "title": "Versions & Themes" - }, - { - "location": "no-title/", - "text": "No title", - "title": "no-title" - } - ] -} diff --git a/readthedocs/search/tests/data/mkdocs/in/search_index_old.json b/readthedocs/search/tests/data/mkdocs/in/search_index_old.json deleted file mode 100644 index 29a3b63811b..00000000000 --- a/readthedocs/search/tests/data/mkdocs/in/search_index_old.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "docs": [ - { - "location": "/", - "text": "Read the Docs MkDocs Test Project\n\n\nThis is a test of \nMkDocs\n as it appears on \nRead the Docs\n.", - "title": "Read the Docs MkDocs Test Project" - }, - { - "location": "/#read-the-docs-mkdocs-test-project", - "text": "Read the Docs MkDocs Test Project\n\n\nThis is a test of \nMkDocs\n as it appears on \nRead the Docs\n.", - "title": "Read the Docs MkDocs Test Project" - }, - { - "location": "/versions/", - "text": "Versions & Themes\n\n\nThere are a number of versions and themes for mkdocs.", - "title": "Versions & Themes" - }, - { - "location": "/versions/#versions-themes", - "text": "Versions & Themes\n\n\nThere are a number of versions and themes for mkdocs.", - "title": "Versions & Themes" - } - ] -} diff --git a/readthedocs/search/tests/data/mkdocs/out/search_index.json b/readthedocs/search/tests/data/mkdocs/out/search_index.json deleted file mode 100644 index c69e91033f2..00000000000 --- a/readthedocs/search/tests/data/mkdocs/out/search_index.json +++ /dev/null @@ -1,45 +0,0 @@ -[ - { - "title": "Read the Docs MkDocs Test Project", - "path": "index.html", - "sections": [ - { - "id": "", - "title": "Read the Docs MkDocs Test Project", - "content": "Read the Docs MkDocs Test Project This is a test of MkDocs as it appears on Read the Docs." - }, - { - "id": "read-the-docs-mkdocs-test-project", - "title": "Read the Docs MkDocs Test Project", - "content": "Read the Docs MkDocs Test Project This is a test of MkDocs as it appears on Read the Docs." - } - ] - }, - { - "title": "Versions & Themes", - "path": "versions/index.html", - "sections": [ - { - "id": "", - "title": "Versions & Themes", - "content": "Versions & Themes There are a number of versions and themes for mkdocs." - }, - { - "id": "versions-themes", - "title": "Versions & Themes", - "content": "Versions & Themes There are a number of versions and themes for mkdocs." - } - ] - }, - { - "title": "no-title", - "path": "no-title/index.html", - "sections": [ - { - "id": "", - "title": "no-title", - "content": "No title" - } - ] - } -] diff --git a/readthedocs/search/tests/data/mkdocs/out/search_index_old.json b/readthedocs/search/tests/data/mkdocs/out/search_index_old.json deleted file mode 100644 index 0c0c6f39aa5..00000000000 --- a/readthedocs/search/tests/data/mkdocs/out/search_index_old.json +++ /dev/null @@ -1,34 +0,0 @@ -[ - { - "title": "Read the Docs MkDocs Test Project", - "path": "index.html", - "sections": [ - { - "id": "", - "title": "Read the Docs MkDocs Test Project", - "content": "Read the Docs MkDocs Test Project This is a test of MkDocs as it appears on Read the Docs ." - }, - { - "id": "read-the-docs-mkdocs-test-project", - "title": "Read the Docs MkDocs Test Project", - "content": "Read the Docs MkDocs Test Project This is a test of MkDocs as it appears on Read the Docs ." - } - ] - }, - { - "title": "Versions & Themes", - "path": "versions/index.html", - "sections": [ - { - "id": "", - "title": "Versions & Themes", - "content": "Versions & Themes There are a number of versions and themes for mkdocs." - }, - { - "id": "versions-themes", - "title": "Versions & Themes", - "content": "Versions & Themes There are a number of versions and themes for mkdocs." - } - ] - } -] diff --git a/readthedocs/search/tests/test_parsers.py b/readthedocs/search/tests/test_parsers.py index 59e7fcb5e69..1e129f752e7 100644 --- a/readthedocs/search/tests/test_parsers.py +++ b/readthedocs/search/tests/test_parsers.py @@ -37,44 +37,6 @@ def f(*args, **kwargs): yield read_mock return f - @mock.patch.object(BuildMediaFileSystemStorage, 'exists') - @mock.patch.object(BuildMediaFileSystemStorage, 'open') - def test_mkdocs(self, storage_open, storage_exists): - json_file = data_path / 'mkdocs/in/search_index.json' - storage_open.side_effect = self._mock_open( - json_file.open().read() - ) - storage_exists.return_value = True - - self.version.documentation_type = MKDOCS - self.version.save() - - index_file = get( - HTMLFile, - project=self.project, - version=self.version, - path='index.html', - ) - versions_file = get( - HTMLFile, - project=self.project, - version=self.version, - path='versions/index.html', - ) - no_title_file = get( - HTMLFile, - project=self.project, - version=self.version, - path='no-title/index.html', - ) - - parsed_json = [ - index_file.processed_json, - versions_file.processed_json, - no_title_file.processed_json, - ] - expected_json = json.load(open(data_path / 'mkdocs/out/search_index.json')) - assert parsed_json == expected_json @mock.patch.object(BuildMediaFileSystemStorage, 'exists') @mock.patch.object(BuildMediaFileSystemStorage, 'open') @@ -199,37 +161,6 @@ def test_mkdocs_readthedocs_theme(self, storage_open, storage_exists): expected_json = json.load(open(data_path / 'mkdocs/out/readthedocs-1.1.json')) assert parsed_json == expected_json - @mock.patch.object(BuildMediaFileSystemStorage, 'exists') - @mock.patch.object(BuildMediaFileSystemStorage, 'open') - def test_mkdocs_old_version(self, storage_open, storage_exists): - json_file = data_path / 'mkdocs/in/search_index_old.json' - storage_open.side_effect = self._mock_open( - json_file.open().read() - ) - storage_exists.return_value = True - - self.version.documentation_type = MKDOCS - self.version.save() - - index_file = get( - HTMLFile, - project=self.project, - version=self.version, - path='index.html', - ) - versions_file = get( - HTMLFile, - project=self.project, - version=self.version, - path='versions/index.html', - ) - - parsed_json = [ - index_file.processed_json, - versions_file.processed_json, - ] - expected_json = json.load(open(data_path / 'mkdocs/out/search_index_old.json')) - assert parsed_json == expected_json @mock.patch.object(BuildMediaFileSystemStorage, 'exists') @mock.patch.object(BuildMediaFileSystemStorage, 'open')