readthedocs · stsewd · Jul 6, 2023 · Jul 6, 2023 · Jul 6, 2023
@@ -51,7 +51,7 @@
     validate_repository_url,
 )
 from readthedocs.projects.version_handling import determine_stable_version
-from readthedocs.search.parsers import GenericParser, MkDocsParser, SphinxParser
+from readthedocs.search.parsers import GenericParser, SphinxParser
 from readthedocs.storage import build_media_storage
 from readthedocs.vcs_support.backends import backend_cls
 
@@ -1527,13 +1527,12 @@ class Meta:
     def get_processed_json(self):
         if (
             self.version.documentation_type == constants.GENERIC
+            or self.version.is_mkdocs_type
             or self.project.has_feature(Feature.INDEX_FROM_HTML_FILES)
         ):
             parser_class = GenericParser
         elif self.version.is_sphinx_type:
             parser_class = SphinxParser
-        elif self.version.is_mkdocs_type:
-            parser_class = MkDocsParser
         else:
             log.warning(
                 "Invalid documentation type",

@@ -3,7 +3,6 @@
 import itertools
 import os
 import re
-from urllib.parse import urlparse
 
 import orjson as json
 import structlog
@@ -543,87 +542,3 @@ def _clean_body(self, body):
             node.decompose()
 
         return body
-
-
-class MkDocsParser(GenericParser):
-
-    """
-    MkDocs parser.
-
-    Index using the json index file instead of the html content.
-    """
-
-    def parse(self, page):
-        storage_path = self.project.get_storage_path(
-            type_='html',
-            version_slug=self.version.slug,
-            include_file=False,
-        )
-        try:
-            file_path = self.storage.join(storage_path, 'search/search_index.json')
-            if self.storage.exists(file_path):
-                index_data = self._process_index_file(file_path, page=page)
-                if index_data:
-                    return index_data
-        except Exception:
-            log.warning(
-                'Unhandled exception during search processing file.',
-                page=page,
-            )
-        return {
-            'path': page,
-            'title': '',
-            'sections': [],
-        }
-
-    def _process_index_file(self, json_path, page):
-        """Reads the json index file and parses it into a structured dict."""
-        try:
-            with self.storage.open(json_path, mode='r') as f:
-                file_contents = f.read()
-        except IOError:
-            log.info('Unable to read file.', path=json_path)
-            raise
-
-        data = json.loads(file_contents)
-        page_data = {}
-
-        for section in data.get('docs', []):
-            parsed_path = urlparse(section.get('location', ''))
-            fragment = parsed_path.fragment
-            path = parsed_path.path
-
-            # Some old versions of mkdocs
-            # index the pages as ``/page.html`` instead of ``page.html``.
-            path = path.lstrip('/')
-
-            if path == '' or path.endswith('/'):
-                path += 'index.html'
-
-            if page != path:
-                continue
-
-            title = self._parse_content(
-                HTMLParser(section.get('title')).text()
-            )
-            content = self._parse_content(
-                HTMLParser(section.get('text')).text()
-            )
-
-            # If it doesn't have a fragment,
-            # it means is the page itself.
-            if not fragment:
-                page_data.update({
-                    'path': path,
-                    'title': title,
-                })
-            # Content without a fragment need to be indexed as well,
-            # this happens when the page doesn't start with a header,
-            # or if it doesn't contain any headers at all.
-            page_data.setdefault('sections', []).append({
-                'id': fragment,
-                'title': title,
-                'content': content,
-            })
-
-        return page_data
@@ -37,44 +37,6 @@ def f(*args, **kwargs):
             yield read_mock
         return f
 
-    @mock.patch.object(BuildMediaFileSystemStorage, 'exists')
-    @mock.patch.object(BuildMediaFileSystemStorage, 'open')
-    def test_mkdocs(self, storage_open, storage_exists):
-        json_file = data_path / 'mkdocs/in/search_index.json'
-        storage_open.side_effect = self._mock_open(
-            json_file.open().read()
-        )
-        storage_exists.return_value = True
-
-        self.version.documentation_type = MKDOCS
-        self.version.save()
-
-        index_file = get(
-            HTMLFile,
-            project=self.project,
-            version=self.version,
-            path='index.html',
-        )
-        versions_file = get(
-            HTMLFile,
-            project=self.project,
-            version=self.version,
-            path='versions/index.html',
-        )
-        no_title_file = get(
-            HTMLFile,
-            project=self.project,
-            version=self.version,
-            path='no-title/index.html',
-        )
-
-        parsed_json = [
-            index_file.processed_json,
-            versions_file.processed_json,
-            no_title_file.processed_json,
-        ]
-        expected_json = json.load(open(data_path / 'mkdocs/out/search_index.json'))
-        assert parsed_json == expected_json
 
     @mock.patch.object(BuildMediaFileSystemStorage, 'exists')
     @mock.patch.object(BuildMediaFileSystemStorage, 'open')
@@ -199,37 +161,6 @@ def test_mkdocs_readthedocs_theme(self, storage_open, storage_exists):
         expected_json = json.load(open(data_path / 'mkdocs/out/readthedocs-1.1.json'))
         assert parsed_json == expected_json
 
-    @mock.patch.object(BuildMediaFileSystemStorage, 'exists')
-    @mock.patch.object(BuildMediaFileSystemStorage, 'open')
-    def test_mkdocs_old_version(self, storage_open, storage_exists):
-        json_file = data_path / 'mkdocs/in/search_index_old.json'
-        storage_open.side_effect = self._mock_open(
-            json_file.open().read()
-        )
-        storage_exists.return_value = True
-
-        self.version.documentation_type = MKDOCS
-        self.version.save()
-
-        index_file = get(
-            HTMLFile,
-            project=self.project,
-            version=self.version,
-            path='index.html',
-        )
-        versions_file = get(
-            HTMLFile,
-            project=self.project,
-            version=self.version,
-            path='versions/index.html',
-        )
-
-        parsed_json = [
-            index_file.processed_json,
-            versions_file.processed_json,
-        ]
-        expected_json = json.load(open(data_path / 'mkdocs/out/search_index_old.json'))
-        assert parsed_json == expected_json
 
     @mock.patch.object(BuildMediaFileSystemStorage, 'exists')
     @mock.patch.object(BuildMediaFileSystemStorage, 'open')