def _get_page_content(self, page):
    """
    Gets the page content from storage.

    The file is looked up under the project's ``html`` storage path
    for the current version, joined with ``page``.

    :param page: path of the file, relative to the version's html root.
    :returns: the content of the file,
     or ``None`` if it could not be read for any reason.
    """
    content = None
    try:
        storage_path = self.project.get_storage_path(
            type_='html',
            version_slug=self.version.slug,
            include_file=False,
        )
        file_path = self.storage.join(storage_path, page)
        with self.storage.open(file_path, mode='r') as f:
            content = f.read()
    except Exception:
        # Best effort: one unreadable page must not abort indexing,
        # but keep the traceback so the failure can be diagnosed.
        log.warning(
            'Unhandled exception during search processing file: %s',
            page,
            exc_info=True,
        )
    return content


def _get_page_title(self, body, html):
    """
    Gets the title from the html page.

    The title is the first section in the document,
    falling back to the ``title`` tag.

    :param body: node where the first ``h1`` is searched.
    :param html: parsed document, used for the ``title`` tag fallback.
    :returns: the title, or ``None`` if neither tag is found.
    """
    first_header = body.css_first('h1')
    if first_header:
        # Only the title is needed here, the section id is discarded.
        title, _ = self._parse_section_title(first_header)
        return title

    title_tag = html.css_first('title')
    if title_tag:
        return self._parse_content(title_tag.text())

    return None


def _get_main_node(self, html):
    """
    Gets the main node from where to start indexing content.

    The main node is tested in the following order:

    - Try with a tag with the ``main`` role.
      This role is used by several static sites and themes.
    - Try the first ``h1`` node and return its parent
      Usually all sections are neighbors,
      so they are children of the same parent node.

    :param html: parsed document.
    :returns: the main node,
     or ``None`` if the document has no body or no candidate node.
    """
    body = html.body
    if body is None:
        # Nothing to search in (e.g. an empty or truncated file).
        return None

    main_node = body.css_first('[role=main]')
    if main_node:
        return main_node

    # TODO: this could be done in smarter way,
    # checking for common parents between all h nodes.
    first_header = body.css_first('h1')
    if first_header:
        return first_header.parent

    return None


def parse(self, page):
    """
    Parses ``page`` from its html file or from the json index file.

    The html file is used when the project has
    the ``INDEX_FROM_HTML_FILES`` feature enabled.
    """
    # Avoid circular import
    from readthedocs.projects.models import Feature
    if self.project.has_feature(Feature.INDEX_FROM_HTML_FILES):
        return self.parse_from_html(page)
    return self.parse_from_index_file(page)


def parse_from_html(self, page):
    """
    Parses the html file of ``page`` into a structured dict.

    :returns: the result of processing the page content.
     If the content is empty, can't be retrieved, or processing fails,
     a dict with the path of the page and no title or sections is returned.
    """
    try:
        content = self._get_page_content(page)
        if content:
            return self._process_content(page, content)
    except Exception as e:
        # Let logging format the exception lazily.
        log.info('Failed to index page %s, %s', page, e)
    return {
        'path': page,
        'title': '',
        'sections': [],
        'domain_data': {},
    }


def _process_content(self, page, content):
    """
    Parses the content into a structured dict.

    The title falls back to ``page`` when the document doesn't have one.
    If no main node is found, the title and the sections are left empty.
    """
    html = HTMLParser(content)
    body = self._get_main_node(html)
    title = ''
    sections = []
    if body:
        title = self._get_page_title(body, html) or page
        sections = list(self._parse_sections(title, body))
    else:
        log.info(
            "Page doesn't look like it has valid content, skipping. "
            'page=%s',
            page,
        )
    return {
        'path': page,
        'title': title,
        'sections': sections,
        'domain_data': {},
    }
+
+ + + +
+ +
+
+ + +
+
+ +
+ +

Mkdocs - GitBook Theme

+

Apache 2.0 License + PyPI

+

Installation

+

First, install the package via PyPI:

+
pip install mkdocs-gitbook
+                
+ +

Then include the theme in your mkdocs.yml file:

+
theme:
+  name: gitbook
+                
+ +

Motivation

+

Gitbook was a static-site generator written in JavaScript.

+

Mkdocs is a static-site generator written in Python.

+

Gitbook is no longer a static-site generator, nor does it use git, nor is it free or open source!

+

Screenshot

+

Default theme for GitBook for Mkdocs

+

License

+

SPDX-License-Identifier: Apache-2.0

+ + +
+ +
+
+ +
+ +
+ + + + + diff --git a/readthedocs/search/tests/data/mkdocs/in/material/index.html b/readthedocs/search/tests/data/mkdocs/in/material/index.html new file mode 100644 index 00000000000..dd04820579c --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/in/material/index.html @@ -0,0 +1,1504 @@ + + + + + + + + + + + + + + + + + + + + + + Overview of Pi-hole + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + Skip to content + + +
+
+ +
+ +
+ +
+ +
+ + + + + + +
+
+ + +
+
+
+ +
+
+
+ + +
+
+
+ + +
+
+
+ + +
+
+ + + + + + + + +

Overview

+ +

+ + Pi-hole + +
+ Network-wide ad blocking via your own Linux hardware +

+ +

The Pi-hole® is a DNS sinkhole that protects your devices from unwanted content, without installing any client-side software.

+
    +
  • Easy-to-install: our versatile installer walks you through the process, and takes less than ten minutes
  • +
  • Robust: a command-line interface that is quality assured for interoperability
  • +
  • Insightful: a beautiful responsive Web Interface dashboard to view and control your Pi-hole
  • +
  • Free: open-source software which helps ensure you are the sole person in control of your privacy
  • +
+
+

Pi-hole is free, but powered by your support

+

There are many reoccurring costs involved with maintaining free, open-source, and privacy respecting software; expenses which our volunteer developers pitch in to cover out-of-pocket. This is just one example of how strongly we feel about our software, as well as the importance of keeping it maintained.

+

Make no mistake: your support is absolutely vital to help keep us innovating!

+

Donations

+

Sending a donation using our links below is extremely helpful in offsetting a portion of our monthly expenses:

+ +

Alternative support

+

If you'd rather not donate (which is okay!), there are other ways you can help support us:

+

Contributing via GitHub

+

We welcome everyone to contribute to issue reports, suggest new features, and create pull requests.

+

If you have something to add - anything from a typo through to a whole new feature, we're happy to check it out! Just make sure to fill out our template when submitting your request; the questions that it asks will help the volunteers quickly understand what you're aiming to achieve.

+ + +
+
+ + + Last update: June 2, 2020 + + +
+ + + + + + + + +
+
+
+
+ + + + +
+ + + + + + + + + diff --git a/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/404.html b/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/404.html new file mode 100644 index 00000000000..cb12fa2543f --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/404.html @@ -0,0 +1,200 @@ + + + + + + + + + + + MkDocs + + + + + + + + + + + + + + + + + + + +
+
+ +
+
+

404

+

Page not found

+
+
+ + +
+
+ + + + + + + + + + + + + diff --git a/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/configuration.html b/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/configuration.html new file mode 100644 index 00000000000..93e3475efe0 --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/configuration.html @@ -0,0 +1,325 @@ + + + + + + + + + + + Configuration - MkDocs + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +

Configuration

+

Guide to all available configuration settings.

+
+

Introduction

+

+ Project settings are always configured by using a YAML configuration file in the + project directory named mkdocs.yml.

+

+ As a minimum this configuration file must contain the site_name setting. All other settings are optional. +

+ +

site_description

+

Set the site description. This will add a meta tag to the generated HTML header.

+

default: null

+ +

site_author

+

+ Set the name of the author. This will add a meta tag to the generated HTML header. +

+ +

Preview controls

+

use_directory_urls

+

+ This setting controls the style used for linking to pages within the + documentation. +

+ + + + + + + + + + + + + + + + + + + + + + + + + +
Source fileuse_directory_urls: trueuse_directory_urls: false
index.md//index.html
api-guide.md/api-guide//api-guide.html
about/license.md/about/license//about/license.html
+ +
lang
+

+ A list of languages to use when building the search index as identified by their + ISO 639-1 language codes. +

+
    +
  • ar: Arabic
  • +
  • da: Danish
  • +
  • nl: Dutch
  • +
  • en: English
  • +
  • fi: Finnish
  • +
  • fr: French
  • +
  • de: German
  • +
  • hu: Hungarian
  • +
  • it: Italian
  • +
  • ja: Japanese
  • +
  • no: Norwegian
  • +
  • pt: Portuguese
  • +
  • ro: Romanian
  • +
  • ru: Russian
  • +
  • es: Spanish
  • +
  • sv: Swedish
  • +
  • th: Thai
  • +
  • tr: Turkish
  • +
  • vi: Vietnamese
  • +
+
+
+
+ + + + + + + + + + + + + diff --git a/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/index.html b/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/index.html new file mode 100644 index 00000000000..502d3d01ff6 --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/index.html @@ -0,0 +1,316 @@ + + + + + + + + + + + + + + MkDocs + + + + + + + + + + + + + + + + + + + +
+
+
+
+ +

MkDocs

+

Project documentation with Markdown.

+ +
+ +

Overview

+

+ MkDocs is a fast, simple and downright gorgeous static site + generator that's geared towards building project documentation. +

+ +

Host anywhere

+

+ MkDocs builds completely static HTML sites that you can host on GitHub pages, + Amazon S3, or anywhere else you choose. +

+ +

Great themes available

+

+ There's a stack of good looking themes available for MkDocs. +

+ +
+ +

Installation

+

Install with a Package Manager

+

+ If your package manager does not have a recent "MkDocs" package, you can still + use your package manager to install "Python" and "pip". +

+
$ python --version
+Python 3.8.2
+$ pip --version
+pip 20.0.2 from /usr/local/lib/python3.8/site-packages/pip (python 3.8)
+            
+ +
+

Note

+

If you would like manpages installed for MkDocs, the click-man tool can + generate and install them for you. Simply run the following two commands:

+
pip install click-man
+click-man --target path/to/man/pages mkdocs
+              
+
+
+
+ + + + + + + + + + + + + + + diff --git a/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/no-main-header.html b/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/no-main-header.html new file mode 100644 index 00000000000..613f09592e0 --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/no-main-header.html @@ -0,0 +1,183 @@ + + + + + + + + + + + + No main header - Read the Docs MkDocs Test + + + + + + + + + + + + + + + + +
+
+
+
+ +

I don't start with a header.

+

I'm the header

+

I'm more content.

+
+
+ +
+
+

Documentation built with MkDocs.

+
+ + + + + + + + + + + diff --git a/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/no-title.html b/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/no-title.html new file mode 100644 index 00000000000..fc4413021b3 --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/in/mkdocs-1.1/no-title.html @@ -0,0 +1,185 @@ + + + + + + + + + + + + + No title - Read the Docs MkDocs Test + + + + + + + + + + + + + + + + +
+
+
+
+ +

This file doesn't have a header, + but it does have a content.

+
    +
  • 1
  • +
  • 2
  • +
  • 3
  • +
+

Another paragraph

+
+
+ +
+
+

Documentation built with MkDocs.

+
+ + + + + + + + + + + diff --git a/readthedocs/search/tests/data/mkdocs/in/readthedocs-1.1/404.html b/readthedocs/search/tests/data/mkdocs/in/readthedocs-1.1/404.html new file mode 100644 index 00000000000..e8eed888de6 --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/in/readthedocs-1.1/404.html @@ -0,0 +1,131 @@ + + + + + + + + + + + + Read the Docs MkDocs Test + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+
+
+
    +
  • Docs »
  • + + +
  • + +
  • +
+ +
+
+
+
+ + +

404

+ +

Page not found

+ + +
+
+ + +
+
+ +
+ +
+ +
+ + + + + +
+ + + + + + + + + + diff --git a/readthedocs/search/tests/data/mkdocs/in/readthedocs-1.1/index.html b/readthedocs/search/tests/data/mkdocs/in/readthedocs-1.1/index.html new file mode 100644 index 00000000000..5f610ec33b5 --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/in/readthedocs-1.1/index.html @@ -0,0 +1,175 @@ + + + + + + + + + + + + + + + + Read the Docs MkDocs Test + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+
+
+
    +
  • Docs »
  • + + + +
  • Read the Docs MkDocs Test Project
  • +
  • + +
  • +
+ +
+
+
+
+ +

Read the Docs MkDocs Test Project

+

This is a test of MkDocs as it appears on Read the Docs. + Different versions of this documentation are build with different versions and themes of mkdocs.

+

Use the version selector menu in the lower right to change the version and theme of this documentation

+

Sub header

+

Some content:

+
    +
  • a
  • +
  • b
  • +
  • c
  • +
+

Another title

+

I'm another title

+

I'm a subtitle

+

Another content, mkdocs is great!

+ +
+
+ + +
+
+ +
+ +
+ +
+ + + + + Next » + + +
+ + + + + + + + + + + + diff --git a/readthedocs/search/tests/data/mkdocs/in/readthedocs-1.1/versions.html b/readthedocs/search/tests/data/mkdocs/in/readthedocs-1.1/versions.html new file mode 100644 index 00000000000..55a9b3be75c --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/in/readthedocs-1.1/versions.html @@ -0,0 +1,151 @@ + + + + + + + + + + + + Versions & Themes - Read the Docs MkDocs Test + + + + + + + + + + + + + + + + + + + +
+ + + + +
+ + + + + +
+
+
+
    +
  • Docs »
  • + + + +
  • Versions & Themes
  • +
  • + +
  • +
+ +
+
+
+
+ +

Versions & Themes

+

There are a number of versions and themes for mkdocs + each of which have slight nuances with how they affect Read the Docs.

+

You can use the version menu in the lower right to switch + between versions and theme for this documentation.

+ +
+
+ + +
+
+ +
+ +
+ +
+ + + + « Previous + + + +
+ + + + + + + + + + diff --git a/readthedocs/search/tests/data/mkdocs/in/windmill/index.html b/readthedocs/search/tests/data/mkdocs/in/windmill/index.html new file mode 100644 index 00000000000..e729b15b45a --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/in/windmill/index.html @@ -0,0 +1,239 @@ + + + + + + + + + + + + + + + Windmill MkDocs Theme + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + +
+ +
+
+ +
+ + + + + + + + + + + + +

Windmill theme#

+

About#

+

Windmill theme focuses on clean usable navigation for large documentation + projects. It retains the state of the menu of pages and folders across page + transitions, by keeping navigation to an iframe.

+

Within pages, it uses the default mkdocs theme, including syntax highlighting.

+ +

Installation#

+

Install the Windmill theme using pip:

+
pip install mkdocs-windmill
+      
+ +

Usage#

+

To use the Windmill theme installed via pip, add this to your mkdocs.yml:

+
theme: 'windmill'
+      
+ +

If you cloned Windmill from GitHub:

+
theme:
+  name: null
+  custom_dir: '{INSTALL_DIR}/mkdocs_windmill'
+  # Copy settings from mkdocs_theme.yml, which is ignored by custom_dir themes.
+  static_templates: [404.html]
+  search_index_only: true
+  include_search_page: true
+      
+ +

Note that it's important for there to exist a homepage, e.g. a top-level root element in mkdocs 1.0+:

+
nav:
+  - Home: index.md
+      
+ + + +
+
+ + + + + + diff --git a/readthedocs/search/tests/data/mkdocs/out/gitbook.json b/readthedocs/search/tests/data/mkdocs/out/gitbook.json new file mode 100644 index 00000000000..c22e6b29f3c --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/out/gitbook.json @@ -0,0 +1,34 @@ +[ + { + "path": "index.html", + "title": "Mkdocs - GitBook Theme", + "sections": [ + { + "id": "mkdocs-gitbook-theme", + "title": "Mkdocs - GitBook Theme", + "content": "" + }, + { + "id": "installation", + "title": "Installation", + "content": "First, install the package via PyPI: pip install mkdocs-gitbook Then include the theme in your mkdocs.yml file: theme: name: gitbook" + }, + { + "id": "motivation", + "title": "Motivation", + "content": "Gitbook was a static-site generator written in JavaScript. Mkdocs is a static-site generator written in Python. Gitbook is no longer a static-site generator, nor does it use git, nor is it free or open source!" + }, + { + "id": "screenshot", + "title": "Screenshot", + "content": "" + }, + { + "id": "license", + "title": "License", + "content": "SPDX-License-Identifier: Apache-2.0" + } + ], + "domain_data": {} + } +] diff --git a/readthedocs/search/tests/data/mkdocs/out/material.json b/readthedocs/search/tests/data/mkdocs/out/material.json new file mode 100644 index 00000000000..5e71b97f64c --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/out/material.json @@ -0,0 +1,34 @@ +[ + { + "path": "index.html", + "title": "Overview", + "sections": [ + { + "id": "", + "title": "Overview", + "content": "Network-wide ad blocking via your own Linux hardware The Pi-hole® is a DNS sinkhole that protects your devices from unwanted content, without installing any client-side software. 
Easy-to-install: our versatile installer walks you through the process, and takes less than ten minutes Robust: a command-line interface that is quality assured for interoperability Insightful: a beautiful responsive Web Interface dashboard to view and control your Pi-hole Free: open-source software which helps ensure you are the sole person in control of your privacy" + }, + { + "id": "pi-hole-is-free-but-powered-by-your-support", + "title": "Pi-hole is free, but powered by your support", + "content": "There are many reoccurring costs involved with maintaining free, open-source, and privacy respecting software; expenses which our volunteer developers pitch in to cover out-of-pocket. This is just one example of how strongly we feel about our software, as well as the importance of keeping it maintained. Make no mistake: your support is absolutely vital to help keep us innovating!" + }, + { + "id": "donations", + "title": "Donations", + "content": "Sending a donation using our links below is extremely helpful in offsetting a portion of our monthly expenses: Donate via PayPal or Stripe Bitcoin, Bitcoin Cash, Ethereum, Litecoin" + }, + { + "id": "alternative-support", + "title": "Alternative support", + "content": "If you'd rather not donate (which is okay!), there are other ways you can help support us:" + }, + { + "id": "contributing-via-github", + "title": "Contributing via GitHub", + "content": "We welcome everyone to contribute to issue reports, suggest new features, and create pull requests. If you have something to add - anything from a typo through to a whole new feature, we're happy to check it out! Just make sure to fill out our template when submitting your request; the questions that it asks will help the volunteers quickly understand what you're aiming to achieve. 
Last update: June 2, 2020" + } + ], + "domain_data": {} + } +] diff --git a/readthedocs/search/tests/data/mkdocs/out/mkdocs-1.1.json b/readthedocs/search/tests/data/mkdocs/out/mkdocs-1.1.json new file mode 100644 index 00000000000..f30020312b0 --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/out/mkdocs-1.1.json @@ -0,0 +1,122 @@ +[ + { + "path": "index.html", + "title": "MkDocs", + "sections": [ + { + "id": "mkdocs", + "title": "MkDocs", + "content": "Project documentation with\u00a0Markdown." + }, + { + "id": "overview", + "title": "Overview", + "content": "MkDocs is a fast, simple and downright gorgeous static site generator that's geared towards building project documentation." + }, + { + "id": "installation", + "title": "Installation", + "content": "" + }, + { + "id": "host-anywhere", + "title": "Host anywhere", + "content": "MkDocs builds completely static HTML sites that you can host on GitHub pages, Amazon S3, or anywhere else you choose." + }, + { + "id": "great-themes-available", + "title": "Great themes available", + "content": "There's a stack of good looking themes available for MkDocs." + }, + { + "id": "install-with-a-package-manager", + "title": "Install with a Package Manager", + "content": "If your package manager does not have a recent \"MkDocs\" package, you can still use your package manager to install \"Python\" and \"pip\". $ python --version Python 3.8.2 $ pip --version pip 20.0.2 from /usr/local/lib/python3.8/site-packages/pip (python 3.8) Note If you would like manpages installed for MkDocs, the click-man tool can generate and install them for you. 
Simply run the following two commands: pip install click-man click-man --target path/to/man/pages mkdocs" + } + ], + "domain_data": {} + }, + { + "path": "404.html", + "title": "404", + "sections": [ + { + "id": "404-page-not-found", + "title": "404", + "content": "Page not found" + } + ], + "domain_data": {} + }, + { + "path": "configuration.html", + "title": "Configuration", + "sections": [ + { + "id": "configuration", + "title": "Configuration", + "content": "Guide to all available configuration settings." + }, + { + "id": "introduction", + "title": "Introduction", + "content": "Project settings are always configured by using a YAML configuration file in the project directory named mkdocs.yml. As a minimum this configuration file must contain the site_name setting. All other settings are optional." + }, + { + "id": "preview-controls", + "title": "Preview controls", + "content": "" + }, + { + "id": "site_description", + "title": "site_description", + "content": "Set the site description. This will add a meta tag to the generated HTML header. default: null" + }, + { + "id": "site_author", + "title": "site_author", + "content": "Set the name of the author. This will add a meta tag to the generated HTML header." + }, + { + "id": "use_directory_urls", + "title": "use_directory_urls", + "content": "This setting controls the style used for linking to pages within the documentation. Source file use_directory_urls: true use_directory_urls: false index.md / /index.html api-guide.md /api-guide/ /api-guide.html about/license.md /about/license/ /about/license.html" + }, + { + "id": "lang", + "title": "lang", + "content": "A list of languages to use when building the search index as identified by their ISO 639-1 language codes. 
ar: Arabic da: Danish nl: Dutch en: English fi: Finnish fr: French de: German hu: Hungarian it: Italian ja: Japanese no: Norwegian pt: Portuguese ro: Romanian ru: Russian es: Spanish sv: Swedish th: Thai tr: Turkish vi: Vietnamese" + } + ], + "domain_data": {} + }, + { + "path": "no-title.html", + "title": "No title - Read the Docs MkDocs Test", + "sections": [ + { + "id": "", + "title": "No title - Read the Docs MkDocs Test", + "content": "This file doesn't have a header, but it does have a content. 1 2 3 Another paragraph" + } + ], + "domain_data": {} + }, + { + "path": "no-main-header.html", + "title": "I'm the header", + "sections": [ + { + "id": "", + "title": "I'm the header", + "content": "I don't start with a header." + }, + { + "id": "im-the-header", + "title": "I'm the header", + "content": "I'm more content." + } + ], + "domain_data": {} + } +] diff --git a/readthedocs/search/tests/data/mkdocs/out/readthedocs-1.1.json b/readthedocs/search/tests/data/mkdocs/out/readthedocs-1.1.json new file mode 100644 index 00000000000..51487a4475a --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/out/readthedocs-1.1.json @@ -0,0 +1,53 @@ +[ + { + "path": "index.html", + "title": "Read the Docs MkDocs Test Project", + "sections": [ + { + "id": "read-the-docs-mkdocs-test-project", + "title": "Read the Docs MkDocs Test Project", + "content": "This is a test of MkDocs as it appears on Read the Docs. Different versions of this documentation are build with different versions and themes of mkdocs. Use the version selector menu in the lower right to change the version and theme of this documentation" + }, + { + "id": "another-title", + "title": "Another title", + "content": "I'm another title" + }, + { + "id": "sub-header", + "title": "Sub header", + "content": "Some content: a b c" + }, + { + "id": "im-a-subtitle", + "title": "I'm a subtitle", + "content": "Another content, mkdocs is great!" 
+ } + ], + "domain_data": {} + }, + { + "path": "404.html", + "title": "404", + "sections": [ + { + "id": "404-page-not-found", + "title": "404", + "content": "Page not found" + } + ], + "domain_data": {} + }, + { + "path": "versions.html", + "title": "Versions & Themes", + "sections": [ + { + "id": "versions-themes", + "title": "Versions & Themes", + "content": "There are a number of versions and themes for mkdocs each of which have slight nuances with how they affect Read the Docs. You can use the version menu in the lower right to switch between versions and theme for this documentation." + } + ], + "domain_data": {} + } +] diff --git a/readthedocs/search/tests/data/mkdocs/out/windmill.json b/readthedocs/search/tests/data/mkdocs/out/windmill.json new file mode 100644 index 00000000000..8f0f6726064 --- /dev/null +++ b/readthedocs/search/tests/data/mkdocs/out/windmill.json @@ -0,0 +1,29 @@ +[ + { + "path": "index.html", + "title": "Windmill theme", + "sections": [ + { + "id": "windmill-theme", + "title": "Windmill theme", + "content": "" + }, + { + "id": "about", + "title": "About", + "content": "Windmill theme focuses on clean usable navigation for large documentation projects. It retains the state of the menu of pages and folders across page transitions, by keeping navigation to an iframe. Within pages, it uses the default mkdocs theme, including syntax highlighting." + }, + { + "id": "installation", + "title": "Installation", + "content": "Install the Windmill theme using pip: pip install mkdocs-windmill" + }, + { + "id": "usage", + "title": "Usage", + "content": "To use the Windmill theme installed via pip, add this to your mkdocs.yml: theme: 'windmill' If you cloned Windmill from GitHub: theme: name: null custom_dir: '{INSTALL_DIR}/mkdocs_windmill' # Copy settings from mkdocs_theme.yml, which is ignored by custom_dir themes. 
def _assert_mkdocs_theme(self, theme, file_names, storage_open, storage_exists):
    """
    Parse the html fixtures of ``theme`` and compare with the expected json.

    Input files are read from ``mkdocs/in/<theme>/`` and the expected result
    from ``mkdocs/out/<theme>.json``, both relative to ``data_path``.
    Each file is served to the parser through the mocked storage.

    :param theme: name of the fixture directory (and of the expected json).
    :param file_names: files to parse, in the order of the expected json.
    :param storage_open: mock of the storage ``open`` method.
    :param storage_exists: mock of the storage ``exists`` method.
    """
    storage_exists.return_value = True

    self.project.feature_set.add(self.feature)
    self.version.documentation_type = MKDOCS
    self.version.save()

    local_path = data_path / 'mkdocs/in' / theme
    parsed_json = []
    for file_name in file_names:
        # ``read_text`` closes the file, and the fixtures contain
        # non-ASCII characters, so don't depend on the locale encoding.
        content = (local_path / file_name).read_text(encoding='utf-8')
        storage_open.reset_mock()
        storage_open.side_effect = self._mock_open(content)
        html_file = get(
            HTMLFile,
            project=self.project,
            version=self.version,
            path=file_name,
        )
        parsed_json.append(html_file.processed_json)

    expected_path = data_path / 'mkdocs/out' / f'{theme}.json'
    expected_json = json.loads(expected_path.read_text(encoding='utf-8'))
    assert parsed_json == expected_json


@mock.patch.object(BuildMediaFileSystemStorage, 'exists')
@mock.patch.object(BuildMediaFileSystemStorage, 'open')
def test_mkdocs_default_theme(self, storage_open, storage_exists):
    """Index the html generated by the default MkDocs theme."""
    all_files = [
        'index.html',
        '404.html',
        'configuration.html',
        'no-title.html',
        'no-main-header.html',
    ]
    self._assert_mkdocs_theme(
        'mkdocs-1.1', all_files, storage_open, storage_exists,
    )


@mock.patch.object(BuildMediaFileSystemStorage, 'exists')
@mock.patch.object(BuildMediaFileSystemStorage, 'open')
def test_mkdocs_gitbook_theme(self, storage_open, storage_exists):
    """Index the html generated by the GitBook theme."""
    self._assert_mkdocs_theme(
        'gitbook', ['index.html'], storage_open, storage_exists,
    )


@mock.patch.object(BuildMediaFileSystemStorage, 'exists')
@mock.patch.object(BuildMediaFileSystemStorage, 'open')
def test_mkdocs_material_theme(self, storage_open, storage_exists):
    """Index the html generated by the Material theme."""
    self._assert_mkdocs_theme(
        'material', ['index.html'], storage_open, storage_exists,
    )


@mock.patch.object(BuildMediaFileSystemStorage, 'exists')
@mock.patch.object(BuildMediaFileSystemStorage, 'open')
def test_mkdocs_windmill_theme(self, storage_open, storage_exists):
    """Index the html generated by the Windmill theme."""
    self._assert_mkdocs_theme(
        'windmill', ['index.html'], storage_open, storage_exists,
    )


@mock.patch.object(BuildMediaFileSystemStorage, 'exists')
@mock.patch.object(BuildMediaFileSystemStorage, 'open')
def test_mkdocs_readthedocs_theme(self, storage_open, storage_exists):
    """Index the html generated by the Read the Docs theme."""
    self._assert_mkdocs_theme(
        'readthedocs-1.1',
        ['index.html', '404.html', 'versions.html'],
        storage_open,
        storage_exists,
    )