diff --git a/dev-requirements.txt b/dev-requirements.txt index 4e642f5..dfef3bc 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,5 +1,6 @@ -sphinx +sphinx==3.1.2 wheel==0.34.2 pytest==5.4.3 beautifulsoup4==4.9.1 -setuptools==47.3.1 \ No newline at end of file +setuptools==49.2.0 +docutils==0.16 \ No newline at end of file diff --git a/sphinxext/opengraph.py b/old/opengraph.py similarity index 100% rename from sphinxext/opengraph.py rename to old/opengraph.py diff --git a/sphinxext/opengraph/__init__.py b/sphinxext/opengraph/__init__.py new file mode 100644 index 0000000..6a6bc00 --- /dev/null +++ b/sphinxext/opengraph/__init__.py @@ -0,0 +1,23 @@ +from typing import Any, Dict +from sphinx.application import Sphinx +from .event_handler import html_page_context + +# todo: rtd option to grab READTHEDOCS_LANGUAGE and READTHEDOCS_VERSION for link +# todo: grab first image on a page + + +def setup(app: Sphinx) -> Dict[str, Any]: + app.add_config_value("ogp_site_url", None, "html") + app.add_config_value("ogp_description_length", 200, "html") + app.add_config_value("ogp_image", None, "html") + app.add_config_value("ogp_image_alt", True, "html") + app.add_config_value("ogp_type", "website", "html") + app.add_config_value("ogp_site_name", None, "html") + app.add_config_value("ogp_custom_meta_tags", [], "html") + + app.connect("html-page-context", html_page_context) + + return { + "parallel_read_safe": True, + "parallel_write_safe": True, + } diff --git a/sphinxext/opengraph/directive.py b/sphinxext/opengraph/directive.py new file mode 100644 index 0000000..eb3c44b --- /dev/null +++ b/sphinxext/opengraph/directive.py @@ -0,0 +1,10 @@ +from typing import List +from sphinx.util.docutils import SphinxDirective +from docutils.nodes import Node + + +class OpenGraphDirective(SphinxDirective): + def run(self) -> List[Node]: + pass + + pass diff --git a/sphinxext/opengraph/event_handler.py b/sphinxext/opengraph/event_handler.py new file mode 100644 index 0000000..6eed6f2 --- /dev/null +++ b/sphinxext/opengraph/event_handler.py @@ -0,0 +1,46 @@ +from typing import Dict, Any +from sphinx.application import Sphinx +from docutils import nodes +from urllib.parse import urljoin +from .util import sanitize_title, make_tag, add_tag_from_config +from .visitor import OpenGraphVisitor + + +def html_page_context( + app: Sphinx, + pagename: str, + templatename: str, + context: Dict[str, Any], + doctree: nodes.document, +) -> None: + if not doctree: + return + # Set length of description + try: + desc_len = int(app.config["ogp_description_length"]) + except ValueError: + desc_len = 200 + + # grab the description from the page + visitor = OpenGraphVisitor(doctree, desc_len) + doctree.walkabout(visitor) + + # parse out html from the page title + page_title = sanitize_title(context["title"]) + + # get the page's url + page_url = urljoin( + app.config["ogp_site_url"], context["pagename"] + context["file_suffix"] + ) + + # add all the tags which need to be added + context["metatags"] += make_tag("title", page_title) + context["metatags"] += make_tag("type", app.config["ogp_type"]) + context["metatags"] += make_tag("url", page_url) + add_tag_from_config(context, app.config, "site_name") + context["metatags"] += make_tag("description", visitor.description) + add_tag_from_config(context, app.config, "image") + add_tag_from_config(context, app.config, "image:alt") + + # custom tags + context["metatags"] += "\n".join(app.config["ogp_custom_meta_tags"]) diff --git a/sphinxext/opengraph/html_parser.py b/sphinxext/opengraph/html_parser.py new file mode 100644 index 0000000..01dd8a7 --- /dev/null +++ b/sphinxext/opengraph/html_parser.py @@ -0,0 +1,15 @@ +from html.parser import HTMLParser + + +class HTMLTextParser(HTMLParser): + """ + Parse HTML into text + """ + + def __init__(self): + super().__init__() + # Text from html tags + self.text = "" + + def handle_data(self, data) -> None: + self.text += data diff --git a/sphinxext/opengraph/util.py b/sphinxext/opengraph/util.py new file mode 100644 index 0000000..6933dfe --- /dev/null +++ b/sphinxext/opengraph/util.py @@ -0,0 +1,29 @@ +from typing import Dict, Any +from sphinx.config import Config +from .html_parser import HTMLTextParser + + +def make_tag(tag: str, content: str) -> str: + return f'\n ' + + +def add_tag_from_config( + context: Dict[str, Any], config: Config, tag: str, conf_name: str = "" +): + if not conf_name: + conf_name = tag.replace(":", "_") + + conf_name = f"ogp_{conf_name}" + + value = config[conf_name] + if value: + context["metatags"] += make_tag(tag, value) + + +def sanitize_title(title: str) -> str: + # take in a title and return a clean version. + html_parser = HTMLTextParser() + html_parser.feed(title) + html_parser.close() + + return html_parser.text diff --git a/sphinxext/opengraph/visitor.py b/sphinxext/opengraph/visitor.py new file mode 100644 index 0000000..66cc08a --- /dev/null +++ b/sphinxext/opengraph/visitor.py @@ -0,0 +1,63 @@ +from docutils import nodes + + +class OpenGraphVisitor(nodes.GenericNodeVisitor): + def __init__( + self, document: nodes.document, desc_len: int, title_count: int = 1 + ) -> None: + super().__init__(document) + self.description = "" + self.desc_len = desc_len + self.title_count = title_count + self.first_list = "" # todo: rename + + def default_visit(self, node: nodes.Element) -> None: + if len(self.description) >= self.desc_len: + # Stop the traversal if the description is long enough + # raise nodes.StopTraversal + pass + + if isinstance(node, (nodes.Invisible, nodes.Admonition)): + # Skip all comments and admonitions + raise nodes.SkipNode + + if isinstance(node, nodes.title): + # title count refers to the amount of titles remaining + self.title_count -= 1 + if self.title_count < 0: + # raise nodes.StopTraversal + pass + + if not self.first_list and isinstance( + node, (nodes.bullet_list, nodes.enumerated_list) + ): + # get the first list on a page to be used instead of description + self.first_list = node.astext().replace("\n\n", ", ") + + if isinstance(node, nodes.paragraph): + # Add only paragraphs to the description + + if self.description: + # add a space between paragraphs in case one does not exist + # if there is a double space it will be removed automatically + self.description += " " + + self.description += node.astext() + + def default_departure(self, node: nodes.Element) -> None: + # remove all double spaces + self.description = self.description.replace(" ", " ") + + if len(self.description) > self.desc_len: + self.description = self.description[: self.desc_len] + + if self.desc_len > 3: + self.description = self.description[:-3] + "..." + + # runs only when this is the last node + if node.parent is None: + # switch to use the list if the description is only the same list + if self.description == self.first_list.replace( + ",", "" + ): + self.description = self.first_list