diff --git a/setup.py b/setup.py index c73acb8..87add4e 100644 --- a/setup.py +++ b/setup.py @@ -1,4 +1,5 @@ import subprocess + import setuptools # This will fail if something happens or if not in a git repository. diff --git a/sphinxext/opengraph/__init__.py b/sphinxext/opengraph/__init__.py new file mode 100644 index 0000000..e26bbdb --- /dev/null +++ b/sphinxext/opengraph/__init__.py @@ -0,0 +1,103 @@ +from typing import Any, Dict +from urllib.parse import urljoin + +import docutils.nodes as nodes +from sphinx.application import Sphinx + +from .descriptionparser import get_description +from .titleparser import get_title + +DEFAULT_DESCRIPTION_LENGTH = 200 + + +def make_tag(property: str, content: str) -> str: + return f'\n ' + + +def get_tags( + context: Dict[str, Any], doctree: nodes.document, config: Dict[str, Any] +) -> str: + + # Set length of description + try: + desc_len = int(config["ogp_description_length"]) + except ValueError: + desc_len = DEFAULT_DESCRIPTION_LENGTH + + # Get the title and parse any html in it + title = get_title(context["title"], skip_html_tags=False) + title_excluding_html = get_title(context["title"], skip_html_tags=True) + + # Parse/walk doctree for metadata (tag/description) + description = get_description(doctree, desc_len, [title, title_excluding_html]) + + tags = "\n " + + # title tag + tags += make_tag("og:title", title) + + # type tag + tags += make_tag("og:type", config["ogp_type"]) + + # url tag + # Get the URL of the specific page + page_url = urljoin( + config["ogp_site_url"], context["pagename"] + context["file_suffix"] + ) + tags += make_tag("og:url", page_url) + + # site name tag + site_name = config["ogp_site_name"] + if site_name: + tags += make_tag("og:site_name", site_name) + + # description tag + tags += make_tag("og:description", description) + + # image tag + # Get the image from the config + image_url = config["ogp_image"] + if image_url: + tags += make_tag("og:image", image_url) + + # Add image alt text (either provided by config or from site_name) + ogp_image_alt = config["ogp_image_alt"] + if isinstance(ogp_image_alt, str): + tags += make_tag("og:image:alt", ogp_image_alt) + elif ogp_image_alt and site_name: + tags += make_tag("og:image:alt", site_name) + elif ogp_image_alt and title: + tags += make_tag("og:image:alt", title) + + # custom tags + tags += "\n".join(config["ogp_custom_meta_tags"]) + + return tags + + +def html_page_context( + app: Sphinx, + pagename: str, + templatename: str, + context: Dict[str, Any], + doctree: nodes.document, +) -> None: + if doctree: + context["metatags"] += get_tags(context, doctree, app.config) + + +def setup(app: Sphinx) -> Dict[str, Any]: + app.add_config_value("ogp_site_url", None, "html") + app.add_config_value("ogp_description_length", DEFAULT_DESCRIPTION_LENGTH, "html") + app.add_config_value("ogp_image", None, "html") + app.add_config_value("ogp_image_alt", True, "html") + app.add_config_value("ogp_type", "website", "html") + app.add_config_value("ogp_site_name", None, "html") + app.add_config_value("ogp_custom_meta_tags", [], "html") + + app.connect("html-page-context", html_page_context) + + return { + "parallel_read_safe": True, + "parallel_write_safe": True, + } diff --git a/sphinxext/opengraph.py b/sphinxext/opengraph/descriptionparser.py similarity index 50% rename from sphinxext/opengraph.py rename to sphinxext/opengraph/descriptionparser.py index 2efaa5d..82fb31a 100644 --- a/sphinxext/opengraph.py +++ b/sphinxext/opengraph/descriptionparser.py @@ -1,40 +1,10 @@ -from typing import Any, Dict, Iterable, Sequence, Tuple -from urllib.parse import urljoin -import docutils.nodes as nodes import string -from html.parser import HTMLParser -import sphinx -from sphinx.application import Sphinx - -DEFAULT_DESCRIPTION_LENGTH = 200 - - -class HTMLTextParser(HTMLParser): - """ - Parse HTML into text - """ - - def __init__(self): - super().__init__() - # All text found - self.text = "" - # Only text outside of html tags - self.text_outside_tags = "" - self.level = 0 +from typing import Iterable - def handle_starttag(self, tag, attrs) -> None: - self.level += 1 - - def handle_endtag(self, tag) -> None: - self.level -= 1 - - def handle_data(self, data) -> None: - self.text += data - if self.level == 0: - self.text_outside_tags += data +import docutils.nodes as nodes -class OGMetadataCreatorVisitor(nodes.NodeVisitor): +class DescriptionParser(nodes.NodeVisitor): """ Finds the title and creates a description from a doctree """ @@ -145,96 +115,13 @@ def dispatch_departure(self, node: nodes.Element) -> None: self.stop = True -def make_tag(property: str, content: str) -> str: - return f'\n ' - - -def get_tags( - context: Dict[str, Any], doctree: nodes.document, config: Dict[str, Any] -) -> str: - - # Set length of description - try: - desc_len = int(config["ogp_description_length"]) - except ValueError: - desc_len = DEFAULT_DESCRIPTION_LENGTH - - # Get the title and parse any html in it - htp = HTMLTextParser() - htp.feed(context["title"]) - htp.close() +def get_description( + doctree: nodes.document, + description_length: int, + known_titles: Iterable[str] = None, + document: nodes.document = None, +): - # Parse/walk doctree for metadata (tag/description) - mcv = OGMetadataCreatorVisitor(desc_len, [htp.text, htp.text_outside_tags]) + mcv = DescriptionParser(description_length, known_titles, document) doctree.walkabout(mcv) - - tags = "\n " - - # title tag - tags += make_tag("og:title", htp.text) - - # type tag - tags += make_tag("og:type", config["ogp_type"]) - - # url tag - # Get the URL of the specific page - page_url = urljoin( - config["ogp_site_url"], context["pagename"] + context["file_suffix"] - ) - tags += make_tag("og:url", page_url) - - # site name tag - site_name = config["ogp_site_name"] - if site_name: - tags += make_tag("og:site_name", site_name) - - # description tag - tags += make_tag("og:description", mcv.description) - - # image tag - # Get the image from the config - image_url = config["ogp_image"] - if image_url: - tags += make_tag("og:image", image_url) - - # Add image alt text (either provided by config or from site_name) - ogp_image_alt = config["ogp_image_alt"] - if isinstance(ogp_image_alt, str): - tags += make_tag("og:image:alt", ogp_image_alt) - elif ogp_image_alt and site_name: - tags += make_tag("og:image:alt", site_name) - elif ogp_image_alt and htp.text: - tags += make_tag("og:image:alt", htp.text) - - # custom tags - tags += "\n".join(config["ogp_custom_meta_tags"]) - - return tags - - -def html_page_context( - app: Sphinx, - pagename: str, - templatename: str, - context: Dict[str, Any], - doctree: nodes.document, -) -> None: - if doctree: - context["metatags"] += get_tags(context, doctree, app.config) - - -def setup(app: Sphinx) -> Dict[str, Any]: - app.add_config_value("ogp_site_url", None, "html") - app.add_config_value("ogp_description_length", DEFAULT_DESCRIPTION_LENGTH, "html") - app.add_config_value("ogp_image", None, "html") - app.add_config_value("ogp_image_alt", True, "html") - app.add_config_value("ogp_type", "website", "html") - app.add_config_value("ogp_site_name", None, "html") - app.add_config_value("ogp_custom_meta_tags", [], "html") - - app.connect("html-page-context", html_page_context) - - return { - "parallel_read_safe": True, - "parallel_write_safe": True, - } + return mcv.description diff --git a/sphinxext/opengraph/titleparser.py b/sphinxext/opengraph/titleparser.py new file mode 100644 index 0000000..9c2cce7 --- /dev/null +++ b/sphinxext/opengraph/titleparser.py @@ -0,0 +1,37 @@ +from html.parser import HTMLParser + + +class HTMLTextParser(HTMLParser): + """ + Parse HTML into text + """ + + def __init__(self): + super().__init__() + # All text found + self.text = "" + # Only text outside of html tags + self.text_outside_tags = "" + self.level = 0 + + def handle_starttag(self, tag, attrs) -> None: + self.level += 1 + + def handle_endtag(self, tag) -> None: + self.level -= 1 + + def handle_data(self, data) -> None: + self.text += data + if self.level == 0: + self.text_outside_tags += data + + +def get_title(title: str, skip_html_tags: bool = False): + htp = HTMLTextParser() + htp.feed(title) + htp.close() + + if skip_html_tags: + return htp.text_outside_tags + else: + return htp.text diff --git a/tests/conftest.py b/tests/conftest.py index 67c4711..e0d15b3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ -from sphinx.testing.path import path import pytest from bs4 import BeautifulSoup +from sphinx.testing.path import path pytest_plugins = "sphinx.testing.fixtures"