sphinx-doc · Daltz333 · Oct 1, 2020 · Oct 1, 2020
diff --git a/setup.py b/setup.py
@@ -1,4 +1,5 @@
 import subprocess
+
 import setuptools
 
 # This will fail if something happens or if not in a git repository.

diff --git a/sphinxext/opengraph/__init__.py b/sphinxext/opengraph/__init__.py
@@ -0,0 +1,103 @@
+from typing import Any, Dict
+from urllib.parse import urljoin
+
+import docutils.nodes as nodes
+from sphinx.application import Sphinx
+
+from .descriptionparser import get_description
+from .titleparser import get_title
+
+DEFAULT_DESCRIPTION_LENGTH = 200
+
+
+def make_tag(property: str, content: str) -> str:
+    """
+    Build a single Open Graph ``<meta>`` tag.
+
+    :param property: value for the tag's ``property`` attribute (e.g. ``og:title``)
+    :param content: value for the tag's ``content`` attribute
+    :return: the tag followed by a newline and two spaces of indentation
+    """
+    # A literal double quote would end the content="..." attribute early and
+    # corrupt the generated markup, so encode it as an entity.
+    content = str(content).replace('"', "&quot;")
+    return f'<meta property="{property}" content="{content}" />\n  '
+
+
+def get_tags(
+    context: Dict[str, Any], doctree: nodes.document, config: Dict[str, Any]
+) -> str:
+    """
+    Build the Open Graph meta tags for a single page.
+
+    :param context: page context; ``title``, ``pagename`` and ``file_suffix`` are read
+    :param doctree: doctree of the page, walked to produce the description
+    :param config: mapping holding the ``ogp_*`` configuration values
+    :return: all generated tags as one string, starting with a newline
+    """
+
+    # Set length of description
+    try:
+        desc_len = int(config["ogp_description_length"])
+    except (TypeError, ValueError):
+        # int() raises TypeError for None and other non-numeric types and
+        # ValueError for unparsable strings; use the default in both cases.
+        desc_len = DEFAULT_DESCRIPTION_LENGTH
+
+    # Get the title and parse any html in it
+    title = get_title(context["title"], skip_html_tags=False)
+    title_excluding_html = get_title(context["title"], skip_html_tags=True)
+
+    # Parse/walk doctree for metadata (tag/description)
+    description = get_description(doctree, desc_len, [title, title_excluding_html])
+
+    tags = "\n  "
+
+    # title tag
+    tags += make_tag("og:title", title)
+
+    # type tag
+    tags += make_tag("og:type", config["ogp_type"])
+
+    # url tag
+    # Get the URL of the specific page
+    page_url = urljoin(
+        config["ogp_site_url"], context["pagename"] + context["file_suffix"]
+    )
+    tags += make_tag("og:url", page_url)
+
+    # site name tag (optional)
+    site_name = config["ogp_site_name"]
+    if site_name:
+        tags += make_tag("og:site_name", site_name)
+
+    # description tag
+    tags += make_tag("og:description", description)
+
+    # image tag
+    # Get the image from the config
+    image_url = config["ogp_image"]
+    if image_url:
+        tags += make_tag("og:image", image_url)
+
+    # Add image alt text: an explicit string from the config wins; otherwise a
+    # truthy setting falls back to the site name, then to the page title.
+    ogp_image_alt = config["ogp_image_alt"]
+    if isinstance(ogp_image_alt, str):
+        tags += make_tag("og:image:alt", ogp_image_alt)
+    elif ogp_image_alt and site_name:
+        tags += make_tag("og:image:alt", site_name)
+    elif ogp_image_alt and title:
+        tags += make_tag("og:image:alt", title)
+
+    # custom tags are appended verbatim, one per line
+    tags += "\n".join(config["ogp_custom_meta_tags"])
+
+    return tags
+
+
+def html_page_context(
+    app: Sphinx,
+    pagename: str,
+    templatename: str,
+    context: Dict[str, Any],
+    doctree: nodes.document,
+) -> None:
+    """
+    Append the Open Graph tags of a page to ``context["metatags"]``.
+
+    Does nothing when ``doctree`` is falsy.
+    """
+    # Without a doctree there is nothing to build tags from
+    if not doctree:
+        return
+
+    context["metatags"] += get_tags(context, doctree, app.config)
+
+
+def setup(app: Sphinx) -> Dict[str, Any]:
+    """
+    Register the extension's config values and its page-context handler.
+
+    :param app: application object the extension is being loaded into
+    :return: extension metadata declaring parallel read and write safety
+    """
+    # (name, default) pairs; each one is registered with "html" as third argument
+    config_defaults = (
+        ("ogp_site_url", None),
+        ("ogp_description_length", DEFAULT_DESCRIPTION_LENGTH),
+        ("ogp_image", None),
+        ("ogp_image_alt", True),
+        ("ogp_type", "website"),
+        ("ogp_site_name", None),
+        ("ogp_custom_meta_tags", []),
+    )
+    for option_name, default_value in config_defaults:
+        app.add_config_value(option_name, default_value, "html")
+
+    app.connect("html-page-context", html_page_context)
+
+    return {"parallel_read_safe": True, "parallel_write_safe": True}
diff --git a/sphinxext/opengraph.py → sphinxext/opengraph/descriptionparser.py b/sphinxext/opengraph.py → sphinxext/opengraph/descriptionparser.py
@@ -1,40 +1,10 @@
-from typing import Any, Dict, Iterable, Sequence, Tuple
-from urllib.parse import urljoin
-import docutils.nodes as nodes
 import string
-from html.parser import HTMLParser
-import sphinx
-from sphinx.application import Sphinx
-
-DEFAULT_DESCRIPTION_LENGTH = 200
-
-
-class HTMLTextParser(HTMLParser):
-    """
-    Parse HTML into text
-    """
-
-    def __init__(self):
-        super().__init__()
-        # All text found
-        self.text = ""
-        # Only text outside of html tags
-        self.text_outside_tags = ""
-        self.level = 0
+from typing import Iterable
 
-    def handle_starttag(self, tag, attrs) -> None:
-        self.level += 1
-
-    def handle_endtag(self, tag) -> None:
-        self.level -= 1
-
-    def handle_data(self, data) -> None:
-        self.text += data
-        if self.level == 0:
-            self.text_outside_tags += data
+import docutils.nodes as nodes
 
 
-class OGMetadataCreatorVisitor(nodes.NodeVisitor):
+class DescriptionParser(nodes.NodeVisitor):
     """
     Finds the title and creates a description from a doctree
     """
@@ -145,96 +115,13 @@ def dispatch_departure(self, node: nodes.Element) -> None:
             self.stop = True
 
 
-def make_tag(property: str, content: str) -> str:
-    return f'<meta property="{property}" content="{content}" />\n  '
-
-
-def get_tags(
-    context: Dict[str, Any], doctree: nodes.document, config: Dict[str, Any]
-) -> str:
-
-    # Set length of description
-    try:
-        desc_len = int(config["ogp_description_length"])
-    except ValueError:
-        desc_len = DEFAULT_DESCRIPTION_LENGTH
-
-    # Get the title and parse any html in it
-    htp = HTMLTextParser()
-    htp.feed(context["title"])
-    htp.close()
+def get_description(
+    doctree: nodes.document,
+    description_length: int,
+    known_titles: Iterable[str] = None,
+    document: nodes.document = None,
+):
+    """
+    Walk ``doctree`` with a ``DescriptionParser`` and return its description.
+
+    :param doctree: doctree to walk
+    :param description_length: forwarded to ``DescriptionParser``
+        (presumably the maximum description length; confirm against the class)
+    :param known_titles: forwarded to ``DescriptionParser``
+    :param document: forwarded to ``DescriptionParser``
+    :return: the ``description`` attribute of the parser after the walk
+    """
 
-    # Parse/walk doctree for metadata (tag/description)
-    mcv = OGMetadataCreatorVisitor(desc_len, [htp.text, htp.text_outside_tags])
+    mcv = DescriptionParser(description_length, known_titles, document)
     doctree.walkabout(mcv)
-
-    tags = "\n  "
-
-    # title tag
-    tags += make_tag("og:title", htp.text)
-
-    # type tag
-    tags += make_tag("og:type", config["ogp_type"])
-
-    # url tag
-    # Get the URL of the specific page
-    page_url = urljoin(
-        config["ogp_site_url"], context["pagename"] + context["file_suffix"]
-    )
-    tags += make_tag("og:url", page_url)
-
-    # site name tag
-    site_name = config["ogp_site_name"]
-    if site_name:
-        tags += make_tag("og:site_name", site_name)
-
-    # description tag
-    tags += make_tag("og:description", mcv.description)
-
-    # image tag
-    # Get the image from the config
-    image_url = config["ogp_image"]
-    if image_url:
-        tags += make_tag("og:image", image_url)
-
-    # Add image alt text (either provided by config or from site_name)
-    ogp_image_alt = config["ogp_image_alt"]
-    if isinstance(ogp_image_alt, str):
-        tags += make_tag("og:image:alt", ogp_image_alt)
-    elif ogp_image_alt and site_name:
-        tags += make_tag("og:image:alt", site_name)
-    elif ogp_image_alt and htp.text:
-        tags += make_tag("og:image:alt", htp.text)
-
-    # custom tags
-    tags += "\n".join(config["ogp_custom_meta_tags"])
-
-    return tags
-
-
-def html_page_context(
-    app: Sphinx,
-    pagename: str,
-    templatename: str,
-    context: Dict[str, Any],
-    doctree: nodes.document,
-) -> None:
-    if doctree:
-        context["metatags"] += get_tags(context, doctree, app.config)
-
-
-def setup(app: Sphinx) -> Dict[str, Any]:
-    app.add_config_value("ogp_site_url", None, "html")
-    app.add_config_value("ogp_description_length", DEFAULT_DESCRIPTION_LENGTH, "html")
-    app.add_config_value("ogp_image", None, "html")
-    app.add_config_value("ogp_image_alt", True, "html")
-    app.add_config_value("ogp_type", "website", "html")
-    app.add_config_value("ogp_site_name", None, "html")
-    app.add_config_value("ogp_custom_meta_tags", [], "html")
-
-    app.connect("html-page-context", html_page_context)
-
-    return {
-        "parallel_read_safe": True,
-        "parallel_write_safe": True,
-    }
+    return mcv.description
diff --git a/sphinxext/opengraph/titleparser.py b/sphinxext/opengraph/titleparser.py
@@ -0,0 +1,37 @@
+from html.parser import HTMLParser
+
+
+class HTMLTextParser(HTMLParser):
+    """
+    Parse HTML into text
+
+    After feeding, ``text`` holds all character data and ``text_outside_tags``
+    holds only the character data found while no element is open.
+    """
+
+    # Void elements never get a matching end tag, so they must not change the
+    # nesting depth; otherwise all text after e.g. ``<br>`` counts as nested.
+    _VOID_ELEMENTS = frozenset(
+        (
+            "area",
+            "base",
+            "br",
+            "col",
+            "embed",
+            "hr",
+            "img",
+            "input",
+            "link",
+            "meta",
+            "param",
+            "source",
+            "track",
+            "wbr",
+        )
+    )
+
+    def __init__(self):
+        super().__init__()
+        # All text found
+        self.text = ""
+        # Only text outside of html tags
+        self.text_outside_tags = ""
+        # Number of currently open (non-void) elements
+        self.level = 0
+
+    def handle_starttag(self, tag, attrs) -> None:
+        if tag not in self._VOID_ELEMENTS:
+            self.level += 1
+
+    def handle_endtag(self, tag) -> None:
+        # Skip end tags of void elements (emitted for ``<br/>``) and never go
+        # below zero on a stray end tag, so later top-level text is still seen.
+        if tag not in self._VOID_ELEMENTS and self.level > 0:
+            self.level -= 1
+
+    def handle_data(self, data) -> None:
+        self.text += data
+        if self.level == 0:
+            self.text_outside_tags += data
+
+
+def get_title(title: str, skip_html_tags: bool = False):
+    """
+    Extract the text of a title that may contain HTML.
+
+    :param title: title markup to parse
+    :param skip_html_tags: when true, return only the text found outside of tags
+    :return: text outside of tags if ``skip_html_tags`` is set, otherwise all text
+    """
+    parser = HTMLTextParser()
+    parser.feed(title)
+    parser.close()
+
+    return parser.text_outside_tags if skip_html_tags else parser.text
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -1,6 +1,6 @@
-from sphinx.testing.path import path
 import pytest
 from bs4 import BeautifulSoup
+from sphinx.testing.path import path
 
 pytest_plugins = "sphinx.testing.fixtures"