Skip to content

Refactor codebase #24

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Oct 1, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import subprocess

import setuptools

# This will fail if something happens or if not in a git repository.
Expand Down
103 changes: 103 additions & 0 deletions sphinxext/opengraph/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
from typing import Any, Dict
from urllib.parse import urljoin

import docutils.nodes as nodes
from sphinx.application import Sphinx

from .descriptionparser import get_description
from .titleparser import get_title

DEFAULT_DESCRIPTION_LENGTH = 200


def make_tag(property: str, content: str) -> str:
    """
    Build a single Open Graph ``<meta>`` tag.

    :param property: value of the tag's ``property`` attribute (e.g. ``og:title``)
    :param content: value of the tag's ``content`` attribute
    :return: the tag followed by a newline and the indentation that precedes
        the next tag
    """
    # The value is placed inside a double-quoted attribute, so a literal
    # double quote would end the attribute early and corrupt the tag.
    # Only that character is replaced; content without quotes is unchanged.
    content = content.replace('"', "&quot;")
    return f'<meta property="{property}" content="{content}" />\n '


def get_tags(
    context: Dict[str, Any], doctree: nodes.document, config: Dict[str, Any]
) -> str:
    """
    Build the Open Graph meta tags for a single page.

    :param context: page context; ``title``, ``pagename`` and ``file_suffix``
        are read from it
    :param doctree: document tree that the description is extracted from
    :param config: configuration providing the ``ogp_*`` values
    :return: all generated tags, followed by the custom meta tags, as one string
    """

    # Set length of description. int() raises TypeError (not ValueError) for
    # None and other non-numeric types, so both must fall back to the default.
    try:
        desc_len = int(config["ogp_description_length"])
    except (TypeError, ValueError):
        desc_len = DEFAULT_DESCRIPTION_LENGTH

    # Get the title and parse any html in it
    title = get_title(context["title"], skip_html_tags=False)
    title_excluding_html = get_title(context["title"], skip_html_tags=True)

    # Parse/walk doctree for metadata (tag/description); both title variants
    # are handed over as known titles
    description = get_description(doctree, desc_len, [title, title_excluding_html])

    tags = "\n "

    # title tag
    tags += make_tag("og:title", title)

    # type tag
    tags += make_tag("og:type", config["ogp_type"])

    # url tag
    # Get the URL of the specific page
    page_url = urljoin(
        config["ogp_site_url"], context["pagename"] + context["file_suffix"]
    )
    tags += make_tag("og:url", page_url)

    # site name tag (optional)
    site_name = config["ogp_site_name"]
    if site_name:
        tags += make_tag("og:site_name", site_name)

    # description tag
    tags += make_tag("og:description", description)

    # image tag
    # Get the image from the config
    image_url = config["ogp_image"]
    if image_url:
        tags += make_tag("og:image", image_url)

        # Add image alt text (either provided by config or from site_name)
        # NOTE(review): assumed to apply only when an image is configured;
        # confirm the nesting against the upstream file.
        ogp_image_alt = config["ogp_image_alt"]
        if isinstance(ogp_image_alt, str):
            # An explicit string always wins
            tags += make_tag("og:image:alt", ogp_image_alt)
        elif ogp_image_alt and site_name:
            tags += make_tag("og:image:alt", site_name)
        elif ogp_image_alt and title:
            tags += make_tag("og:image:alt", title)

    # custom tags
    tags += "\n".join(config["ogp_custom_meta_tags"])

    return tags


def html_page_context(
    app: Sphinx,
    pagename: str,
    templatename: str,
    context: Dict[str, Any],
    doctree: nodes.document,
) -> None:
    """
    Append the Open Graph tags of a page to ``context["metatags"]``.

    Nothing is added when ``doctree`` is falsy. ``pagename`` and
    ``templatename`` are accepted but not used here.
    """
    if not doctree:
        # No document tree to build the tags from
        return

    context["metatags"] += get_tags(context, doctree, app.config)


def setup(app: Sphinx) -> Dict[str, Any]:
    """
    Register the extension's config values and its page-context handler.

    :param app: application object the config values and handler are added to
    :return: metadata dict marking the extension as parallel read/write safe
    """
    # (option name, default value) pairs, registered in this order
    option_defaults = (
        ("ogp_site_url", None),
        ("ogp_description_length", DEFAULT_DESCRIPTION_LENGTH),
        ("ogp_image", None),
        ("ogp_image_alt", True),
        ("ogp_type", "website"),
        ("ogp_site_name", None),
        ("ogp_custom_meta_tags", []),
    )
    for option, default in option_defaults:
        app.add_config_value(option, default, "html")

    app.connect("html-page-context", html_page_context)

    return dict(parallel_read_safe=True, parallel_write_safe=True)
135 changes: 11 additions & 124 deletions sphinxext/opengraph.py → sphinxext/opengraph/descriptionparser.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,10 @@
from typing import Any, Dict, Iterable, Sequence, Tuple
from urllib.parse import urljoin
import docutils.nodes as nodes
import string
from html.parser import HTMLParser
import sphinx
from sphinx.application import Sphinx

DEFAULT_DESCRIPTION_LENGTH = 200


class HTMLTextParser(HTMLParser):
"""
Parse HTML into text
"""

def __init__(self):
super().__init__()
# All text found
self.text = ""
# Only text outside of html tags
self.text_outside_tags = ""
self.level = 0
from typing import Iterable

def handle_starttag(self, tag, attrs) -> None:
self.level += 1

def handle_endtag(self, tag) -> None:
self.level -= 1

def handle_data(self, data) -> None:
self.text += data
if self.level == 0:
self.text_outside_tags += data
import docutils.nodes as nodes


class OGMetadataCreatorVisitor(nodes.NodeVisitor):
class DescriptionParser(nodes.NodeVisitor):
"""
Finds the title and creates a description from a doctree
"""
Expand Down Expand Up @@ -145,96 +115,13 @@ def dispatch_departure(self, node: nodes.Element) -> None:
self.stop = True


def make_tag(property: str, content: str) -> str:
return f'<meta property="{property}" content="{content}" />\n '


def get_tags(
context: Dict[str, Any], doctree: nodes.document, config: Dict[str, Any]
) -> str:

# Set length of description
try:
desc_len = int(config["ogp_description_length"])
except ValueError:
desc_len = DEFAULT_DESCRIPTION_LENGTH

# Get the title and parse any html in it
htp = HTMLTextParser()
htp.feed(context["title"])
htp.close()
def get_description(
doctree: nodes.document,
description_length: int,
known_titles: Iterable[str] = None,
document: nodes.document = None,
):

# Parse/walk doctree for metadata (tag/description)
mcv = OGMetadataCreatorVisitor(desc_len, [htp.text, htp.text_outside_tags])
mcv = DescriptionParser(description_length, known_titles, document)
doctree.walkabout(mcv)

tags = "\n "

# title tag
tags += make_tag("og:title", htp.text)

# type tag
tags += make_tag("og:type", config["ogp_type"])

# url tag
# Get the URL of the specific page
page_url = urljoin(
config["ogp_site_url"], context["pagename"] + context["file_suffix"]
)
tags += make_tag("og:url", page_url)

# site name tag
site_name = config["ogp_site_name"]
if site_name:
tags += make_tag("og:site_name", site_name)

# description tag
tags += make_tag("og:description", mcv.description)

# image tag
# Get the image from the config
image_url = config["ogp_image"]
if image_url:
tags += make_tag("og:image", image_url)

# Add image alt text (either provided by config or from site_name)
ogp_image_alt = config["ogp_image_alt"]
if isinstance(ogp_image_alt, str):
tags += make_tag("og:image:alt", ogp_image_alt)
elif ogp_image_alt and site_name:
tags += make_tag("og:image:alt", site_name)
elif ogp_image_alt and htp.text:
tags += make_tag("og:image:alt", htp.text)

# custom tags
tags += "\n".join(config["ogp_custom_meta_tags"])

return tags


def html_page_context(
app: Sphinx,
pagename: str,
templatename: str,
context: Dict[str, Any],
doctree: nodes.document,
) -> None:
if doctree:
context["metatags"] += get_tags(context, doctree, app.config)


def setup(app: Sphinx) -> Dict[str, Any]:
app.add_config_value("ogp_site_url", None, "html")
app.add_config_value("ogp_description_length", DEFAULT_DESCRIPTION_LENGTH, "html")
app.add_config_value("ogp_image", None, "html")
app.add_config_value("ogp_image_alt", True, "html")
app.add_config_value("ogp_type", "website", "html")
app.add_config_value("ogp_site_name", None, "html")
app.add_config_value("ogp_custom_meta_tags", [], "html")

app.connect("html-page-context", html_page_context)

return {
"parallel_read_safe": True,
"parallel_write_safe": True,
}
return mcv.description
37 changes: 37 additions & 0 deletions sphinxext/opengraph/titleparser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
from html.parser import HTMLParser


class HTMLTextParser(HTMLParser):
    """
    Parse HTML into text

    After feeding and closing the parser, ``text`` holds all text found and
    ``text_outside_tags`` holds only the text that is not nested inside any
    element.
    """

    # Elements that never have an end tag. Counting them as "open" would
    # leave the nesting level above zero for the rest of the input.
    _VOID_ELEMENTS = frozenset(
        {
            "area",
            "base",
            "br",
            "col",
            "embed",
            "hr",
            "img",
            "input",
            "link",
            "meta",
            "param",
            "source",
            "track",
            "wbr",
        }
    )

    def __init__(self):
        super().__init__()
        # All text found
        self.text = ""
        # Only text outside of html tags
        self.text_outside_tags = ""
        # Current element nesting depth (0 means top level)
        self.level = 0

    def handle_starttag(self, tag, attrs) -> None:
        # Void elements cannot contain text, so they do not open a level
        if tag not in self._VOID_ELEMENTS:
            self.level += 1

    def handle_endtag(self, tag) -> None:
        if tag in self._VOID_ELEMENTS:
            return
        # Clamp at zero so a stray end tag cannot push the level negative
        # and hide all following top-level text
        if self.level > 0:
            self.level -= 1

    def handle_data(self, data) -> None:
        self.text += data
        if self.level == 0:
            self.text_outside_tags += data


def get_title(title: str, skip_html_tags: bool = False):
    """
    Extract the plain text of an HTML title.

    :param title: title, possibly containing HTML markup
    :param skip_html_tags: when true, return only the text found outside of
        HTML tags; otherwise return all text found
    """
    parser = HTMLTextParser()
    parser.feed(title)
    # close() makes the parser process any text it is still buffering
    parser.close()

    return parser.text_outside_tags if skip_html_tags else parser.text
2 changes: 1 addition & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from sphinx.testing.path import path
import pytest
from bs4 import BeautifulSoup
from sphinx.testing.path import path

pytest_plugins = "sphinx.testing.fixtures"

Expand Down