diff --git a/docs/conf.py b/docs/conf.py index 9488b636..142a3a6e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -45,6 +45,8 @@ # This pattern also affects html_static_path and html_extra_path. exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] +nitpick_ignore = [("py:class", "Match")] + # -- Options for HTML output ------------------------------------------------- diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py index c69a0510..98ce7aa2 100644 --- a/markdown_it/parser_core.py +++ b/markdown_it/parser_core.py @@ -8,7 +8,7 @@ from .ruler import Ruler from .rules_core.state_core import StateCore -from .rules_core import normalize, block, inline +from .rules_core import normalize, block, inline, replace # TODO linkify, replacements, smartquotes _rules = [ @@ -16,7 +16,7 @@ ["block", block], ["inline", inline], # [ 'linkify', require('./rules_core/linkify') ], - # [ 'replacements', require('./rules_core/replacements') ], + ["replacements", replace], # [ 'smartquotes', require('./rules_core/smartquotes') ] ] diff --git a/markdown_it/rules_core/__init__.py b/markdown_it/rules_core/__init__.py index a5b8764e..72757f51 100644 --- a/markdown_it/rules_core/__init__.py +++ b/markdown_it/rules_core/__init__.py @@ -2,3 +2,4 @@ from .normalize import normalize # noqa: F401 from .block import block # noqa: F401 from .inline import inline # noqa: F401 +from .replacements import replace # noqa: F401 diff --git a/markdown_it/rules_core/replacements.js b/markdown_it/rules_core/replacements.js deleted file mode 100644 index 533496fa..00000000 --- a/markdown_it/rules_core/replacements.js +++ /dev/null @@ -1,107 +0,0 @@ -// Simple typographic replacements -// -// (c) (C) → © -// (tm) (TM) → ™ -// (r) (R) → ® -// +- → ± -// (p) (P) -> § -// ... → … (also ?.... → ?.., !.... → !..) -// ???????? → ???, !!!!! 
→ !!!, `,,` → `,` -// -- → –, --- → — -// -'use strict'; - -// TODO: -// - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾ -// - miltiplication 2 x 4 -> 2 × 4 - -var RARE_RE = /\+-|\.\.|\?\?\?\?|!!!!|,,|--/; - -// Workaround for phantomjs - need regex without /g flag, -// or root check will fail every second time -var SCOPED_ABBR_TEST_RE = /\((c|tm|r|p)\)/i; - -var SCOPED_ABBR_RE = /\((c|tm|r|p)\)/ig; -var SCOPED_ABBR = { - c: '©', - r: '®', - p: '§', - tm: '™' -}; - -function replaceFn(match, name) { - return SCOPED_ABBR[name.toLowerCase()]; -} - -function replace_scoped(inlineTokens) { - var i, token, inside_autolink = 0; - - for (i = inlineTokens.length - 1; i >= 0; i--) { - token = inlineTokens[i]; - - if (token.type === 'text' && !inside_autolink) { - token.content = token.content.replace(SCOPED_ABBR_RE, replaceFn); - } - - if (token.type === 'link_open' && token.info === 'auto') { - inside_autolink--; - } - - if (token.type === 'link_close' && token.info === 'auto') { - inside_autolink++; - } - } -} - -function replace_rare(inlineTokens) { - var i, token, inside_autolink = 0; - - for (i = inlineTokens.length - 1; i >= 0; i--) { - token = inlineTokens[i]; - - if (token.type === 'text' && !inside_autolink) { - if (RARE_RE.test(token.content)) { - token.content = token.content - .replace(/\+-/g, '±') - // .., ..., ....... -> … - // but ?..... & !..... -> ?.. & !.. 
- .replace(/\.{2,}/g, '…').replace(/([?!])…/g, '$1..') - .replace(/([?!]){4,}/g, '$1$1$1').replace(/,{2,}/g, ',') - // em-dash - .replace(/(^|[^-])---(?=[^-]|$)/mg, '$1\u2014') - // en-dash - .replace(/(^|\s)--(?=\s|$)/mg, '$1\u2013') - .replace(/(^|[^-\s])--(?=[^-\s]|$)/mg, '$1\u2013'); - } - } - - if (token.type === 'link_open' && token.info === 'auto') { - inside_autolink--; - } - - if (token.type === 'link_close' && token.info === 'auto') { - inside_autolink++; - } - } -} - - -module.exports = function replace(state) { - var blkIdx; - - if (!state.md.options.typographer) { return; } - - for (blkIdx = state.tokens.length - 1; blkIdx >= 0; blkIdx--) { - - if (state.tokens[blkIdx].type !== 'inline') { continue; } - - if (SCOPED_ABBR_TEST_RE.test(state.tokens[blkIdx].content)) { - replace_scoped(state.tokens[blkIdx].children); - } - - if (RARE_RE.test(state.tokens[blkIdx].content)) { - replace_rare(state.tokens[blkIdx].children); - } - - } -}; diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py new file mode 100644 index 00000000..86e3c3c4 --- /dev/null +++ b/markdown_it/rules_core/replacements.py @@ -0,0 +1,123 @@ +"""Simple typographic replacements + +* ``(c)``, ``(C)`` → © +* ``(tm)``, ``(TM)`` → ™ +* ``(r)``, ``(R)`` → ® +* ``(p)``, ``(P)`` → § +* ``+-`` → ± +* ``...`` → … +* ``?....`` → ?.. +* ``!....`` → !.. +* ``????????`` → ??? +* ``!!!!!`` → !!! 
+* ``,,,`` → , +* ``--`` → – +* ``---`` → — +""" +import logging +import re +from typing import List, Match + +from .state_core import StateCore +from ..token import Token + +LOGGER = logging.getLogger(__name__) + +# TODO: +# - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾ +# - multiplication 2 x 4 -> 2 × 4 + +RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--") + +# Workaround for phantomjs - need regex without /g flag, +# or root check will fail every second time +# SCOPED_ABBR_TEST_RE = r"\((c|tm|r|p)\)" + +SCOPED_ABBR_RE = re.compile(r"\((c|tm|r|p)\)", flags=re.IGNORECASE) + +PLUS_MINUS_RE = re.compile(r"\+-") + +ELLIPSIS_RE = re.compile(r"\.{2,}") + +ELLIPSIS_QUESTION_EXCLAMATION_RE = re.compile(r"([?!])…") + +QUESTION_EXCLAMATION_RE = re.compile(r"([?!]){4,}") + +COMMA_RE = re.compile(r",{2,}") + +EM_DASH_RE = re.compile(r"(^|[^-])---(?=[^-]|$)", flags=re.MULTILINE) + +EN_DASH_RE = re.compile(r"(^|\s)--(?=\s|$)", flags=re.MULTILINE) + +EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE) + + +SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"} + + +def replaceFn(match: Match[str]): + return SCOPED_ABBR[match.group(1).lower()] + + +def replace_scoped(inlineTokens: List[Token]): + inside_autolink = 0 + + for token in inlineTokens: + if token.type == "text" and not inside_autolink: + token.content = SCOPED_ABBR_RE.sub(replaceFn, token.content) + + if token.type == "link_open" and token.info == "auto": + inside_autolink -= 1 + + if token.type == "link_close" and token.info == "auto": + inside_autolink += 1 + + +def replace_rare(inlineTokens: List[Token]): + inside_autolink = 0 + + for token in inlineTokens: + if token.type == "text" and not inside_autolink: + if RARE_RE.search(token.content): + # +- -> ± + token.content = PLUS_MINUS_RE.sub("±", token.content) + + # .., ..., ....... -> … + token.content = ELLIPSIS_RE.sub("…", token.content) + + # but ?..... & !..... -> ?.. & !.. 
+ token.content = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub( + "\\1..", token.content + ) + token.content = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", token.content) + + # ,, ,,, ,,,, -> , + token.content = COMMA_RE.sub(",", token.content) + + # em-dash + token.content = EM_DASH_RE.sub("\\1\u2014", token.content) + + # en-dash + token.content = EN_DASH_RE.sub("\\1\u2013", token.content) + token.content = EN_DASH_INDENT_RE.sub("\\1\u2013", token.content) + + if token.type == "link_open" and token.info == "auto": + inside_autolink -= 1 + + if token.type == "link_close" and token.info == "auto": + inside_autolink += 1 + + +def replace(state: StateCore): + if not state.md.options.typographer: + return + + for token in state.tokens: + if token.type != "inline": + continue + + if SCOPED_ABBR_RE.search(token.content): + replace_scoped(token.children) + + if RARE_RE.search(token.content): + replace_rare(token.children) diff --git a/tests/test_api/test_main.py b/tests/test_api/test_main.py index 575a246e..3eebdf60 100644 --- a/tests/test_api/test_main.py +++ b/tests/test_api/test_main.py @@ -7,7 +7,7 @@ def test_get_rules(): md = MarkdownIt("zero") # print(md.get_all_rules()) assert md.get_all_rules() == { - "core": ["normalize", "block", "inline"], + "core": ["normalize", "block", "inline", "replacements"], "block": [ "table", "code", diff --git a/tests/test_port/fixtures/typographer.md b/tests/test_port/fixtures/typographer.md index e9e2bfee..39154ed0 100644 --- a/tests/test_port/fixtures/typographer.md +++ b/tests/test_port/fixtures/typographer.md @@ -4,6 +4,19 @@
(bad)
. +copyright (Lower) +. +(c) +. +©
+. + +copyright (Upper) +. +(C) +. +©
+. copyright . @@ -61,6 +74,14 @@ dupes . +dupes-ellipsis +. +!... ?... ,... !!!!!!.... ????.... ,,... +. +!.. ?.. ,… !!!.. ???.. ,…
+. + + dashes . ---markdownit --- super--- diff --git a/tests/test_port/test_fixtures.py b/tests/test_port/test_fixtures.py index 1bc74220..68c09fb7 100644 --- a/tests/test_port/test_fixtures.py +++ b/tests/test_port/test_fixtures.py @@ -8,6 +8,17 @@ FIXTURE_PATH = Path(__file__).parent.joinpath("fixtures") +@pytest.mark.parametrize( + "line,title,input,expected", + read_fixture_file(FIXTURE_PATH.joinpath("typographer.md")), +) +def test_typographer(line, title, input, expected): + md = MarkdownIt().enable("replacements") + md.options["typographer"] = True + text = md.render(input) + assert text.rstrip() == expected.rstrip() + + @pytest.mark.parametrize( "line,title,input,expected", read_fixture_file(FIXTURE_PATH.joinpath("tables.md")) )