executablebooks · chrisjsewell · Oct 21, 2020 · Oct 19, 2020 · Oct 20, 2020 · Oct 20, 2020
diff --git a/markdown_it/parser_core.py b/markdown_it/parser_core.py
@@ -8,15 +8,15 @@
 
 from .ruler import Ruler
 from .rules_core.state_core import StateCore
-from .rules_core import normalize, block, inline
+from .rules_core import normalize, block, inline, replace
 
 # TODO linkify, replacements, smartquotes
 _rules = [
     ["normalize", normalize],
     ["block", block],
     ["inline", inline],
     #   [ 'linkify',        require('./rules_core/linkify')        ],
-    #   [ 'replacements',   require('./rules_core/replacements')   ],
+    ['replacements', replace],
     #   [ 'smartquotes',    require('./rules_core/smartquotes')    ]
 ]
 

diff --git a/markdown_it/rules_core/__init__.py b/markdown_it/rules_core/__init__.py
@@ -2,3 +2,4 @@
 from .normalize import normalize  # noqa: F401
 from .block import block  # noqa: F401
 from .inline import inline  # noqa: F401
+from .replacements import replace  # noqa: F401
diff --git a/markdown_it/rules_core/replacements.js b/markdown_it/rules_core/replacements.js
diff --git a/markdown_it/rules_core/replacements.py b/markdown_it/rules_core/replacements.py
@@ -0,0 +1,100 @@
+"""Simple typographic replacements
+
+(c) (C) → ©
+(tm) (TM) → ™
+(r) (R) → ®
++- → ±
+(p) (P) → §
+... → … (also ?.... → ?.., !.... → !..)
+???????? → ???, !!!!! → !!!, `,,` → `,`
+-- → &ndash;, --- → &mdash;
+"""
+import logging
+import re
+from typing import List
-from typing import List
+from typing import List, Match
-from typing import List
+from typing import List, Match
+
+from .state_core import StateCore
+from ..token import Token
+
+LOGGER = logging.getLogger(__name__)
+
+# TODO:
+# - fractions 1/2, 1/4, 3/4 -> ½, ¼, ¾
+# - multiplication 2 x 4 -> 2 × 4
+
+# Cheap pre-check: matches any character sequence that the "rare"
+# typographic replacements could act on. Compiled once at import time.
+RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--")
+
+# Matches the scoped abbreviations (c), (tm), (r) and (p) in any letter case;
+# group 1 captures the abbreviation without its parentheses.
+SCOPED_ABBR_RE = re.compile(r"\((c|tm|r|p)\)", flags=re.IGNORECASE)
+
+# Symbol for each scoped abbreviation, keyed by the lower-cased text found
+# between the parentheses (same order as the SCOPED_ABBR_RE alternation).
+SCOPED_ABBR = {
+    "c": "©",
+    "tm": "™",
+    "r": "®",
+    "p": "§",
+}
+
+
+def replaceFn(match: Match) -> str:
+    """Return the symbol for one scoped-abbreviation match.
+
+    :param match: a match whose group 1 holds the abbreviation text
+        (e.g. ``c`` or ``TM``); it is lower-cased before the lookup.
+    :return: the corresponding value from ``SCOPED_ABBR``.
+    :raises KeyError: if group 1 is not a key of ``SCOPED_ABBR``.
+    """
+    # ``typing.Match`` is used for the annotation rather than ``re.Match``,
+    # which is not exposed by the ``re`` module on older interpreters.
+    return SCOPED_ABBR[match.group(1).lower()]
+
+
+def replace_scoped(inlineTokens: List[Token]) -> None:
+    """Replace (c), (tm), (r) and (p) in the text tokens of one inline run.
+
+    Text tokens between an autolink's ``link_open`` and ``link_close`` are
+    left untouched. Tokens are modified in place.
+
+    :param inlineTokens: the child tokens of an ``inline`` token.
+    """
+    # Number of autolinks currently open; non-zero means "inside an autolink".
+    autolink_depth = 0
+
+    for token in inlineTokens:
+        if token.type == "text":
+            if not autolink_depth:
+                # SCOPED_ABBR_RE is pre-compiled with IGNORECASE. The ``re``
+                # module functions raise ValueError when a flags argument is
+                # combined with a compiled pattern, so use the pattern's own
+                # ``sub`` method instead.
+                token.content = SCOPED_ABBR_RE.sub(replaceFn, token.content)
+        elif token.type == "link_open" and token.info == "auto":
+            autolink_depth += 1
+        elif token.type == "link_close" and token.info == "auto":
+            autolink_depth -= 1
+
+def replace_rare(inlineTokens: List[Token]):
+    """Apply the "rare" typographic replacements to text tokens in place.
+
+    Handles ``+-``, runs of dots, repeated ``?``/``!``, repeated commas and
+    ``--``/``---`` dashes. Text tokens seen while the autolink counter is
+    non-zero are skipped.
+
+    :param inlineTokens: the child tokens of an ``inline`` token.
+    """
+    multiline = re.MULTILINE
+    # (pattern, replacement, flags), applied strictly in this order; e.g. the
+    # ellipsis rule produces the "…" that the following "?…"/"!…" rule rewrites.
+    steps = (
+        (r"\+-", "±", 0),
+        # .., ..., ....... -> …
+        (r"\.{2,}", "…", 0),
+        # but ?..... & !..... -> ?.. & !..
+        (r"([?!])…", "\\1..", 0),
+        (r"([?!]){4,}", "\\1\\1\\1", 0),
+        (r",{2,}", ",", 0),
+        # em-dash
+        (r"(^|[^-])---(?=[^-]|$)", "\\1\u2014", multiline),
+        # en-dash
+        (r"(^|\s)--(?=\s|$)", "\\1\u2013", multiline),
+        (r"(^|[^-\s])--(?=[^-\s]|$)", "\\1\u2013", multiline),
+    )
+    link_counter = 0
+
+    for tok in inlineTokens:
+        is_plain_text = tok.type == "text" and not link_counter
+        if is_plain_text and re.search(RARE_RE, tok.content):
+            text = tok.content
+            for pattern, repl, flags in steps:
+                text = re.sub(pattern, repl, text, flags=flags)
+            tok.content = text
+
+        if tok.type == "link_open" and tok.info == "auto":
+            link_counter -= 1
+
+        if tok.type == "link_close" and tok.info == "auto":
+            link_counter += 1
+
+
+def replace(state: StateCore) -> None:
+    """Core rule: run the typographic replacements over every inline token.
+
+    Does nothing unless ``state.md.options.typographer`` is truthy. For each
+    token of type ``inline`` the raw content is first tested against the
+    pre-check patterns, and the children are only walked when a match exists.
+
+    :param state: the core parsing state whose ``tokens`` are processed;
+        the children of matching inline tokens are modified in place.
+    """
+    if not state.md.options.typographer:
+        return
+
+    for token in state.tokens:
+        if token.type != "inline":
+            continue
+
+        # Both patterns are pre-compiled (SCOPED_ABBR_RE with IGNORECASE), so
+        # call their own ``search``: ``re.search`` raises ValueError when it
+        # receives a compiled pattern together with a flags argument.
+        if SCOPED_ABBR_RE.search(token.content):
+            replace_scoped(token.children)
+
+        if RARE_RE.search(token.content):
+            replace_rare(token.children)
diff --git a/tests/test_api/test_main.py b/tests/test_api/test_main.py
@@ -7,7 +7,7 @@ def test_get_rules():
     md = MarkdownIt("zero")
     # print(md.get_all_rules())
     assert md.get_all_rules() == {
-        "core": ["normalize", "block", "inline"],
+        "core": ["normalize", "block", "inline", "replacements"],
         "block": [
             "table",
             "code",

diff --git a/tests/test_port/fixtures/typographer.md b/tests/test_port/fixtures/typographer.md
@@ -4,6 +4,19 @@
 <p>(bad)</p>
 .
 
+copyright (Lower)
+.
+(c)
+.
+<p>©</p>
+.
+
+copyright (Upper)
+.
+(C)
+.
+<p>©</p>
+.
 
 copyright
 .

diff --git a/tests/test_port/test_fixtures.py b/tests/test_port/test_fixtures.py
@@ -8,6 +8,16 @@
 FIXTURE_PATH = Path(__file__).parent.joinpath("fixtures")
 
 
+@pytest.mark.parametrize(
+    "line,title,input,expected",
+    read_fixture_file(FIXTURE_PATH.joinpath("typographer.md")),
+)
+def test_typographer(line, title, input, expected):
+    """Render each typographer fixture with replacements on and compare."""
+    parser = MarkdownIt().enable("replacements")
+    parser.options["typographer"] = True
+    rendered = parser.render(input)
+    assert rendered.rstrip() == expected.rstrip()
+
+
 @pytest.mark.parametrize(
     "line,title,input,expected", read_fixture_file(FIXTURE_PATH.joinpath("tables.md"))
 )