Skip to content

Commit f290ba9

Browse files
authored
✨ NEW: Add simple typographic replacements (#59)
1 parent c8f82bc commit f290ba9

File tree

8 files changed

+161
-110
lines changed

8 files changed

+161
-110
lines changed

docs/conf.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,8 @@
4545
# This pattern also affects html_static_path and html_extra_path.
4646
exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
4747

48+
nitpick_ignore = [("py:class", "Match")]
49+
4850

4951
# -- Options for HTML output -------------------------------------------------
5052

markdown_it/parser_core.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@
88

99
from .ruler import Ruler
1010
from .rules_core.state_core import StateCore
11-
from .rules_core import normalize, block, inline
11+
from .rules_core import normalize, block, inline, replace
1212

1313
# TODO linkify, smartquotes
1414
_rules = [
1515
["normalize", normalize],
1616
["block", block],
1717
["inline", inline],
1818
# [ 'linkify', require('./rules_core/linkify') ],
19-
# [ 'replacements', require('./rules_core/replacements') ],
19+
["replacements", replace],
2020
# [ 'smartquotes', require('./rules_core/smartquotes') ]
2121
]
2222

markdown_it/rules_core/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@
22
from .normalize import normalize # noqa: F401
33
from .block import block # noqa: F401
44
from .inline import inline # noqa: F401
5+
from .replacements import replace # noqa: F401

markdown_it/rules_core/replacements.js

Lines changed: 0 additions & 107 deletions
This file was deleted.
Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
"""Simple typographic replacements
2+
3+
* ``(c)``, ``(C)`` → ©
4+
* ``(tm)``, ``(TM)`` → ™
5+
* ``(r)``, ``(R)`` → ®
6+
* ``(p)``, ``(P)`` → §
7+
* ``+-`` → ±
8+
* ``...`` → …
9+
* ``?....`` → ?..
10+
* ``!....`` → !..
11+
* ``????????`` → ???
12+
* ``!!!!!`` → !!!
13+
* ``,,,`` → ,
14+
* ``--`` → – (en dash)
15+
* ``---`` → — (em dash)
16+
"""
17+
import logging
18+
import re
19+
from typing import List, Match
20+
21+
from .state_core import StateCore
22+
from ..token import Token
23+
24+
LOGGER = logging.getLogger(__name__)
25+
26+
# TODO:
27+
# - fractionals 1/2, 1/4, 3/4 -> ½, ¼, ¾
28+
# - multiplication 2 x 4 -> 2 × 4
29+
30+
# Cheap pre-check: matches when the text contains at least one sequence that
# the "rare" substitutions below could act on, so other text skips the chain.
RARE_RE = re.compile(r"\+-|\.\.|\?\?\?\?|!!!!|,,|--")

# Workaround for phantomjs - need regex without /g flag,
# or root check will fail every second time
# SCOPED_ABBR_TEST_RE = r"\((c|tm|r|p)\)"
# NOTE(review): the note above looks carried over from the JavaScript
# original; no separate test pattern is defined in this module, and
# SCOPED_ABBR_RE is used for both the check and the substitution.

# "(c)", "(tm)", "(r)" or "(p)" in any letter case; group 1 is the abbreviation.
SCOPED_ABBR_RE = re.compile(r"\((c|tm|r|p)\)", flags=re.IGNORECASE)

# Literal "+-".
PLUS_MINUS_RE = re.compile(r"\+-")

# Two or more consecutive dots.
ELLIPSIS_RE = re.compile(r"\.{2,}")

# "?" or "!" immediately followed by an ellipsis character; group 1 is the
# punctuation mark.
ELLIPSIS_QUESTION_EXCLAMATION_RE = re.compile(r"([?!])…")

# Run of four or more "?" / "!" characters; group 1 ends up holding the last
# character of the run.
QUESTION_EXCLAMATION_RE = re.compile(r"([?!]){4,}")

# Two or more consecutive commas.
COMMA_RE = re.compile(r",{2,}")

# "---" with no further hyphen on either side; group 1 is the preceding
# character (or the empty match at a line start).
EM_DASH_RE = re.compile(r"(^|[^-])---(?=[^-]|$)", flags=re.MULTILINE)

# "--" delimited by whitespace or a line boundary on both sides.
EN_DASH_RE = re.compile(r"(^|\s)--(?=\s|$)", flags=re.MULTILINE)

# "--" between characters that are neither hyphen nor whitespace (or at a
# line boundary).
EN_DASH_INDENT_RE = re.compile(r"(^|[^-\s])--(?=[^-\s]|$)", flags=re.MULTILINE)
53+
54+
55+
# Lower-cased abbreviation -> typographic symbol.
SCOPED_ABBR = {"c": "©", "r": "®", "p": "§", "tm": "™"}


def replaceFn(match: Match[str]):
    """Return the symbol for a matched scoped abbreviation.

    :param match: a regex match whose group 1 is the abbreviation text
        (``c``, ``tm``, ``r`` or ``p``, in any letter case).
    :return: the corresponding entry of ``SCOPED_ABBR``.
    """
    abbreviation = match.group(1)
    # The lookup table is keyed by lower case; the match may be upper case.
    return SCOPED_ABBR[abbreviation.lower()]
60+
61+
62+
def replace_scoped(inlineTokens: List[Token]) -> None:
    """Replace ``(c)``, ``(tm)``, ``(r)`` and ``(p)`` in text tokens, in place.

    Text tokens located between an auto ``link_open`` and its matching
    ``link_close`` (``token.info == "auto"``) are left untouched.

    :param inlineTokens: the child tokens of one inline token; the ``content``
        of qualifying ``text`` tokens is rewritten.
    """
    # Number of currently open autolinks. Tokens are visited front to back,
    # so an opening tag raises the depth and a closing tag lowers it.
    autolink_depth = 0

    for token in inlineTokens:
        if token.type == "link_open" and token.info == "auto":
            autolink_depth += 1
        elif token.type == "link_close" and token.info == "auto":
            autolink_depth -= 1
        elif token.type == "text" and autolink_depth == 0:
            token.content = SCOPED_ABBR_RE.sub(replaceFn, token.content)
74+
75+
76+
def replace_rare(inlineTokens: List[Token]) -> None:
    """Apply the punctuation and dash replacements to text tokens, in place.

    Handles ``+-``, runs of dots, ``?``/``!`` followed by an ellipsis, long
    runs of ``?``/``!``, repeated commas, and ``---`` / ``--`` dashes. Text
    tokens between an auto ``link_open`` and its matching ``link_close``
    (``token.info == "auto"``) are left untouched.

    :param inlineTokens: the child tokens of one inline token; the ``content``
        of qualifying ``text`` tokens is rewritten.
    """
    # Number of currently open autolinks. Tokens are visited front to back,
    # so an opening tag raises the depth and a closing tag lowers it.
    autolink_depth = 0

    for token in inlineTokens:
        if token.type == "link_open" and token.info == "auto":
            autolink_depth += 1
            continue
        if token.type == "link_close" and token.info == "auto":
            autolink_depth -= 1
            continue
        if token.type != "text" or autolink_depth:
            continue
        # Skip the whole substitution chain when nothing can match.
        if not RARE_RE.search(token.content):
            continue

        text = token.content

        # +- -> ±
        text = PLUS_MINUS_RE.sub("±", text)

        # .., ..., ....... -> …
        text = ELLIPSIS_RE.sub("…", text)

        # but ?..... & !..... -> ?.. & !..
        # (must run after the ellipsis step, which produces the "…" it matches)
        text = ELLIPSIS_QUESTION_EXCLAMATION_RE.sub("\\1..", text)
        text = QUESTION_EXCLAMATION_RE.sub("\\1\\1\\1", text)

        # ,,  ,,,  ,,,, -> ,
        text = COMMA_RE.sub(",", text)

        # em-dash (before en-dash, so "---" is not consumed as "--")
        text = EM_DASH_RE.sub("\\1\u2014", text)

        # en-dash
        text = EN_DASH_RE.sub("\\1\u2013", text)
        text = EN_DASH_INDENT_RE.sub("\\1\u2013", text)

        token.content = text
109+
110+
111+
def replace(state: StateCore) -> None:
    """Core rule: apply simple typographic replacements to inline content.

    Does nothing unless ``state.md.options.typographer`` is truthy. For each
    ``inline`` token in ``state.tokens``, the token's raw ``content`` is used
    as a cheap pre-check before the children are rewritten in place.

    :param state: the core parsing state whose tokens are processed.
    """
    if not state.md.options.typographer:
        return

    for token in state.tokens:
        if token.type != "inline":
            continue

        # Nothing to rewrite if the inline token carries no child list.
        if token.children is None:
            continue

        if SCOPED_ABBR_RE.search(token.content):
            replace_scoped(token.children)

        if RARE_RE.search(token.content):
            replace_rare(token.children)

tests/test_api/test_main.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ def test_get_rules():
77
md = MarkdownIt("zero")
88
# print(md.get_all_rules())
99
assert md.get_all_rules() == {
10-
"core": ["normalize", "block", "inline"],
10+
"core": ["normalize", "block", "inline", "replacements"],
1111
"block": [
1212
"table",
1313
"code",

tests/test_port/fixtures/typographer.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,19 @@
44
<p>(bad)</p>
55
.
66

7+
copyright (Lower)
8+
.
9+
(c)
10+
.
11+
<p>©</p>
12+
.
13+
14+
copyright (Upper)
15+
.
16+
(C)
17+
.
18+
<p>©</p>
19+
.
720

821
copyright
922
.
@@ -61,6 +74,14 @@ dupes
6174
.
6275

6376

77+
dupes-ellipsis
78+
.
79+
!... ?... ,... !!!!!!.... ????.... ,,...
80+
.
81+
<p>!.. ?.. ,… !!!.. ???.. ,…</p>
82+
.
83+
84+
6485
dashes
6586
.
6687
---markdownit --- super---

tests/test_port/test_fixtures.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,17 @@
88
FIXTURE_PATH = Path(__file__).parent.joinpath("fixtures")
99

1010

11+
@pytest.mark.parametrize(
    "line,title,input,expected",
    read_fixture_file(FIXTURE_PATH.joinpath("typographer.md")),
)
def test_typographer(line, title, input, expected):
    """Render one typographer fixture and compare it with the expected HTML."""
    parser = MarkdownIt().enable("replacements")
    # The replacements rule only acts when the typographer option is on.
    parser.options["typographer"] = True
    rendered = parser.render(input)
    # Trailing whitespace is not significant for the comparison.
    assert rendered.rstrip() == expected.rstrip()
20+
21+
1122
@pytest.mark.parametrize(
1223
"line,title,input,expected", read_fixture_file(FIXTURE_PATH.joinpath("tables.md"))
1324
)

0 commit comments

Comments
 (0)