Skip to content

Commit e5221c2

Browse files
authored
refactor: replace bleach with nh3 (ammonia) (#295)
1 parent b509830 commit e5221c2

20 files changed

+75
-118
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ authors = [
77
]
88
readme = "README.rst"
99
license = {text = "Apache License, Version 2.0"}
10-
dependencies = ["bleach>=2.1.0", "docutils>=0.13.1", "Pygments>=2.5.1"]
10+
dependencies = ["nh3>=0.2.14", "docutils>=0.13.1", "Pygments>=2.5.1"]
1111
classifiers = [
1212
"Intended Audience :: Developers",
1313
"License :: OSI Approved :: Apache Software License",

readme_renderer/clean.py

Lines changed: 42 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -12,16 +12,12 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
import functools
16-
from typing import Any, Dict, Iterator, List, Optional
15+
from typing import Dict, Optional, Set
1716

18-
import bleach
19-
import bleach.callbacks
20-
import bleach.linkifier
21-
import bleach.sanitizer
17+
import nh3
2218

2319

24-
ALLOWED_TAGS = [
20+
ALLOWED_TAGS = {
2521
# Bleach Defaults
2622
"a", "abbr", "acronym", "b", "blockquote", "code", "em", "i", "li", "ol",
2723
"strong", "ul",
@@ -32,102 +28,62 @@
3228
"span", "sub", "summary", "sup", "table", "tbody", "td", "th", "thead",
3329
"tr", "tt", "kbd", "var", "input", "section", "aside", "nav", "s", "figure",
3430
"figcaption",
35-
]
31+
}
3632

3733
ALLOWED_ATTRIBUTES = {
3834
# Bleach Defaults
39-
"a": ["href", "title"],
40-
"abbr": ["title"],
41-
"acronym": ["title"],
35+
"a": {"href", "title"},
36+
"abbr": {"title"},
37+
"acronym": {"title"},
4238

4339
# Custom Additions
44-
"*": ["id"],
45-
"hr": ["class"],
46-
"img": ["src", "width", "height", "alt", "align", "class"],
47-
"span": ["class"],
48-
"th": ["align", "class"],
49-
"td": ["align", "colspan", "rowspan"],
50-
"div": ["align", "class"],
51-
"h1": ["align"],
52-
"h2": ["align"],
53-
"h3": ["align"],
54-
"h4": ["align"],
55-
"h5": ["align"],
56-
"h6": ["align"],
57-
"code": ["class"],
58-
"p": ["align", "class"],
59-
"pre": ["lang"],
60-
"ol": ["start"],
61-
"input": ["type", "checked", "disabled"],
62-
"aside": ["class"],
63-
"dd": ["class"],
64-
"dl": ["class"],
65-
"dt": ["class"],
66-
"ul": ["class"],
67-
"nav": ["class"],
68-
"figure": ["class"],
40+
"*": {"id"},
41+
"hr": {"class"},
42+
"img": {"src", "width", "height", "alt", "align", "class"},
43+
"span": {"class"},
44+
"th": {"align", "class"},
45+
"td": {"align", "colspan", "rowspan"},
46+
"div": {"align", "class"},
47+
"h1": {"align"},
48+
"h2": {"align"},
49+
"h3": {"align"},
50+
"h4": {"align"},
51+
"h5": {"align"},
52+
"h6": {"align"},
53+
"code": {"class"},
54+
"p": {"align", "class"},
55+
"pre": {"lang"},
56+
"ol": {"start"},
57+
"input": {"type", "checked", "disabled"},
58+
"aside": {"class"},
59+
"dd": {"class"},
60+
"dl": {"class"},
61+
"dt": {"class"},
62+
"ul": {"class"},
63+
"nav": {"class"},
64+
"figure": {"class"},
6965
}
7066

7167

72-
class DisabledCheckboxInputsFilter:
73-
# The typeshed for bleach (html5lib) filters is incomplete, use `typing.Any`
74-
# See https://github.com/python/typeshed/blob/505ea726415016e53638c8b584b8fdc9c722cac1/stubs/bleach/bleach/html5lib_shim.pyi#L7-L8 # noqa E501
75-
def __init__(self, source: Any) -> None:
76-
self.source = source
77-
78-
def __iter__(self) -> Iterator[Dict[str, Optional[str]]]:
79-
for token in self.source:
80-
if token.get("name") == "input":
81-
# only allow disabled checkbox inputs
82-
is_checkbox, is_disabled, unsafe_attrs = False, False, False
83-
for (_, attrname), value in token.get("data", {}).items():
84-
if attrname == "type" and value == "checkbox":
85-
is_checkbox = True
86-
elif attrname == "disabled":
87-
is_disabled = True
88-
elif attrname != "checked":
89-
unsafe_attrs = True
90-
break
91-
if is_checkbox and is_disabled and not unsafe_attrs:
92-
yield token
93-
else:
94-
yield token
95-
96-
def __getattr__(self, name: str) -> Any:
97-
return getattr(self.source, name)
98-
99-
10068
def clean(
    html: str,
    tags: Optional[Set[str]] = None,
    attributes: Optional[Dict[str, Set[str]]] = None
) -> Optional[str]:
    """Sanitize an HTML fragment with nh3.

    Args:
        html: The HTML markup to sanitize.
        tags: Element names to keep. Defaults to ``ALLOWED_TAGS`` when
            ``None``.
        attributes: Mapping of element name (or ``"*"``) to the attribute
            names to keep on it. Defaults to ``ALLOWED_ATTRIBUTES`` when
            ``None``.

    Returns:
        The sanitized markup, or ``None`` if nh3 raises ``ValueError``.
    """
    if tags is None:
        tags = ALLOWED_TAGS
    if attributes is None:
        attributes = ALLOWED_ATTRIBUTES

    try:
        # Pass the resolved locals rather than the module-level defaults;
        # otherwise a caller-supplied allow-list (e.g. ``tags={"br"}``) is
        # silently ignored.
        return nh3.clean(
            html,
            tags=tags,
            attributes=attributes,
            # Every surviving link gets rel="nofollow".
            link_rel="nofollow",
            # URLs using any other scheme (ftp:, irc:, ...) are stripped.
            url_schemes={"http", "https", "mailto"},
        )
    except ValueError:
        return None

readme_renderer/txt.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,4 +21,4 @@
2121

2222
def render(raw: str, **kwargs: Any) -> Optional[str]:
2323
rendered = html_escape(raw).replace("\n", "<br>")
24-
return clean(rendered, tags=["br"])
24+
return clean(rendered, tags={"br"})
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
&lt;iframe src="http://mymalicioussite.com/"&gt;Click here&lt;/iframe&gt;
1+
Click here
Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,2 @@
11
<p>Something naughty this way comes</p>
2-
&lt;script&gt;
3-
alert("Hello");
4-
&lt;/script&gt;
2+

tests/fixtures/test_CommonMark_008.html

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,13 +4,13 @@
44
<span class="bp">self</span><span class="o">.</span><span class="n">name</span> <span class="o">=</span> <span class="n">name</span>
55

66
<span class="k">def</span> <span class="nf">make_sound</span><span class="p">(</span><span class="bp">self</span><span class="p">):</span>
7-
<span class="nb">print</span><span class="p">(</span><span class="s1">&#39;Ruff!&#39;</span><span class="p">)</span>
7+
<span class="nb">print</span><span class="p">(</span><span class="s1">'Ruff!'</span><span class="p">)</span>
88

9-
<span class="n">dog</span> <span class="o">=</span> <span class="n">Dog</span><span class="p">(</span><span class="s1">&#39;Fido&#39;</span><span class="p">)</span>
9+
<span class="n">dog</span> <span class="o">=</span> <span class="n">Dog</span><span class="p">(</span><span class="s1">'Fido'</span><span class="p">)</span>
1010
</pre>
1111
<p>and then here is some bash:</p>
12-
<pre lang="bash"><span class="k">if</span><span class="w"> </span><span class="o">[</span><span class="w"> </span><span class="s2">&quot;</span><span class="nv">$1</span><span class="s2">&quot;</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">&quot;--help&quot;</span><span class="w"> </span><span class="o">]</span><span class="p">;</span><span class="w"> </span><span class="k">then</span>
13-
<span class="w"> </span><span class="nb">echo</span><span class="w"> </span><span class="s2">&quot;OK&quot;</span>
12+
<pre lang="bash"><span class="k">if</span><span class="w"> </span><span class="o">[</span><span class="w"> </span><span class="s2">"</span><span class="nv">$1</span><span class="s2">"</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">"--help"</span><span class="w"> </span><span class="o">]</span><span class="p">;</span><span class="w"> </span><span class="k">then</span>
13+
<span class="w"> </span><span class="nb">echo</span><span class="w"> </span><span class="s2">"OK"</span>
1414
<span class="k">fi</span>
1515
</pre>
1616
<p>or click <a href="http://www.surveymonkey.com" rel="nofollow">SurveyMonkey</a></p>

tests/fixtures/test_GFM_019.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
11
<p><a href="http://commonmark.org" rel="nofollow">http://commonmark.org</a></p>
22
<p>(Visit <a href="https://encrypted.google.com/search?q=Markup+(business)" rel="nofollow">https://encrypted.google.com/search?q=Markup+(business)</a>)</p>
3-
<p>Anonymous FTP is available at <a>ftp://foo.bar.baz</a>.</p>
3+
<p>Anonymous FTP is available at <a rel="nofollow">ftp://foo.bar.baz</a>.</p>

tests/fixtures/test_GFM_020.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
<p><a href="mailto:foo@bar.baz">foo@bar.baz</a></p>
1+
<p><a href="mailto:foo@bar.baz" rel="nofollow">foo@bar.baz</a></p>

tests/fixtures/test_GFM_021.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
<p>hello@mail+xyz.example isn't valid, but <a href="mailto:hello+xyz@mail.example">hello+xyz@mail.example</a> is.</p>
1+
<p>hello@mail+xyz.example isn't valid, but <a href="mailto:hello+xyz@mail.example" rel="nofollow">hello+xyz@mail.example</a> is.</p>

tests/fixtures/test_GFM_022.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a></p>
2-
<p><a href="mailto:a.b-c_d@a.b">a.b-c_d@a.b</a>.</p>
1+
<p><a href="mailto:a.b-c_d@a.b" rel="nofollow">a.b-c_d@a.b</a></p>
2+
<p><a href="mailto:a.b-c_d@a.b" rel="nofollow">a.b-c_d@a.b</a>.</p>
33
44

tests/fixtures/test_GFM_024.html

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,17 +1,17 @@
11
<ul>
2-
<li><input type="checkbox" disabled> Valid unchecked checkbox</li>
3-
<li><input type="checkbox" checked disabled> Valid checked checkbox</li>
4-
<li> Invalid enabled checkbox</li>
2+
<li><input type="checkbox" disabled=""> Valid unchecked checkbox</li>
3+
<li><input type="checkbox" checked="" disabled=""> Valid checked checkbox</li>
4+
<li><input type="checkbox"> Invalid enabled checkbox</li>
55
<li>
6-
6+
<input>
77
</li>
88
<li>
9-
9+
<input type="submit">
1010
</li>
1111
<li>
12-
12+
<input>
1313
</li>
1414
<li>
15-
15+
<input type="checkbox" checked="">
1616
</li>
1717
</ul>

tests/fixtures/test_GFM_doublequotes.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
<pre><code>This is code text.
33
</code></pre>
44
<pre lang="python3"><span class="k">def</span> <span class="nf">this_is_python</span><span class="p">():</span>
5-
<span class="w"> </span><span class="sd">&quot;&quot;&quot;This is a docstring.&quot;&quot;&quot;</span>
5+
<span class="w"> </span><span class="sd">"""This is a docstring."""</span>
66
<span class="k">pass</span>
77
</pre>
88
<pre lang="go"><span class="kd">func</span><span class="w"> </span><span class="nx">ThisIsGo</span><span class="p">(){</span>
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
<p>This is normal text.</p>
22
<pre lang="python3"><span class="k">def</span> <span class="nf">this_is_python</span><span class="p">():</span>
3-
<span class="w"> </span><span class="sd">&quot;&quot;&quot;This is a docstring.&quot;&quot;&quot;</span>
3+
<span class="w"> </span><span class="sd">"""This is a docstring."""</span>
44
<span class="k">pass</span>
5-
<span class="o">&lt;</span><span class="n">script</span> <span class="nb">type</span><span class="o">=</span><span class="s2">&quot;text/javascript&quot;</span><span class="o">&gt;</span><span class="n">alert</span><span class="p">(</span><span class="s1">&#39;I am evil.&#39;</span><span class="p">);</span><span class="o">&lt;/</span><span class="n">script</span><span class="o">&gt;</span>
5+
<span class="o">&lt;</span><span class="n">script</span> <span class="nb">type</span><span class="o">=</span><span class="s2">"text/javascript"</span><span class="o">&gt;</span><span class="n">alert</span><span class="p">(</span><span class="s1">'I am evil.'</span><span class="p">);</span><span class="o">&lt;/</span><span class="n">script</span><span class="o">&gt;</span>
66
</pre>

tests/fixtures/test_rst_008.html

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@
88

99
</span><span class="n">dog</span> <span class="o">=</span> <span class="n">Dog</span><span class="p">(</span><span class="s1">'Fido'</span><span class="p">)</span></code></pre>
1010
<p>and then here is some bash:</p>
11-
<pre><code><span class="k">if</span><span class="w"> </span><span class="o">[</span><span class="w"> </span><span class="s2">&quot;</span><span class="nv">$1</span><span class="s2">&quot;</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">&quot;--help&quot;</span><span class="w"> </span><span class="o">]</span><span class="p">;</span><span class="w"> </span><span class="k">then</span><span class="w">
12-
</span><span class="nb">echo</span><span class="w"> </span><span class="s2">&quot;OK&quot;</span><span class="w">
11+
<pre><code><span class="k">if</span><span class="w"> </span><span class="o">[</span><span class="w"> </span><span class="s2">"</span><span class="nv">$1</span><span class="s2">"</span><span class="w"> </span><span class="o">=</span><span class="w"> </span><span class="s2">"--help"</span><span class="w"> </span><span class="o">]</span><span class="p">;</span><span class="w"> </span><span class="k">then</span><span class="w">
12+
</span><span class="nb">echo</span><span class="w"> </span><span class="s2">"OK"</span><span class="w">
1313
</span><span class="k">fi</span></code></pre>
1414
<p>or click <a href="http://www.surveymonkey.com" rel="nofollow">SurveyMonkey</a></p>
1515
<pre><code>An unknown code fence block</code></pre>

tests/fixtures/test_rst_bibtex.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
<pre><code><span class="nc">&#64;article</span><span class="p">{</span><span class="nl">the_impact_of_pygments_docutils_config_and_html5</span><span class="p">,</span><span class="w">
1+
<pre><code><span class="nc">@article</span><span class="p">{</span><span class="nl">the_impact_of_pygments_docutils_config_and_html5</span><span class="p">,</span><span class="w">
22
</span><span class="na">year</span><span class="w"> </span><span class="p">=</span><span class="w"> </span><s>{2022}</s><span class="p">,</span></code></pre>

tests/fixtures/test_rst_docinfo.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
<dd class="created"><p>mer 02 ago 2017 14:49:24 CEST</p>
77
</dd>
88
<dt class="author">Author<span class="colon">:</span></dt>
9-
<dd class="author"><p>Lele Gaifax &lt;<a href="mailto:lele&#37;&#52;&#48;metapensiero&#46;it">lele<span>&#64;</span>metapensiero<span>&#46;</span>it</a>&gt;</p></dd>
9+
<dd class="author"><p>Lele Gaifax &lt;<a href="mailto:lele%40metapensiero.it" rel="nofollow">lele<span>@</span>metapensiero<span>.</span>it</a>&gt;</p></dd>
1010
<dt class="license">License<span class="colon">:</span></dt>
1111
<dd class="license"><p>GNU General Public License version 3 or later</p>
1212
</dd>

tests/fixtures/test_rst_linkify.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ <h2>Development</h2>
4242
<dd><p><a href="http://multigtfs.readthedocs.org/" rel="nofollow">http://multigtfs.readthedocs.org/</a></p>
4343
</dd>
4444
<dt>IRC<span class="colon">:</span></dt>
45-
<dd><p><a>irc://irc.freenode.net/tulsawebdevs</a></p>
45+
<dd><p><a rel="nofollow">irc://irc.freenode.net/tulsawebdevs</a></p>
4646
</dd>
4747
</dl>
4848
</section>

0 commit comments

Comments
 (0)