Skip to content

Commit bae5cac

Browse files
committed
Update the characters that need to be quoted in attributes in the serializer, per the spec
This also moves to using the `re` module, which seems far cleaner than the reduce-based search used previously.
1 parent 69b8da5 commit bae5cac

File tree

1 file changed

+5
-7
lines changed

1 file changed

+5
-7
lines changed

html5lib/serializer/htmlserializer.py

+5-7
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,7 @@
11
from __future__ import absolute_import, division, unicode_literals
22
from six import text_type
33

4-
try:
5-
from functools import reduce
6-
except ImportError:
7-
pass
4+
import re
85

96
from ..constants import voidElements, booleanAttributes, spaceCharacters
107
from ..constants import rcdataElements, entities, xmlEntities
@@ -13,6 +10,8 @@
1310

1411
spaceCharacters = "".join(spaceCharacters)
1512

13+
quoteAttributeSpec = re.compile("[" + spaceCharacters + "\"'=<>`]")
14+
1615
try:
1716
from codecs import register_error, xmlcharrefreplace_errors
1817
except ImportError:
@@ -240,11 +239,10 @@ def serialize(self, treewalker, encoding=None):
240239
(k not in booleanAttributes.get(name, tuple()) and
241240
k not in booleanAttributes.get("", tuple())):
242241
yield self.encodeStrict("=")
243-
if self.quote_attr_values or not v:
242+
if self.quote_attr_values:
244243
quote_attr = True
245244
else:
246-
quote_attr = reduce(lambda x, y: x or (y in v),
247-
spaceCharacters + ">\"'=", False)
245+
quote_attr = len(v) == 0 or quoteAttributeSpec.search(v)
248246
v = v.replace("&", "&amp;")
249247
if self.escape_lt_in_attrs:
250248
v = v.replace("<", "&lt;")

0 commit comments

Comments
 (0)