Skip to content

Commit 9dac020

Browse files
committed
fixup! Fix html5lib#11, html5lib#12: quote attributes that need escaping in legacy browsers
1 parent a337d3b commit 9dac020

File tree

1 file changed

+15
-8
lines changed

1 file changed

+15
-8
lines changed

html5lib/serializer/htmlserializer.py

+15-8
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,10 @@
1313

1414
spaceCharacters = "".join(spaceCharacters)
1515

16-
quoteAttributeSpec = re.compile("[" + spaceCharacters + "\"'=<>`]")
17-
quoteAttributeLegacy = re.compile("[\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
16+
quoteAttributeSpecChars = spaceCharacters + "\"'=<>`"
17+
quoteAttributeSpec = re.compile("[" + quoteAttributeSpecChars + "]")
18+
quoteAttributeLegacy = re.compile("[" + quoteAttributeSpecChars +
19+
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n"
1820
"\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15"
1921
"\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f"
2022
"\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000"
@@ -82,7 +84,7 @@ def htmlentityreplace_errors(exc):
8284
class HTMLSerializer(object):
8385

8486
# attribute quoting options
85-
quote_attr_values = "legacy"
87+
quote_attr_values = "legacy" # be secure by default
8688
quote_char = '"'
8789
use_best_quote_char = True
8890

@@ -118,9 +120,9 @@ def __init__(self, **kwargs):
118120
inject_meta_charset=True|False
119121
Whether to insert a meta element to define the character set of the
120122
document.
121-
quote_attr_values="legacy"|"spec"|True
123+
quote_attr_values="legacy"|"spec"|"always"
122124
Whether to quote attribute values that don't require quoting
123-
per legacy browser behaviour, HTML authoring rules, or always.
125+
per legacy browser behaviour, when required by the standard, or always.
124126
quote_char=u'"'|u"'"
125127
Use given quote character for attribute quoting. Default is to
126128
use double quote unless attribute value contains a double quote,
@@ -249,10 +251,15 @@ def serialize(self, treewalker, encoding=None):
249251
(k not in booleanAttributes.get(name, tuple())
250252
and k not in booleanAttributes.get("", tuple())):
251253
yield self.encodeStrict("=")
252-
if self.quote_attr_values or len(v) == 0:
254+
if self.quote_attr_values == "always" or len(v) == 0:
253255
quote_attr = True
254-
elif :
255-
quoteAttributeSpec.search(v)
256+
elif self.quote_attr_values == "spec":
257+
quote_attr = quoteAttributeSpec.search(v) is not None
258+
elif self.quote_attr_values == "legacy":
259+
quote_attr = quoteAttributeLegacy.search(v) is not None
260+
else:
261+
raise ValueError("quote_attr_values must be one of: "
262+
"'always', 'spec', or 'legacy'")
256263
v = v.replace("&", "&amp;")
257264
if self.escape_lt_in_attrs:
258265
v = v.replace("<", "&lt;")

0 commit comments

Comments
 (0)