diff --git a/CHANGES.rst b/CHANGES.rst index 62a6a233..1b557816 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -33,6 +33,13 @@ Released on XXX * **Use scripting disabled by default (as we don't implement scripting).** +* **Fix #11, avoiding the XSS bug potentially caused by serializer + allowing attribute values to be escaped out of in old browser versions, + changing the quote_attr_values option on serializer to take one of + three values, "always" (the old True value), "legacy" (the new option, + and the new default), and "spec" (the old False value, and the old + default).** + 0.9999999/1.0b8 ~~~~~~~~~~~~~~~ diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index 3ec63d72..af231d8e 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -10,6 +10,10 @@ class Filter(_base.Filter): + def __init__(self, source, require_matching_tags=True): + super(Filter, self).__init__(source) + self.require_matching_tags = require_matching_tags + def __iter__(self): open_elements = [] for token in _base.Filter.__iter__(self): @@ -26,7 +30,7 @@ def __iter__(self): assert type == "EmptyTag" else: assert type == "StartTag" - if type == "StartTag": + if type == "StartTag" and self.require_matching_tags: open_elements.append((namespace, name)) for (namespace, name), value in token["data"].items(): assert namespace is None or isinstance(namespace, text_type) @@ -44,7 +48,7 @@ def __iter__(self): assert name != "" if (not namespace or namespace == namespaces["html"]) and name in voidElements: assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} - else: + elif self.require_matching_tags: start = open_elements.pop() assert start == (namespace, name) diff --git a/html5lib/serializer/htmlserializer.py b/html5lib/serializer/htmlserializer.py index b87d9a75..afe2e0e2 100644 --- a/html5lib/serializer/htmlserializer.py +++ b/html5lib/serializer/htmlserializer.py @@ -1,10 +1,7 @@ from __future__ import absolute_import, division, unicode_literals from six import text_type -try: - from functools import reduce -except ImportError: - pass +import re from ..constants import voidElements, booleanAttributes, spaceCharacters from ..constants import rcdataElements, entities, xmlEntities @@ -13,6 +10,17 @@ spaceCharacters = "".join(spaceCharacters) +quoteAttributeSpecChars = spaceCharacters + "\"'=<>`" +quoteAttributeSpec = re.compile("[" + quoteAttributeSpecChars + "]") +quoteAttributeLegacy = re.compile("[" + quoteAttributeSpecChars + + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n" + "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15" + "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000" + "\u2001\u2002\u2003\u2004\u2005\u2006\u2007" + "\u2008\u2009\u200a\u2028\u2029\u202f\u205f" + "\u3000]") + try: from codecs import register_error, xmlcharrefreplace_errors except ImportError: @@ -73,7 +81,7 @@ def htmlentityreplace_errors(exc): class HTMLSerializer(object): # attribute quoting options - quote_attr_values = False + quote_attr_values = "legacy" # be secure by default quote_char = '"' use_best_quote_char = True @@ -109,9 +117,9 @@ def __init__(self, **kwargs): inject_meta_charset=True|False Whether it insert a meta element to define the character set of the document. - quote_attr_values=True|False + quote_attr_values="legacy"|"spec"|"always" Whether to quote attribute values that don't require quoting - per HTML5 parsing rules. + per legacy browser behaviour, when required by the standard, or always. quote_char=u'"'|u"'" Use given quote character for attribute quoting. Default is to use double quote unless attribute value contains a double quote, @@ -240,11 +248,15 @@ def serialize(self, treewalker, encoding=None): (k not in booleanAttributes.get(name, tuple()) and k not in booleanAttributes.get("", tuple())): yield self.encodeStrict("=") - if self.quote_attr_values or not v: + if self.quote_attr_values == "always" or len(v) == 0: quote_attr = True + elif self.quote_attr_values == "spec": + quote_attr = quoteAttributeSpec.search(v) is not None + elif self.quote_attr_values == "legacy": + quote_attr = quoteAttributeLegacy.search(v) is not None else: - quote_attr = reduce(lambda x, y: x or (y in v), - spaceCharacters + ">\"'=", False) + raise ValueError("quote_attr_values must be one of: " + "'always', 'spec', or 'legacy'") v = v.replace("&", "&") if self.escape_lt_in_attrs: v = v.replace("<", "<") diff --git a/html5lib/tests/serializer-testdata/core.test b/html5lib/tests/serializer-testdata/core.test new file mode 100644 index 00000000..70828d0d --- /dev/null +++ b/html5lib/tests/serializer-testdata/core.test @@ -0,0 +1,395 @@ +{ + "tests": [ + { + "expected": [ + "<span title='test \"with\" &quot;'>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "test \"with\" "" + } + ] + ] + ], + "description": "proper attribute value escaping" + }, + { + "expected": [ + "<span title=foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo" + } + ] + ] + ], + "description": "proper attribute value non-quoting" + }, + { + "expected": [ + "<span title=\"foo<bar\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo<bar" + } + ] + ] + ], + "description": "proper attribute value non-quoting (with <)" + }, + { + "expected": [ + "<span title=\"foo=bar\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo=bar" + } + ] + ] + ], + "description": "proper attribute value quoting (with =)" + }, + { + "expected": [ + "<span title=\"foo>bar\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo>bar" + } + ] + ] + ], + "description": "proper attribute value quoting (with >)" + }, + { + "expected": [ + "<span title='foo\"bar'>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo\"bar" + } + ] + ] + ], + "description": "proper attribute value quoting (with \")" + }, + { + "expected": [ + "<span title=\"foo'bar\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo'bar" + } + ] + ] + ], + "description": "proper attribute value quoting (with ')" + }, + { + "expected": [ + "<span title=\"foo'bar"baz\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo'bar\"baz" + } + ] + ] + ], + "description": "proper attribute value quoting (with both \" and ')" + }, + { + "expected": [ + "<span title=\"foo bar\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo bar" + } + ] + ] + ], + "description": "proper attribute value quoting (with space)" + }, + { + "expected": [ + "<span title=\"foo\tbar\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo\tbar" + } + ] + ] + ], + "description": "proper attribute value quoting (with tab)" + }, + { + "expected": [ + "<span title=\"foo\nbar\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo\nbar" + } + ] + ] + ], + "description": "proper attribute value quoting (with LF)" + }, + { + "expected": [ + "<span title=\"foo\rbar\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo\rbar" + } + ] + ] + ], + "description": "proper attribute value quoting (with CR)" + }, + { + "expected": [ + "<span title=\"foo\u000bbar\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo\u000bbar" + } + ] + ] + ], + "description": "proper attribute value non-quoting (with linetab)" + }, + { + "expected": [ + "<span title=\"foo\fbar\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "foo\fbar" + } + ] + ] + ], + "description": "proper attribute value quoting (with form feed)" + }, + { + "expected": [ + "<img>" + ], + "input": [ + [ + "EmptyTag", + "img", + {} + ] + ], + "description": "void element (as EmptyTag token)" + }, + { + "expected": [ + "<!DOCTYPE foo>" + ], + "input": [ + [ + "Doctype", + "foo" + ] + ], + "description": "doctype in error" + }, + { + "expected": [ + "a<b>c&d" + ], + "input": [ + [ + "Characters", + "a<b>c&d" + ] + ], + "description": "character data", + "options": { + "encoding": "utf-8" + } + }, + { + "expected": [ + "<script>a<b>c&d" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "script", + {} + ], + [ + "Characters", + "a<b>c&d" + ] + ], + "description": "rcdata" + }, + { + "expected": [ + "<!DOCTYPE HTML>" + ], + "input": [ + [ + "Doctype", + "HTML" + ] + ], + "description": "doctype" + }, + { + "expected": [ + "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">" + ], + "input": [ + [ + "Doctype", + "HTML", + "-//W3C//DTD HTML 4.01//EN", + "http://www.w3.org/TR/html4/strict.dtd" + ] + ], + "description": "HTML 4.01 DOCTYPE" + }, + { + "expected": [ + "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\">" + ], + "input": [ + [ + "Doctype", + "HTML", + "-//W3C//DTD HTML 4.01//EN" + ] + ], + "description": "HTML 4.01 DOCTYPE without system identifer" + }, + { + "expected": [ + "<!DOCTYPE html SYSTEM \"http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd\">" + ], + "input": [ + [ + "Doctype", + "html", + "", + "http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd" + ] + ], + "description": "IBM DOCTYPE without public identifer" + } + ] +} diff --git a/html5lib/tests/serializer-testdata/injectmeta.test b/html5lib/tests/serializer-testdata/injectmeta.test new file mode 100644 index 00000000..399590c3 --- /dev/null +++ b/html5lib/tests/serializer-testdata/injectmeta.test @@ -0,0 +1,350 @@ +{ + "tests": [ + { + "expected": [ + "" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ] + ], + "description": "no encoding", + "options": { + "inject_meta_charset": true + } + }, + { + "expected": [ + "<meta charset=utf-8>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ] + ], + "description": "empytag head", + "options": { + "encoding": "utf-8", + "inject_meta_charset": true + } + }, + { + "expected": [ + "<meta charset=utf-8><title>foo</title>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "title", + {} + ], + [ + "Characters", + "foo" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "title" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ] + ], + "description": "head w/title", + "options": { + "encoding": "utf-8", + "inject_meta_charset": true + } + }, + { + "expected": [ + "<meta charset=utf-8>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "EmptyTag", + "meta", + [ + { + "namespace": null, + "name": "charset", + "value": "ascii" + } + ] + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ] + ], + "description": "head w/meta-charset", + "options": { + "encoding": "utf-8", + "inject_meta_charset": true + } + }, + { + "expected": [ + "<meta charset=utf-8><meta charset=utf-8>", + "<head><meta charset=utf-8><meta charset=ascii>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "EmptyTag", + "meta", + [ + { + "namespace": null, + "name": "charset", + "value": "ascii" + } + ] + ], + [ + "EmptyTag", + "meta", + [ + { + "namespace": null, + "name": "charset", + "value": "ascii" + } + ] + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ] + ], + "description": "head w/ two meta-charset", + "options": { + "encoding": "utf-8", + "inject_meta_charset": true + } + }, + { + "expected": [ + "<meta charset=utf-8><meta content=noindex name=robots>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "EmptyTag", + "meta", + [ + { + "namespace": null, + "name": "name", + "value": "robots" + }, + { + "namespace": null, + "name": "content", + "value": "noindex" + } + ] + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ] + ], + "description": "head w/robots", + "options": { + "encoding": "utf-8", + "inject_meta_charset": true + } + }, + { + "expected": [ + "<meta content=noindex name=robots><meta charset=utf-8>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "EmptyTag", + "meta", + [ + { + "namespace": null, + "name": "name", + "value": "robots" + }, + { + "namespace": null, + "name": "content", + "value": "noindex" + } + ] + ], + [ + "EmptyTag", + "meta", + [ + { + "namespace": null, + "name": "charset", + "value": "ascii" + } + ] + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ] + ], + "description": "head w/robots & charset", + "options": { + "encoding": "utf-8", + "inject_meta_charset": true + } + }, + { + "expected": [ + "<meta content=\"text/html; charset=utf-8\" http-equiv=content-type>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "EmptyTag", + "meta", + [ + { + "namespace": null, + "name": "http-equiv", + "value": "content-type" + }, + { + "namespace": null, + "name": "content", + "value": "text/html; charset=ascii" + } + ] + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ] + ], + "description": "head w/ charset in http-equiv content-type", + "options": { + "encoding": "utf-8", + "inject_meta_charset": true + } + }, + { + "expected": [ + "<meta content=noindex name=robots><meta content=\"text/html; charset=utf-8\" http-equiv=content-type>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "EmptyTag", + "meta", + [ + { + "namespace": null, + "name": "name", + "value": "robots" + }, + { + "namespace": null, + "name": "content", + "value": "noindex" + } + ] + ], + [ + "EmptyTag", + "meta", + [ + { + "namespace": null, + "name": "http-equiv", + "value": "content-type" + }, + { + "namespace": null, + "name": "content", + "value": "text/html; charset=ascii" + } + ] + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ] + ], + "description": "head w/robots & charset in http-equiv content-type", + "options": { + "encoding": "utf-8", + "inject_meta_charset": true + } + } + ] +} \ No newline at end of file diff --git a/html5lib/tests/serializer-testdata/optionaltags.test b/html5lib/tests/serializer-testdata/optionaltags.test new file mode 100644 index 00000000..e67725ca --- /dev/null +++ b/html5lib/tests/serializer-testdata/optionaltags.test @@ -0,0 +1,3254 @@ +{ + "tests": [ + { + "expected": [ + "<html lang=en>foo" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "html", + [ + { + "namespace": null, + "name": "lang", + "value": "en" + } + ] + ], + [ + "Characters", + "foo" + ] + ], + "description": "html start-tag followed by text, with attributes" + }, + { + "expected": [ + "<html><!--foo-->" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "html", + {} + ], + [ + "Comment", + "foo" + ] + ], + "description": "html start-tag followed by comment" + }, + { + "expected": [ + "<html> foo" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "html", + {} + ], + [ + "Characters", + " foo" + ] + ], + "description": "html start-tag followed by space character" + }, + { + "expected": [ + "foo" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "html", + {} + ], + [ + "Characters", + "foo" + ] + ], + "description": "html start-tag followed by text" + }, + { + "expected": [ + "<foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "html", + {} + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "html start-tag followed by start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "html", + {} + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "html start-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "html", + {} + ] + ], + "description": "html start-tag at EOF (shouldn't ever happen?!)" + }, + { + "expected": [ + "</html><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "html" + ], + [ + "Comment", + "foo" + ] + ], + "description": "html end-tag followed by comment" + }, + { + "expected": [ + "</html> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "html" + ], + [ + "Characters", + " foo" + ] + ], + "description": "html end-tag followed by space character" + }, + { + "expected": [ + "foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "html" + ], + [ + "Characters", + "foo" + ] + ], + "description": "html end-tag followed by text" + }, + { + "expected": [ + "<foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "html" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "html end-tag followed by start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "html" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "html end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "html" + ] + ], + "description": "html end-tag at EOF" + }, + { + "expected": [ + "<head><!--foo-->" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "Comment", + "foo" + ] + ], + "description": "head start-tag followed by comment" + }, + { + "expected": [ + "<head> foo" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "Characters", + " foo" + ] + ], + "description": "head start-tag followed by space character" + }, + { + "expected": [ + "<head>foo" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "Characters", + "foo" + ] + ], + "description": "head start-tag followed by text" + }, + { + "expected": [ + "<foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "head start-tag followed by start-tag" + }, + { + "expected": [ + "<head></foo>", + "</foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "head start-tag followed by end-tag (shouldn't ever happen?!)" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ] + ], + "description": "empty head element" + }, + { + "expected": [ + "<meta>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ], + [ + "EmptyTag", + "meta", + {} + ] + ], + "description": "head start-tag followed by empty-tag" + }, + { + "expected": [ + "<head>", + "" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "head", + {} + ] + ], + "description": "head start-tag at EOF (shouldn't ever happen?!)" + }, + { + "expected": [ + "</head><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ], + [ + "Comment", + "foo" + ] + ], + "description": "head end-tag followed by comment" + }, + { + "expected": [ + "</head> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ], + [ + "Characters", + " foo" + ] + ], + "description": "head end-tag followed by space character" + }, + { + "expected": [ + "foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ], + [ + "Characters", + "foo" + ] + ], + "description": "head end-tag followed by text" + }, + { + "expected": [ + "<foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "head end-tag followed by start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "head end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "head" + ] + ], + "description": "head end-tag at EOF" + }, + { + "expected": [ + "<body><!--foo-->" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "body", + {} + ], + [ + "Comment", + "foo" + ] + ], + "description": "body start-tag followed by comment" + }, + { + "expected": [ + "<body> foo" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "body", + {} + ], + [ + "Characters", + " foo" + ] + ], + "description": "body start-tag followed by space character" + }, + { + "expected": [ + "foo" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "body", + {} + ], + [ + "Characters", + "foo" + ] + ], + "description": "body start-tag followed by text" + }, + { + "expected": [ + "<foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "body", + {} + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "body start-tag followed by start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "body", + {} + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "body start-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "body", + {} + ] + ], + "description": "body start-tag at EOF (shouldn't ever happen?!)" + }, + { + "expected": [ + "</body><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "body" + ], + [ + "Comment", + "foo" + ] + ], + "description": "body end-tag followed by comment" + }, + { + "expected": [ + "</body> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "body" + ], + [ + "Characters", + " foo" + ] + ], + "description": "body end-tag followed by space character" + }, + { + "expected": [ + "foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "body" + ], + [ + "Characters", + "foo" + ] + ], + "description": "body end-tag followed by text" + }, + { + "expected": [ + "<foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "body" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "body end-tag followed by start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "body" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "body end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "body" + ] + ], + "description": "body end-tag at EOF" + }, + { + "expected": [ + "</li><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "li" + ], + [ + "Comment", + "foo" + ] + ], + "description": "li end-tag followed by comment" + }, + { + "expected": [ + "</li> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "li" + ], + [ + "Characters", + " foo" + ] + ], + "description": "li end-tag followed by space character" + }, + { + "expected": [ + "</li>foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "li" + ], + [ + "Characters", + "foo" + ] + ], + "description": "li end-tag followed by text" + }, + { + "expected": [ + "</li><foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "li" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "li end-tag followed by start-tag" + }, + { + "expected": [ + "<li>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "li" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "li", + {} + ] + ], + "description": "li end-tag followed by li start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "li" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "li end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "li" + ] + ], + "description": "li end-tag at EOF" + }, + { + "expected": [ + "</dt><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dt" + ], + [ + "Comment", + "foo" + ] + ], + "description": "dt end-tag followed by comment" + }, + { + "expected": [ + "</dt> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dt" + ], + [ + "Characters", + " foo" + ] + ], + "description": "dt end-tag followed by space character" + }, + { + "expected": [ + "</dt>foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dt" + ], + [ + "Characters", + "foo" + ] + ], + "description": "dt end-tag followed by text" + }, + { + "expected": [ + "</dt><foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dt" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "dt end-tag followed by start-tag" + }, + { + "expected": [ + "<dt>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dt" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "dt", + {} + ] + ], + "description": "dt end-tag followed by dt start-tag" + }, + { + "expected": [ + "<dd>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dt" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "dd", + {} + ] + ], + "description": "dt end-tag followed by dd start-tag" + }, + { + "expected": [ + "</dt></foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dt" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "dt end-tag followed by end-tag" + }, + { + "expected": [ + "</dt>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dt" + ] + ], + "description": "dt end-tag at EOF" + }, + { + "expected": [ + "</dd><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dd" + ], + [ + "Comment", + "foo" + ] + ], + "description": "dd end-tag followed by comment" + }, + { + "expected": [ + "</dd> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dd" + ], + [ + "Characters", + " foo" + ] + ], + "description": "dd end-tag followed by space character" + }, + { + "expected": [ + "</dd>foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dd" + ], + [ + "Characters", + "foo" + ] + ], + "description": "dd end-tag followed by text" + }, + { + "expected": [ + "</dd><foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dd" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "dd end-tag followed by start-tag" + }, + { + "expected": [ + "<dd>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dd" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "dd", + {} + ] + ], + "description": "dd end-tag followed by dd start-tag" + }, + { + "expected": [ + "<dt>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dd" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "dt", + {} + ] + ], + "description": "dd end-tag followed by dt start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dd" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "dd end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "dd" + ] + ], + "description": "dd end-tag at EOF" + }, + { + "expected": [ + "</p><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "Comment", + "foo" + ] + ], + "description": "p end-tag followed by comment" + }, + { + "expected": [ + "</p> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "Characters", + " foo" + ] + ], + "description": "p end-tag followed by space character" + }, + { + "expected": [ + "</p>foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "Characters", + "foo" + ] + ], + "description": "p end-tag followed by text" + }, + { + "expected": [ + "</p><foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "p end-tag followed by start-tag" + }, + { + "expected": [ + "<address>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "address", + {} + ] + ], + "description": "p end-tag followed by address start-tag" + }, + { + "expected": [ + "<article>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "article", + {} + ] + ], + "description": "p end-tag followed by article start-tag" + }, + { + "expected": [ + "<aside>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "aside", + {} + ] + ], + "description": "p end-tag followed by aside start-tag" + }, + { + "expected": [ + "<blockquote>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "blockquote", + {} + ] + ], + "description": "p end-tag followed by blockquote start-tag" + }, + { + "expected": [ + "<datagrid>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "datagrid", + {} + ] + ], + "description": "p end-tag followed by datagrid start-tag" + }, + { + "expected": [ + "<dialog>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "dialog", + {} + ] + ], + "description": "p end-tag followed by dialog start-tag" + }, + { + "expected": [ + "<dir>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "dir", + {} + ] + ], + "description": "p end-tag followed by dir start-tag" + }, + { + "expected": [ + "<div>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "div", + {} + ] + ], + "description": "p end-tag followed by div start-tag" + }, + { + "expected": [ + "<dl>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "dl", + {} + ] + ], + "description": "p end-tag followed by dl start-tag" + }, + { + "expected": [ + "<fieldset>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "fieldset", + {} + ] + ], + "description": "p end-tag followed by fieldset start-tag" + }, + { + "expected": [ + "<footer>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "footer", + {} + ] + ], + "description": "p end-tag followed by footer start-tag" + }, + { + "expected": [ + "<form>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "form", + {} + ] + ], + "description": "p end-tag followed by form start-tag" + }, + { + "expected": [ + "<h1>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "h1", + {} + ] + ], + "description": "p end-tag followed by h1 start-tag" + }, + { + "expected": [ + "<h2>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "h2", + {} + ] + ], + "description": "p end-tag followed by h2 start-tag" + }, + { + "expected": [ + "<h3>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "h3", + {} + ] + ], + "description": "p end-tag followed by h3 start-tag" + }, + { + "expected": [ + "<h4>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "h4", + {} + ] + ], + "description": "p end-tag followed by h4 start-tag" + }, + { + "expected": [ + "<h5>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "h5", + {} + ] + ], + "description": "p end-tag followed by h5 start-tag" + }, + { + "expected": [ + "<h6>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "h6", + {} + ] + ], + "description": "p end-tag followed by h6 start-tag" + }, + { + "expected": [ + "<header>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "header", + {} + ] + ], + "description": "p end-tag followed by header start-tag" + }, + { + "expected": [ + "<hr>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "EmptyTag", + "hr", + {} + ] + ], + "description": "p end-tag followed by hr empty-tag" + }, + { + "expected": [ + "<menu>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "menu", + {} + ] + ], + "description": "p end-tag followed by menu start-tag" + }, + { + "expected": [ + "<nav>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "nav", + {} + ] + ], + "description": "p end-tag followed by nav start-tag" + }, + { + "expected": [ + "<ol>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "ol", + {} + ] + ], + "description": "p end-tag followed by ol start-tag" + }, + { + "expected": [ + "<p>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "p", + {} + ] + ], + "description": "p end-tag followed by p start-tag" + }, + { + "expected": [ + "<pre>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "pre", + {} + ] + ], + "description": "p end-tag followed by pre start-tag" + }, + { + "expected": [ + "<section>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "section", + {} + ] + ], + "description": "p end-tag followed by section start-tag" + }, + { + "expected": [ + "<table>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "table", + {} + ] + ], + "description": "p end-tag followed by table start-tag" + }, + { + "expected": [ + "<ul>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "ul", + {} + ] + ], + "description": "p end-tag followed by ul start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "p end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "p" + ] + ], + "description": "p end-tag at EOF" + }, + { + "expected": [ + "</optgroup><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "optgroup" + ], + [ + "Comment", + "foo" + ] + ], + "description": "optgroup end-tag followed by comment" + }, + { + "expected": [ + "</optgroup> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "optgroup" + ], + [ + "Characters", + " foo" + ] + ], + "description": "optgroup end-tag followed by space character" + }, + { + "expected": [ + "</optgroup>foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "optgroup" + ], + [ + "Characters", + "foo" + ] + ], + "description": "optgroup end-tag followed by text" + }, + { + "expected": [ + "</optgroup><foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "optgroup" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "optgroup end-tag followed by start-tag" + }, + { + "expected": [ + "<optgroup>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "optgroup" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "optgroup", + {} + ] + ], + "description": "optgroup end-tag followed by optgroup start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "optgroup" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "optgroup end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "optgroup" + ] + ], + "description": "optgroup end-tag at EOF" + }, + { + "expected": [ + "</option><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "option" + ], + [ + "Comment", + "foo" + ] + ], + "description": "option end-tag followed by comment" + }, + { + "expected": [ + "</option> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "option" + ], + [ + "Characters", + " foo" + ] + ], + "description": "option end-tag followed by space character" + }, + { + "expected": [ + "</option>foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "option" + ], + [ + "Characters", + "foo" + ] + ], + "description": "option end-tag followed by text" + }, + { + "expected": [ + "<optgroup>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "option" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "optgroup", + {} + ] + ], + "description": "option end-tag followed by optgroup start-tag" + }, + { + "expected": [ + "</option><foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "option" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "option end-tag followed by start-tag" + }, + { + "expected": [ + "<option>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "option" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "option", + {} + ] + ], + "description": "option end-tag followed by option start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "option" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "option end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "option" + ] + ], + "description": "option end-tag at EOF" + }, + { + "expected": [ + "<colgroup><!--foo-->" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "colgroup", + {} + ], + [ + "Comment", + "foo" + ] + ], + "description": "colgroup start-tag followed by comment" + }, + { + "expected": [ + "<colgroup> foo" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "colgroup", + {} + ], + [ + "Characters", + " foo" + ] + ], + "description": "colgroup start-tag followed by space character" + }, + { + "expected": [ + "<colgroup>foo" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "colgroup", + {} + ], + [ + "Characters", + "foo" + ] + ], + "description": "colgroup start-tag followed by text" + }, + { + "expected": [ + "<colgroup><foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "colgroup", + {} + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "colgroup start-tag followed by start-tag" + }, + { + "expected": [ + "<table><col>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "table", + {} + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "colgroup", + {} + ], + [ + "EmptyTag", + "col", + {} + ] + ], + "description": "first colgroup in a table with a col child" + }, + { + "expected": [ + "</colgroup><col>", + "<colgroup><col>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "colgroup" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "colgroup", + {} + ], + [ + "EmptyTag", + "http://www.w3.org/1999/xhtml", + "col", + {} + ] + ], + "description": "colgroup with a col child, following another colgroup" + }, + { + "expected": [ + "<colgroup></foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "colgroup", + {} + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "colgroup start-tag followed by end-tag" + }, + { + "expected": [ + "<colgroup>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "colgroup", + {} + ] + ], + "description": "colgroup start-tag at EOF" + }, + { + "expected": [ + "</colgroup><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "colgroup" + ], + [ + "Comment", + "foo" + ] + ], + "description": "colgroup end-tag followed by comment" + }, + { + "expected": [ + "</colgroup> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "colgroup" + ], + [ + "Characters", + " foo" + ] + ], + "description": "colgroup end-tag followed by space character" + }, + { + "expected": [ + "foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "colgroup" + ], + [ + "Characters", + "foo" + ] + ], + "description": "colgroup end-tag followed by text" + }, + { + "expected": [ + "<foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "colgroup" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "colgroup end-tag followed by start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "colgroup" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "colgroup end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "colgroup" + ] + ], + "description": "colgroup end-tag at EOF" + }, + { + "expected": [ + "</thead><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "thead" + ], + [ + "Comment", + "foo" + ] + ], + "description": "thead end-tag followed by comment" + }, + { + "expected": [ + "</thead> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "thead" + ], + [ + "Characters", + " foo" + ] + ], + "description": "thead end-tag followed by space character" + }, + { + "expected": [ + "</thead>foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "thead" + ], + [ + "Characters", + "foo" + ] + ], + "description": "thead end-tag followed by text" + }, + { + "expected": [ + "</thead><foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "thead" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "thead end-tag followed by start-tag" + }, + { + "expected": [ + "<tbody>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "thead" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ] + ], + "description": "thead end-tag followed by tbody start-tag" + }, + { + "expected": [ + "<tfoot>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "thead" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tfoot", + {} + ] + ], + "description": "thead end-tag followed by tfoot start-tag" + }, + { + "expected": [ + "</thead></foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "thead" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "thead end-tag followed by end-tag" + }, + { + "expected": [ + "</thead>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "thead" + ] + ], + "description": "thead end-tag at EOF" + }, + { + "expected": [ + "<tbody><!--foo-->" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ], + [ + "Comment", + "foo" + ] + ], + "description": "tbody start-tag followed by comment" + }, + { + "expected": [ + "<tbody> foo" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ], + [ + "Characters", + " foo" + ] + ], + "description": "tbody start-tag followed by space character" + }, + { + "expected": [ + "<tbody>foo" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ], + [ + "Characters", + "foo" + ] + ], + "description": "tbody start-tag followed by text" + }, + { + "expected": [ + "<tbody><foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "tbody start-tag followed by start-tag" + }, + { + "expected": [ + "<table><tr>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "table", + {} + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tr", + {} + ] + ], + "description": "first tbody in a table with a tr child" + }, + { + "expected": [ + "<tbody><tr>", + "</tbody><tr>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tbody" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tr", + {} + ] + ], + "description": "tbody with a tr child, following another tbody" + }, + { + "expected": [ + "<tbody><tr>", + "</thead><tr>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "thead" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tr", + {} + ] + ], + "description": "tbody with a tr child, following a thead" + }, + { + "expected": [ + "<tbody><tr>", + "</tfoot><tr>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tfoot" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tr", + {} + ] + ], + "description": "tbody with a tr child, following a tfoot" + }, + { + "expected": [ + "<tbody></foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "tbody start-tag followed by end-tag" + }, + { + "expected": [ + "<tbody>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ] + ], + "description": "tbody start-tag at EOF" + }, + { + "expected": [ + "</tbody><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tbody" + ], + [ + "Comment", + "foo" + ] + ], + "description": "tbody end-tag followed by comment" + }, + { + "expected": [ + "</tbody> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tbody" + ], + [ + "Characters", + " foo" + ] + ], + "description": "tbody end-tag followed by space character" + }, + { + "expected": [ + "</tbody>foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tbody" + ], + [ + "Characters", + "foo" + ] + ], + "description": "tbody end-tag followed by text" + }, + { + "expected": [ + "</tbody><foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tbody" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "tbody end-tag followed by start-tag" + }, + { + "expected": [ + "<tbody>", + "</tbody>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tbody" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ] + ], + "description": "tbody end-tag followed by tbody start-tag" + }, + { + "expected": [ + "<tfoot>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tbody" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tfoot", + {} + ] + ], + "description": "tbody end-tag followed by tfoot start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tbody" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "tbody end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tbody" + ] + ], + "description": "tbody end-tag at EOF" + }, + { + "expected": [ + "</tfoot><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tfoot" + ], + [ + "Comment", + "foo" + ] + ], + "description": "tfoot end-tag followed by comment" + }, + { + "expected": [ + "</tfoot> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tfoot" + ], + [ + "Characters", + " foo" + ] + ], + "description": "tfoot end-tag followed by space character" + }, + { + "expected": [ + "</tfoot>foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tfoot" + ], + [ + "Characters", + "foo" + ] + ], + "description": "tfoot end-tag followed by text" + }, + { + "expected": [ + "</tfoot><foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tfoot" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "tfoot end-tag followed by start-tag" + }, + { + "expected": [ + "<tbody>", + "</tfoot>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tfoot" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tbody", + {} + ] + ], + "description": "tfoot end-tag followed by tbody start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tfoot" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "tfoot end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tfoot" + ] + ], + "description": "tfoot end-tag at EOF" + }, + { + "expected": [ + "</tr><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tr" + ], + [ + "Comment", + "foo" + ] + ], + "description": "tr end-tag followed by comment" + }, + { + "expected": [ + "</tr> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tr" + ], + [ + "Characters", + " foo" + ] + ], + "description": "tr end-tag followed by space character" + }, + { + "expected": [ + "</tr>foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tr" + ], + [ + "Characters", + "foo" + ] + ], + "description": "tr end-tag followed by text" + }, + { + "expected": [ + "</tr><foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tr" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "tr end-tag followed by start-tag" + }, + { + "expected": [ + "<tr>", + "</tr>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tr" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "tr", + {} + ] + ], + "description": "tr end-tag followed by tr start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tr" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "tr end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "tr" + ] + ], + "description": "tr end-tag at EOF" + }, + { + "expected": [ + "</td><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "td" + ], + [ + "Comment", + "foo" + ] + ], + "description": "td end-tag followed by comment" + }, + { + "expected": [ + "</td> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "td" + ], + [ + "Characters", + " foo" + ] + ], + "description": "td end-tag followed by space character" + }, + { + "expected": [ + "</td>foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "td" + ], + [ + "Characters", + "foo" + ] + ], + "description": "td end-tag followed by text" + }, + { + "expected": [ + "</td><foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "td" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "td end-tag followed by start-tag" + }, + { + "expected": [ + "<td>", + "</td>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "td" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "td", + {} + ] + ], + "description": "td end-tag followed by td start-tag" + }, + { + "expected": [ + "<th>", + "</td>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "td" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "th", + {} + ] + ], + "description": "td end-tag followed by th start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "td" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "td end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "td" + ] + ], + "description": "td end-tag at EOF" + }, + { + "expected": [ + "</th><!--foo-->" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "th" + ], + [ + "Comment", + "foo" + ] + ], + "description": "th end-tag followed by comment" + }, + { + "expected": [ + "</th> foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "th" + ], + [ + "Characters", + " foo" + ] + ], + "description": "th end-tag followed by space character" + }, + { + "expected": [ + "</th>foo" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "th" + ], + [ + "Characters", + "foo" + ] + ], + "description": "th end-tag followed by text" + }, + { + "expected": [ + "</th><foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "th" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "foo", + {} + ] + ], + "description": "th end-tag followed by start-tag" + }, + { + "expected": [ + "<th>", + "</th>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "th" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "th", + {} + ] + ], + "description": "th end-tag followed by th start-tag" + }, + { + "expected": [ + "<td>", + "</th>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "th" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "td", + {} + ] + ], + "description": "th end-tag followed by td start-tag" + }, + { + "expected": [ + "</foo>" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "th" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "foo" + ] + ], + "description": "th end-tag followed by end-tag" + }, + { + "expected": [ + "" + ], + "input": [ + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "th" + ] + ], + "description": "th end-tag at EOF" + } + ] +} \ No newline at end of file diff --git a/html5lib/tests/serializer-testdata/options.test b/html5lib/tests/serializer-testdata/options.test new file mode 100644 index 00000000..eedcb3f0 --- /dev/null +++ b/html5lib/tests/serializer-testdata/options.test @@ -0,0 +1,265 @@ +{ + "tests": [ + { + "expected": [ + "<span title='test 'with' quote_char'>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + [ + { + "namespace": null, + "name": "title", + "value": "test 'with' quote_char" + } + ] + ] + ], + "description": "quote_char=\"'\"", + "options": { + "quote_char": "'" + } + }, + { + "expected": [ + "<button disabled>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "button", + [ + { + "namespace": null, + "name": "disabled", + "value": "disabled" + } + ] + ] + ], + "description": "quote_attr_values='always'", + "options": { + "quote_attr_values": "always" + } + }, + { + "expected": [ + "<div irrelevant>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "div", + [ + { + "namespace": null, + "name": "irrelevant", + "value": "irrelevant" + } + ] + ] + ], + "description": "quote_attr_values='always' with irrelevant", + "options": { + "quote_attr_values": "always" + } + }, + { + "expected": [ + "<div class=\"foo\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "div", + [ + { + "namespace": null, + "name": "class", + "value": "foo" + } + ] + ] + ], + "description": "non-minimized quote_attr_values='always'", + "options": { + "quote_attr_values": "always" + } + }, + { + "expected": [ + "<div class=foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "div", + [ + { + "namespace": null, + "name": "class", + "value": "foo" + } + ] + ] + ], + "description": "non-minimized quote_attr_values='legacy'", + "options": { + "quote_attr_values": "legacy" + } + }, + { + "expected": [ + "<div class=foo>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "div", + [ + { + "namespace": null, + "name": "class", + "value": "foo" + } + ] + ] + ], + "description": "non-minimized quote_attr_values='spec'", + "options": { + "quote_attr_values": "spec" + } + }, + { + "expected": [ + "<img />" + ], + "input": [ + [ + "EmptyTag", + "img", + {} + ] + ], + "description": "use_trailing_solidus=true with void element", + "options": { + "use_trailing_solidus": true + } + }, + { + "expected": [ + "<div>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "div", + {} + ] + ], + "description": "use_trailing_solidus=true with non-void element", + "options": { + "use_trailing_solidus": true + } + }, + { + "expected": [ + "<div irrelevant=irrelevant>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "div", + [ + { + "namespace": null, + "name": "irrelevant", + "value": "irrelevant" + } + ] + ] + ], + "description": "minimize_boolean_attributes=false", + "options": { + "minimize_boolean_attributes": false + } + }, + { + "expected": [ + "<div irrelevant=\"\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "div", + [ + { + "namespace": null, + "name": "irrelevant", + "value": "" + } + ] + ] + ], + "description": "minimize_boolean_attributes=false with empty value", + "options": { + "minimize_boolean_attributes": false + } + }, + { + "expected": [ + "<a title=\"a<b>c&d\">" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "a", + [ + { + "namespace": null, + "name": "title", + "value": "a<b>c&d" + } + ] + ] + ], + "description": "escape less than signs in attribute values", + "options": { + "escape_lt_in_attrs": true + } + }, + { + "expected": [ + "<script>a<b>c&d" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "script", + {} + ], + [ + "Characters", + "a<b>c&d" + ] + ], + "description": "rcdata", + "options": { + "escape_rcdata": true + } + } + ] +} \ No newline at end of file diff --git a/html5lib/tests/serializer-testdata/whitespace.test b/html5lib/tests/serializer-testdata/whitespace.test new file mode 100644 index 00000000..dac3a69e --- /dev/null +++ b/html5lib/tests/serializer-testdata/whitespace.test @@ -0,0 +1,198 @@ +{ + "tests": [ + { + "expected": [ + " foo" + ], + "input": [ + [ + "Characters", + "\t\r\n\f foo" + ] + ], + "description": "bare text with leading spaces", + "options": { + "strip_whitespace": true + } + }, + { + "expected": [ + "foo " + ], + "input": [ + [ + "Characters", + "foo \t\r\n\f" + ] + ], + "description": "bare text with trailing spaces", + "options": { + "strip_whitespace": true + } + }, + { + "expected": [ + "foo bar" + ], + "input": [ + [ + "Characters", + "foo \t\r\n\f bar" + ] + ], + "description": "bare text with inner spaces", + "options": { + "strip_whitespace": true + } + }, + { + "expected": [ + "<pre>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</pre>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "pre", + {} + ], + [ + "Characters", + "\t\r\n\f foo \t\r\n\f bar \t\r\n\f" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "pre" + ] + ], + "description": "text within <pre>", + "options": { + "strip_whitespace": true + } + }, + { + "expected": [ + "<pre>\t\r\n\f fo<span>o \t\r\n\f b</span>ar \t\r\n\f</pre>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "pre", + {} + ], + [ + "Characters", + "\t\r\n\f fo" + ], + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "span", + {} + ], + [ + "Characters", + "o \t\r\n\f b" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "span" + ], + [ + "Characters", + "ar \t\r\n\f" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "pre" + ] + ], + "description": "text within <pre>, with inner markup", + "options": { + "strip_whitespace": true + } + }, + { + "expected": [ + "<textarea>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</textarea>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "textarea", + {} + ], + [ + "Characters", + "\t\r\n\f foo \t\r\n\f bar \t\r\n\f" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "textarea" + ] + ], + "description": "text within <textarea>", + "options": { + "strip_whitespace": true + } + }, + { + "expected": [ + "<script>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</script>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "script", + {} + ], + [ + "Characters", + "\t\r\n\f foo \t\r\n\f bar \t\r\n\f" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "script" + ] + ], + "description": "text within <script>", + "options": { + "strip_whitespace": true + } + }, + { + "expected": [ + "<style>\t\r\n\f foo \t\r\n\f bar \t\r\n\f</style>" + ], + "input": [ + [ + "StartTag", + "http://www.w3.org/1999/xhtml", + "style", + {} + ], + [ + "Characters", + "\t\r\n\f foo \t\r\n\f bar \t\r\n\f" + ], + [ + "EndTag", + "http://www.w3.org/1999/xhtml", + "style" + ] + ], + "description": "text within <style>", + "options": { + "strip_whitespace": true + } + } + ] +} \ No newline at end of file diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 54a64a85..6e6a916b 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -69,8 +69,8 @@ } -def get_data_files(subdirectory, files='*.dat'): - return sorted(glob.glob(os.path.join(test_dir, subdirectory, files))) +def get_data_files(subdirectory, files='*.dat', search_dir=test_dir): + return sorted(glob.glob(os.path.join(search_dir, subdirectory, files))) class DefaultDict(dict): diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index af76075e..93276267 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -1,17 +1,14 @@ from __future__ import absolute_import, division, unicode_literals +import os import json -import unittest -from .support import get_data_files +import pytest -try: - unittest.TestCase.assertEqual -except AttributeError: - unittest.TestCase.assertEqual = unittest.TestCase.assertEquals +from .support import get_data_files -import html5lib from html5lib import constants +from html5lib.filters.lint import Filter as Lint from html5lib.serializer import HTMLSerializer, serialize from html5lib.treewalkers._base import TreeWalker @@ -82,7 +79,7 @@ def _convertAttrib(self, attribs): def serialize_html(input, options): options = dict([(str(k), v) for k, v in options.items()]) - stream = JsonWalker(input) + stream = Lint(JsonWalker(input), False) serializer = HTMLSerializer(alphabetical_attributes=True, **options) return serializer.render(stream, options.get("encoding", None)) @@ -100,77 +97,118 @@ def runSerializerTest(input, expected, options): assert False, "Expected: %s, Received: %s" % (expected, result) -class EncodingTestCase(unittest.TestCase): - def throwsWithLatin1(self, input): - self.assertRaises(UnicodeEncodeError, serialize_html, input, {"encoding": "iso-8859-1"}) +def throwsWithLatin1(input): + with pytest.raises(UnicodeEncodeError): + serialize_html(input, {"encoding": "iso-8859-1"}) + + +def testDoctypeName(): + throwsWithLatin1([["Doctype", "\u0101"]]) + + +def testDoctypePublicId(): + throwsWithLatin1([["Doctype", "potato", "\u0101"]]) + + +def testDoctypeSystemId(): + throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]]) + + +def testCdataCharacters(): + runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]], + ["<style>ā"], {"encoding": "iso-8859-1"}) + + +def testCharacters(): + runSerializerTest([["Characters", "\u0101"]], + ["ā"], {"encoding": "iso-8859-1"}) + + +def testStartTagName(): + throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]]) + + +def testAttributeName(): + throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]]) + + +def testAttributeValue(): + runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span", + [{"namespace": None, "name": "potato", "value": "\u0101"}]]], + ["<span potato=ā>"], {"encoding": "iso-8859-1"}) - def testDoctypeName(self): - self.throwsWithLatin1([["Doctype", "\u0101"]]) - def testDoctypePublicId(self): - self.throwsWithLatin1([["Doctype", "potato", "\u0101"]]) +def testEndTagName(): + throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]]) - def testDoctypeSystemId(self): - self.throwsWithLatin1([["Doctype", "potato", "potato", "\u0101"]]) - def testCdataCharacters(self): - runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "style", {}], ["Characters", "\u0101"]], - ["<style>ā"], {"encoding": "iso-8859-1"}) +def testComment(): + throwsWithLatin1([["Comment", "\u0101"]]) - def testCharacters(self): - runSerializerTest([["Characters", "\u0101"]], - ["ā"], {"encoding": "iso-8859-1"}) - def testStartTagName(self): - self.throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "\u0101", []]]) +@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`")) +def testSpecQuoteAttribute(c): + input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span", + [{"namespace": None, "name": "foo", "value": c}]]] + if c == '"': + output_ = ["<span foo='%s'>" % c] + else: + output_ = ['<span foo="%s">' % c] + options_ = {"quote_attr_values": "spec"} + runSerializerTest(input_, output_, options_) - def testEmptyTagName(self): - self.throwsWithLatin1([["EmptyTag", "http://www.w3.org/1999/xhtml", "\u0101", []]]) - def testAttributeName(self): - self.throwsWithLatin1([["StartTag", "http://www.w3.org/1999/xhtml", "span", [{"namespace": None, "name": "\u0101", "value": "potato"}]]]) +@pytest.mark.parametrize("c", list("\t\n\u000C\x20\r\"'=<>`" + "\x00\x01\x02\x03\x04\x05\x06\x07\x08\t\n" + "\x0b\x0c\r\x0e\x0f\x10\x11\x12\x13\x14\x15" + "\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" + "\x20\x2f\x60\xa0\u1680\u180e\u180f\u2000" + "\u2001\u2002\u2003\u2004\u2005\u2006\u2007" + "\u2008\u2009\u200a\u2028\u2029\u202f\u205f" + "\u3000")) +def testLegacyQuoteAttribute(c): + input_ = [["StartTag", "http://www.w3.org/1999/xhtml", "span", + [{"namespace": None, "name": "foo", "value": c}]]] + if c == '"': + output_ = ["<span foo='%s'>" % c] + else: + output_ = ['<span foo="%s">' % c] + options_ = {"quote_attr_values": "legacy"} + runSerializerTest(input_, output_, options_) - def testAttributeValue(self): - runSerializerTest([["StartTag", "http://www.w3.org/1999/xhtml", "span", - [{"namespace": None, "name": "potato", "value": "\u0101"}]]], - ["<span potato=ā>"], {"encoding": "iso-8859-1"}) - def testEndTagName(self): - self.throwsWithLatin1([["EndTag", "http://www.w3.org/1999/xhtml", "\u0101"]]) +@pytest.fixture +def lxml_parser(): + return etree.XMLParser(resolve_entities=False) - def testComment(self): - self.throwsWithLatin1([["Comment", "\u0101"]]) +@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable") +def testEntityReplacement(lxml_parser): + doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>' + tree = etree.fromstring(doc, parser=lxml_parser).getroottree() + result = serialize(tree, tree="lxml", omit_optional_tags=False) + assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>' -if "lxml" in optionals_loaded: - class LxmlTestCase(unittest.TestCase): - def setUp(self): - self.parser = etree.XMLParser(resolve_entities=False) - self.treewalker = html5lib.getTreeWalker("lxml") - self.serializer = HTMLSerializer() - def testEntityReplacement(self): - doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>""" - tree = etree.fromstring(doc, parser=self.parser).getroottree() - result = serialize(tree, tree="lxml", omit_optional_tags=False) - self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>\u03B2</html>""", result) +@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable") +def testEntityXML(lxml_parser): + doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>></html>' + tree = etree.fromstring(doc, parser=lxml_parser).getroottree() + result = serialize(tree, tree="lxml", omit_optional_tags=False) + assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>></html>' - def testEntityXML(self): - doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>></html>""" - tree = etree.fromstring(doc, parser=self.parser).getroottree() - result = serialize(tree, tree="lxml", omit_optional_tags=False) - self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>></html>""", result) - def testEntityNoResolve(self): - doc = """<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>""" - tree = etree.fromstring(doc, parser=self.parser).getroottree() - result = serialize(tree, tree="lxml", omit_optional_tags=False, - resolve_entities=False) - self.assertEqual("""<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>""", result) +@pytest.mark.skipif("lxml" not in optionals_loaded, reason="lxml not importable") +def testEntityNoResolve(lxml_parser): + doc = '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>' + tree = etree.fromstring(doc, parser=lxml_parser).getroottree() + result = serialize(tree, tree="lxml", omit_optional_tags=False, + resolve_entities=False) + assert result == '<!DOCTYPE html SYSTEM "about:legacy-compat"><html>β</html>' def test_serializer(): - for filename in get_data_files('serializer', '*.test'): + for filename in get_data_files('serializer-testdata', '*.test', os.path.dirname(__file__)): with open(filename) as fp: tests = json.load(fp) for index, test in enumerate(tests['tests']):