From 222b75ce3e6736dd9ddaba1b9d966ca247f577f1 Mon Sep 17 00:00:00 2001 From: inikulin Date: Sat, 25 Mar 2017 19:23:52 +0300 Subject: [PATCH 01/82] Add control-or-undefined-character-in-input-stream parse error. --- tokenizer/test3.test | 430 ++++++++++++++++++++++++++------- tokenizer/test4.test | 15 +- tokenizer/unicodeChars.test | 470 ++++++++++++++++++++++++++++-------- 3 files changed, 732 insertions(+), 183 deletions(-) diff --git a/tokenizer/test3.test b/tokenizer/test3.test index 8fc529a2..36db8a56 100644 --- a/tokenizer/test3.test +++ b/tokenizer/test3.test @@ -14,7 +14,10 @@ {"description":"\\u000B", "input":"\u000B", -"output":["ParseError", ["Character", "\u000B"]]}, +"output":["ParseError", ["Character", "\u000B"]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } +]}, {"description":"\\u000C", "input":"\u000C", @@ -94,7 +97,10 @@ {"description":"<\\u000B", "input":"<\u000B", -"output":["ParseError", "ParseError", ["Character", "<\u000B"]]}, +"output":["ParseError", "ParseError", ["Character", "<\u000B"]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 2 } +]}, {"description":"<\\u000C", "input":"<\u000C", @@ -122,7 +128,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a\u0008", {}]]}, +"output":["ParseError", ["StartTag", "a\u0008", {}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 3 } +]}, {"description":"", "input":"", @@ -4558,7 +4747,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a\u000B", {}]]}, +"output":["ParseError", ["StartTag", "a\u000B", {}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 3 } +]}, {"description":"", "input":"", @@ -4570,7 +4762,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a\u001F", {}]]}, +"output":["ParseError", ["StartTag", "a\u001F", {}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 3 } +]}, {"description":"", "input":"", @@ -4582,7 +4777,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"\u0008":""}]]}, +"output":["ParseError", ["StartTag", "a", {"\u0008":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 4 } +]}, {"description":"", "input":"", @@ -4594,7 +4792,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"\u000B":""}]]}, +"output":["ParseError", ["StartTag", "a", {"\u000B":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 4 } +]}, {"description":"", "input":"", @@ -4606,7 +4807,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"\u001F":""}]]}, +"output":["ParseError", ["StartTag", "a", {"\u001F":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 4 } +]}, {"description":"", "input":"", @@ -4714,7 +4918,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a\u0008":""}]]}, +"output":["ParseError", ["StartTag", "a", {"a\u0008":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 5 } +]}, {"description":"", "input":"", @@ -4726,7 +4933,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a\u000B":""}]]}, +"output":["ParseError", ["StartTag", "a", {"a\u000B":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 5 } +]}, {"description":"", "input":"", @@ -4738,7 +4948,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a\u001F":""}]]}, +"output":["ParseError", ["StartTag", "a", {"a\u001F":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 5 } +]}, {"description":"", "input":"", @@ -4750,7 +4963,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]]}, +"output":["ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } +]}, {"description":"", "input":"", @@ -4762,7 +4978,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]]}, +"output":["ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } +]}, {"description":"", "input":"", @@ -4774,7 +4993,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]]}, +"output":["ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } +]}, {"description":"", "input":"", @@ -4958,7 +5180,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a":"\u0008"}]]}, +"output":["ParseError", ["StartTag", "a", {"a":"\u0008"}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } +]}, {"description":"", "input":"", @@ -4970,7 +5195,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]]}, +"output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } +]}, {"description":"", "input":"", @@ -4982,7 +5210,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a":"\u001F"}]]}, +"output":["ParseError", ["StartTag", "a", {"a":"\u001F"}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } +]}, {"description":"", "input":"", @@ -5010,7 +5241,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]]}, +"output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 7 } +]}, {"description":"", "input":"", @@ -5158,7 +5392,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]]}, +"output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 7 } +]}, {"description":"", "input":"", @@ -5194,7 +5431,10 @@ {"description":"", "input":"", -"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]]}, +"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 8 } +]}, {"description":"", "input":"", @@ -5206,7 +5446,10 @@ {"description":"", "input":"", -"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]]}, +"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 8 } +]}, {"description":"", "input":"", @@ -5218,7 +5461,10 @@ {"description":"", "input":"", -"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]]}, +"output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 8 } +]}, {"description":"", "input":"", @@ -5490,7 +5736,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a":"a\u0008"}]]}, +"output":["ParseError", ["StartTag", "a", {"a":"a\u0008"}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 7 } +]}, {"description":"", "input":"", @@ -5502,7 +5751,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a":"a\u000B"}]]}, +"output":["ParseError", ["StartTag", "a", {"a":"a\u000B"}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 7 } +]}, {"description":"", "input":"", @@ -5514,7 +5766,10 @@ {"description":"", "input":"", -"output":["ParseError", ["StartTag", "a", {"a":"a\u001F"}]]}, +"output":["ParseError", ["StartTag", "a", {"a":"a\u001F"}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 7 } +]}, {"description":"", "input":"", @@ -5774,7 +6029,10 @@ {"description":"", "input":"", -"output":["ParseError", "ParseError", ["StartTag", "a", {"\u000B":""}]]}, +"output":["ParseError", "ParseError", ["StartTag", "a", {"\u000B":""}]], +"errors":[ + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 4 } +]}, {"description":"", "input":"", diff --git a/tokenizer/test4.test b/tokenizer/test4.test index 4be94b0c..fee3feff 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -233,17 +233,26 @@ {"description":"U+0080 in lookahead region", "input":" Date: Sat, 25 Mar 2017 20:10:25 +0300 Subject: [PATCH 02/82] Add non-unicode-character-in-input-stream parse error. --- tokenizer/unicodeCharsProblematic.test | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/tokenizer/unicodeCharsProblematic.test b/tokenizer/unicodeCharsProblematic.test index 2980ce22..380290a2 100644 --- a/tokenizer/unicodeCharsProblematic.test +++ b/tokenizer/unicodeCharsProblematic.test @@ -3,25 +3,37 @@ "doubleEscaped":true, "input": "\\uDFFF", "output":["ParseError", ["Character", "\\uDFFF"]], -"ignoreErrorOrder":true}, +"ignoreErrorOrder":true, +"errors":[ + { "code": "non-unicode-character-in-input-stream", "line": 1, "col": 1 } +]}, {"description": "Invalid Unicode character U+D800", "doubleEscaped":true, "input": "\\uD800", "output":["ParseError", ["Character", "\\uD800"]], -"ignoreErrorOrder":true}, +"ignoreErrorOrder":true, +"errors":[ + { "code": "non-unicode-character-in-input-stream", "line": 1, "col": 1 } +]}, {"description": "Invalid Unicode character U+DFFF with valid preceding character", "doubleEscaped":true, "input": "a\\uDFFF", "output":[["Character", "a"], "ParseError", ["Character", "\\uDFFF"]], -"ignoreErrorOrder":true}, +"ignoreErrorOrder":true, +"errors":[ + { "code": "non-unicode-character-in-input-stream", "line": 1, "col": 2 } +]}, {"description": "Invalid Unicode character U+D800 with valid following character", "doubleEscaped":true, "input": "\\uD800a", "output":["ParseError", ["Character", "\\uD800a"]], -"ignoreErrorOrder":true}, +"ignoreErrorOrder":true, +"errors":[ + { "code": "non-unicode-character-in-input-stream", "line": 1, "col": 1 } +]}, {"description":"CR followed by U+0000", "input":"\r\u0000", From 5a3b850056a3d3ec0ede15941c46a9cfec3be696 Mon Sep 17 00:00:00 2001 From: inikulin Date: Wed, 29 Mar 2017 12:50:26 +0300 Subject: [PATCH 03/82] Add self-closing-non-void-html-element error. --- tree-construction/foreign-fragment.dat | 10 ++++++++++ tree-construction/webkit01.dat | 2 ++ 2 files changed, 12 insertions(+) diff --git a/tree-construction/foreign-fragment.dat b/tree-construction/foreign-fragment.dat index 1f72b7a9..e171d77a 100644 --- a/tree-construction/foreign-fragment.dat +++ b/tree-construction/foreign-fragment.dat @@ -174,6 +174,8 @@ math ms 51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag. 52: End of file seen and there were open elements. 51: Unclosed element “ms”. +#new-errors +(1:44-1:49) self-closing-non-void-html-element #document-fragment math ms #document @@ -215,6 +217,8 @@ math ms 51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag. 52: End of file seen and there were open elements. 51: Unclosed element “mn”. +#new-errors +(1:44-1:49) self-closing-non-void-html-element #document-fragment math mn #document @@ -256,6 +260,8 @@ math mn 51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag. 52: End of file seen and there were open elements. 51: Unclosed element “mo”. +#new-errors +(1:44-1:49) self-closing-non-void-html-element #document-fragment math mo #document @@ -297,6 +303,8 @@ math mo 51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag. 52: End of file seen and there were open elements. 51: Unclosed element “mi”. +#new-errors +(1:44-1:49) self-closing-non-void-html-element #document-fragment math mi #document @@ -338,6 +346,8 @@ math mi 51: Self-closing syntax (“/>”) used on a non-void HTML element. Ignoring the slash and treating as a start tag. 52: End of file seen and there were open elements. 51: Unclosed element “mtext”. +#new-errors +(1:44-1:52) self-closing-non-void-html-element #document-fragment math mtext #document diff --git a/tree-construction/webkit01.dat b/tree-construction/webkit01.dat index da91d77d..146716d6 100644 --- a/tree-construction/webkit01.dat +++ b/tree-construction/webkit01.dat @@ -621,6 +621,8 @@ console.log("FOOBARBAZ"); (1,4): expected-doctype-but-got-start-tag (1,23): non-void-element-with-trailing-solidus (1,29): end-tag-too-early +#new-errors +(1:9-1:24) self-closing-non-void-html-element #document | | From 203f0ef2fb707bbacec771928510a2ac6664b369 Mon Sep 17 00:00:00 2001 From: inikulin Date: Wed, 29 Mar 2017 15:49:19 +0300 Subject: [PATCH 04/82] Add end-tag-with-attributes error. --- tokenizer/test1.test | 5 ++++- tokenizer/test4.test | 10 ++++++++-- tree-construction/scriptdata01.dat | 2 ++ tree-construction/tests2.dat | 2 ++ tree-construction/webkit01.dat | 7 +++++++ 5 files changed, 23 insertions(+), 3 deletions(-) diff --git a/tokenizer/test1.test b/tokenizer/test1.test index b97b2cbe..7ee40fb5 100644 --- a/tokenizer/test1.test +++ b/tokenizer/test1.test @@ -54,7 +54,10 @@ {"description":"End Tag w/attribute", "input":"", -"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]]}, +"output":[["StartTag", "h", {}], "ParseError", ["EndTag", "h"]], +"errors":[ + { "code": "end-tag-with-attributes", "line": 1, "col": 13 } +]}, {"description":"Multiple atts", "input":"", diff --git a/tokenizer/test4.test b/tokenizer/test4.test index fee3feff..8f2e49bb 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -186,11 +186,17 @@ {"description":"Uppercase close tag attributes", "input":"", -"output":["ParseError", ["EndTag", "x"]]}, +"output":["ParseError", ["EndTag", "x"]], +"errors":[ + { "code": "end-tag-with-attributes", "line": 1, "col": 6 } +]}, {"description":"Duplicate close tag attributes", "input":"", -"output":["ParseError", "ParseError", ["EndTag", "x"]]}, +"output":["ParseError", "ParseError", ["EndTag", "x"]], +"errors":[ + { "code": "end-tag-with-attributes", "line": 1, "col": 8 } +]}, {"description":"Permitted slash", "input":"
", diff --git a/tree-construction/scriptdata01.dat b/tree-construction/scriptdata01.dat index ac698d28..c26f0f78 100644 --- a/tree-construction/scriptdata01.dat +++ b/tree-construction/scriptdata01.dat @@ -80,6 +80,8 @@ FOOBAR #errors (1,3): expected-doctype-but-got-chars (1,31): attributes-in-end-tag +#new-errors +(1:31) end-tag-with-attributes #document | | diff --git a/tree-construction/tests2.dat b/tree-construction/tests2.dat index bd2d11d9..880c377b 100644 --- a/tree-construction/tests2.dat +++ b/tree-construction/tests2.dat @@ -236,6 +236,8 @@ (1,32): named-entity-without-semicolon (1,33): attributes-in-end-tag (1,33): unexpected-end-tag-before-html +#new-errors +(1:33) end-tag-with-attributes #document | | diff --git a/tree-construction/webkit01.dat b/tree-construction/webkit01.dat index 146716d6..99d1dbcb 100644 --- a/tree-construction/webkit01.dat +++ b/tree-construction/webkit01.dat @@ -133,6 +133,9 @@ console.log("FOOBARBAZ"); (1,5): expected-doctype-but-got-start-tag (1,21): attributes-in-end-tag (1,51): attributes-in-end-tag +#new-errors +(1:21) end-tag-with-attributes +(1:51) end-tag-with-attributes #document | | @@ -233,6 +236,8 @@ console.log("FOOBARBAZ"); (1,6): expected-doctype-but-got-start-tag (1,21): attributes-in-end-tag (1,21): unexpected-end-tag-treated-as +#new-errors +(1:21) end-tag-with-attributes #document | | @@ -259,6 +264,8 @@ console.log("FOOBARBAZ"); (1,28): attributes-in-end-tag (1,28): unexpected-end-tag-after-body (1,28): unexpected-end-tag-treated-as +#new-errors +(1:28) end-tag-with-attributes #document | | From 7b56b757f648606524645791392b6dd0953c45fb Mon Sep 17 00:00:00 2001 From: inikulin Date: Thu, 30 Mar 2017 13:05:58 +0300 Subject: [PATCH 05/82] Add self-closing-end-tag error. --- tokenizer/test4.test | 5 ++++- tree-construction/scriptdata01.dat | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tokenizer/test4.test b/tokenizer/test4.test index 8f2e49bb..f85ed15a 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -208,7 +208,10 @@ {"description":"Permitted slash but in close tag", "input":"
", -"output":["ParseError", ["EndTag", "br"]]}, +"output":["ParseError", ["EndTag", "br"]], +"errors":[ + { "code": "self-closing-end-tag", "line": 1, "col": 6 } +]}, {"description":"Doctype public case-sensitivity (1)", "input":"", diff --git a/tree-construction/scriptdata01.dat b/tree-construction/scriptdata01.dat index c26f0f78..28e34138 100644 --- a/tree-construction/scriptdata01.dat +++ b/tree-construction/scriptdata01.dat @@ -40,6 +40,8 @@ FOOBAR #errors (1,3): expected-doctype-but-got-chars (1,21): self-closing-flag-on-end-tag +#new-errors +(1:21) self-closing-end-tag #document | | From 91f17a61d335fe63154d23bcb5926f49da6bfa32 Mon Sep 17 00:00:00 2001 From: inikulin Date: Thu, 30 Mar 2017 16:19:17 +0300 Subject: [PATCH 06/82] Add unexpected-null-character error. --- tokenizer/test2.test | 5 ++++- tokenizer/test3.test | 5 ++++- tokenizer/test4.test | 5 ++++- tokenizer/unicodeCharsProblematic.test | 5 ++++- ...pending-spec-changes-plain-text-unsafe.dat | Bin 816 -> 927 bytes tree-construction/plain-text-unsafe.dat | Bin 7925 -> 9044 bytes 6 files changed, 16 insertions(+), 4 deletions(-) diff --git a/tokenizer/test2.test b/tokenizer/test2.test index 87a8eba3..4abafd89 100644 --- a/tokenizer/test2.test +++ b/tokenizer/test2.test @@ -134,7 +134,10 @@ {"description":"Null Byte Replacement", "input":"\u0000", -"output":["ParseError", ["Character", "\u0000"]]}, +"output":["ParseError", ["Character", "\u0000"]], +"errors":[ + { "code": "unexpected-null-character", "line": 1, "col": 1 } +]}, {"description":"Comment with dash", "input":"", -"output":["ParseError", ["Comment", "?foo--"]]}, +"output":["ParseError", ["Comment", "?foo--"]], +"errors":[ + { "code": "start-of-processing-instruction-or-xml-declaration", "line": 1, "col": 2 } +]}, {"description":"Unescaped <", "input":"foo < bar", -"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]]}, +"output":[["Character", "foo "], "ParseError", ["Character", "< bar"]], +"errors":[ + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 6 } +]}, {"description":"Null Byte Replacement", "input":"\u0000", diff --git a/tokenizer/test3.test b/tokenizer/test3.test index 204b224d..d108f94b 100644 --- a/tokenizer/test3.test +++ b/tokenizer/test3.test @@ -81,37 +81,54 @@ {"description":"<", "input":"<", -"output":["ParseError", ["Character", "<"]]}, +"output":["ParseError", ["Character", "<"]], +"errors":[ + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } +]}, {"description":"<\\u0000", "input":"<\u0000", "output":["ParseError", ["Character", "<"], "ParseError", ["Character", "\u0000"]], "errors":[ + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 }, { "code": "unexpected-null-character", "line": 1, "col": 2 } ]}, {"description":"<\\u0009", "input":"<\u0009", -"output":["ParseError", ["Character", "<\u0009"]]}, +"output":["ParseError", ["Character", "<\u0009"]], +"errors":[ + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } +]}, {"description":"<\\u000A", "input":"<\u000A", -"output":["ParseError", ["Character", "<\u000A"]]}, +"output":["ParseError", ["Character", "<\u000A"]], +"errors":[ + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } +]}, {"description":"<\\u000B", "input":"<\u000B", "output":["ParseError", "ParseError", ["Character", "<\u000B"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 2 } + { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 2 }, + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } ]}, {"description":"<\\u000C", "input":"<\u000C", -"output":["ParseError", ["Character", "<\u000C"]]}, +"output":["ParseError", ["Character", "<\u000C"]], +"errors":[ + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } +]}, {"description":"< ", "input":"< ", -"output":["ParseError", ["Character", "< "]]}, +"output":["ParseError", ["Character", "< "]], +"errors":[ + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } +]}, {"description":"", "input":"<>", -"output":["ParseError", ["Character", "<>"]]}, +"output":["ParseError", ["Character", "<>"]], +"errors":[ + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } +]}, {"description":"", "input":"", -"output":["ParseError", ["Comment", "?"]]}, +"output":["ParseError", ["Comment", "?"]], +"errors":[ + { "code": "start-of-processing-instruction-or-xml-declaration", "line": 1, "col": 2 } +]}, {"description":"", "input":"
", @@ -4719,11 +4867,17 @@ {"description":"<[", "input":"<[", -"output":["ParseError", ["Character", "<["]]}, +"output":["ParseError", ["Character", "<["]], +"errors":[ + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } +]}, {"description":"<`", "input":"<`", -"output":["ParseError", ["Character", "<`"]]}, +"output":["ParseError", ["Character", "<`"]], +"errors":[ + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } +]}, {"description":"", "input":"", @@ -6239,11 +6393,17 @@ {"description":"<{", "input":"<{", -"output":["ParseError", ["Character", "<{"]]}, +"output":["ParseError", ["Character", "<{"]], +"errors":[ + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } +]}, {"description":"<\\uDBC0\\uDC00", "input":"<\uDBC0\uDC00", -"output":["ParseError", ["Character", "<\uDBC0\uDC00"]]}, +"output":["ParseError", ["Character", "<\uDBC0\uDC00"]], +"errors":[ + { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } +]}, {"description":"=", "input":"=", diff --git a/tree-construction/comments01.dat b/tree-construction/comments01.dat index 35ec6cce..8d708dff 100644 --- a/tree-construction/comments01.dat +++ b/tree-construction/comments01.dat @@ -123,6 +123,8 @@ FOOBAZ #errors (1,1): expected-tag-name-but-got-question-mark (1,22): expected-doctype-but-got-chars +#new-errors +(1:2) start-of-processing-instruction-or-xml-declaration #document | | @@ -135,6 +137,8 @@ FOOBAZ #errors (1,1): expected-tag-name-but-got-question-mark (1,20): expected-doctype-but-got-eof +#new-errors +(1:2) start-of-processing-instruction-or-xml-declaration #document | | @@ -146,6 +150,8 @@ FOOBAZ #errors (1,1): expected-tag-name-but-got-question-mark (1,13): expected-doctype-but-got-eof +#new-errors +(1:2) start-of-processing-instruction-or-xml-declaration #document | | diff --git a/tree-construction/html5test-com.dat b/tree-construction/html5test-com.dat index 8c6ec40c..49aaa1ca 100644 --- a/tree-construction/html5test-com.dat +++ b/tree-construction/html5test-com.dat @@ -124,6 +124,8 @@ #errors (1,1): expected-tag-name-but-got-question-mark (1,47): expected-doctype-but-got-eof +#new-errors +(1:2) start-of-processing-instruction-or-xml-declaration #document | | diff --git a/tree-construction/tests1.dat b/tree-construction/tests1.dat index 33f6dc24..76952725 100644 --- a/tree-construction/tests1.dat +++ b/tree-construction/tests1.dat @@ -498,6 +498,8 @@ Line1
Line2
Line3
Line4 #errors (1,1): expected-tag-name (1,1): expected-doctype-but-got-chars +#new-errors +(1:2) unexpected-first-character-of-tag-name #document | | @@ -509,6 +511,8 @@ Line1
Line2
Line3
Line4 #errors (1,1): expected-tag-name (1,1): expected-doctype-but-got-chars +#new-errors +(1:2) unexpected-first-character-of-tag-name #document | | @@ -542,6 +546,8 @@ Line1
Line2
Line3
Line4 #errors (1,1): expected-tag-name-but-got-question-mark (1,2): expected-doctype-but-got-eof +#new-errors +(1:2) start-of-processing-instruction-or-xml-declaration #document | | @@ -553,6 +559,8 @@ Line1
Line2
Line3
Line4 #errors (1,1): expected-tag-name-but-got-question-mark (1,3): expected-doctype-but-got-eof +#new-errors +(1:2) start-of-processing-instruction-or-xml-declaration #document | | @@ -586,6 +594,8 @@ Line1
Line2
Line3
Line4 #errors (1,1): expected-tag-name-but-got-question-mark (1,11): expected-doctype-but-got-eof +#new-errors +(1:2) start-of-processing-instruction-or-xml-declaration #document | | @@ -619,6 +629,8 @@ Line1
Line2
Line3
Line4 #errors (1,1): expected-tag-name-but-got-question-mark (1,13): expected-doctype-but-got-eof +#new-errors +(1:2) start-of-processing-instruction-or-xml-declaration #document | | diff --git a/tree-construction/webkit01.dat b/tree-construction/webkit01.dat index 99d1dbcb..73a8246a 100644 --- a/tree-construction/webkit01.dat +++ b/tree-construction/webkit01.dat @@ -201,6 +201,8 @@ console.log("FOOBARBAZ"); (1,3): expected-doctype-but-got-start-tag (1,8): expected-tag-name (1,12): expected-closing-tag-but-got-eof +#new-errors +(1:9) unexpected-first-character-of-tag-name #document | | From c8d5a026c3bd2e28c21170a240d9a4ec87b7a44b Mon Sep 17 00:00:00 2001 From: inikulin Date: Mon, 3 Apr 2017 20:43:33 +0300 Subject: [PATCH 09/82] Add End tag open state parse errors. --- tokenizer/test1.test | 7 ++-- tokenizer/test2.test | 27 ++++++++++++---- tokenizer/test3.test | 58 +++++++++++++++++++--------------- tree-construction/tests1.dat | 6 ++-- tree-construction/webkit01.dat | 2 +- 5 files changed, 63 insertions(+), 37 deletions(-) diff --git a/tokenizer/test1.test b/tokenizer/test1.test index 6f2f0d59..b69ee5c8 100644 --- a/tokenizer/test1.test +++ b/tokenizer/test1.test @@ -30,13 +30,16 @@ {"description":"Empty end tag", "input":"", -"output":["ParseError"]}, +"output":["ParseError"], +"errors":[ + { "code": "missing-end-tag-name", "line": 1, "col": 3 } +]}, {"description":"Empty start tag", "input":"<>", "output":["ParseError", ["Character", "<>"]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 } ]}, {"description":"Start Tag w/attribute", diff --git a/tokenizer/test2.test b/tokenizer/test2.test index 216943fe..8ce8908f 100644 --- a/tokenizer/test2.test +++ b/tokenizer/test2.test @@ -114,7 +114,10 @@ {"description":"Unescaped ", @@ -138,7 +141,7 @@ "input":"foo < bar", "output":[["Character", "foo "], "ParseError", ["Character", "< bar"]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 6 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 6 } ]}, {"description":"Null Byte Replacement", @@ -174,18 +177,30 @@ {"description":"Empty end tag with following characters", "input":"abc", -"output":[["Character", "a"], "ParseError", ["Character", "bc"]]}, +"output":[["Character", "a"], "ParseError", ["Character", "bc"]], +"errors":[ + { "code": "missing-end-tag-name", "line": 1, "col": 4 } +]}, {"description":"Empty end tag with following tag", "input":"ac", -"output":[["Character", "a"], "ParseError", ["StartTag", "b", {}], ["Character", "c"]]}, +"output":[["Character", "a"], "ParseError", ["StartTag", "b", {}], ["Character", "c"]], +"errors":[ + { "code": "missing-end-tag-name", "line": 1, "col": 4 } +]}, {"description":"Empty end tag with following comment", "input":"ac", -"output":[["Character", "a"], "ParseError", ["Comment", "b"], ["Character", "c"]]}, +"output":[["Character", "a"], "ParseError", ["Comment", "b"], ["Character", "c"]], +"errors":[ + { "code": "missing-end-tag-name", "line": 1, "col": 4 } +]}, {"description":"Empty end tag with following end tag", "input":"ac", -"output":[["Character", "a"], "ParseError", ["EndTag", "b"], ["Character", "c"]]} +"output":[["Character", "a"], "ParseError", ["EndTag", "b"], ["Character", "c"]], +"errors":[ + { "code": "missing-end-tag-name", "line": 1, "col": 4 } +]} ]} diff --git a/tokenizer/test3.test b/tokenizer/test3.test index d108f94b..d2e3fa4b 100644 --- a/tokenizer/test3.test +++ b/tokenizer/test3.test @@ -83,14 +83,14 @@ "input":"<", "output":["ParseError", ["Character", "<"]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 } ]}, {"description":"<\\u0000", "input":"<\u0000", "output":["ParseError", ["Character", "<"], "ParseError", ["Character", "\u0000"]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 }, + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 }, { "code": "unexpected-null-character", "line": 1, "col": 2 } ]}, @@ -98,14 +98,14 @@ "input":"<\u0009", "output":["ParseError", ["Character", "<\u0009"]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 } ]}, {"description":"<\\u000A", "input":"<\u000A", "output":["ParseError", ["Character", "<\u000A"]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 } ]}, {"description":"<\\u000B", @@ -113,21 +113,21 @@ "output":["ParseError", "ParseError", ["Character", "<\u000B"]], "errors":[ { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 2 }, - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 } ]}, {"description":"<\\u000C", "input":"<\u000C", "output":["ParseError", ["Character", "<\u000C"]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 } ]}, {"description":"< ", "input":"< ", "output":["ParseError", ["Character", "< "]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 } ]}, {"description":"", "input":"", -"output":["ParseError"]}, +"output":["ParseError"], +"errors":[ + { "code": "missing-end-tag-name", "line": 1, "col": 3 } +]}, {"description":"", "input":"<>", "output":["ParseError", ["Character", "<>"]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 } ]}, {"description":"", @@ -4869,14 +4875,14 @@ "input":"<[", "output":["ParseError", ["Character", "<["]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 } ]}, {"description":"<`", "input":"<`", "output":["ParseError", ["Character", "<`"]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 } ]}, {"description":"
", @@ -6395,14 +6401,14 @@ "input":"<{", "output":["ParseError", ["Character", "<{"]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 } ]}, {"description":"<\\uDBC0\\uDC00", "input":"<\uDBC0\uDC00", "output":["ParseError", ["Character", "<\uDBC0\uDC00"]], "errors":[ - { "code": "unexpected-first-character-of-tag-name", "line": 1, "col": 2 } + { "code": "unexpected-first-character-of-start-tag-name", "line": 1, "col": 2 } ]}, {"description":"=", diff --git a/tree-construction/tests1.dat b/tree-construction/tests1.dat index 76952725..75d20d91 100644 --- a/tree-construction/tests1.dat +++ b/tree-construction/tests1.dat @@ -499,7 +499,7 @@ Line1
Line2
Line3
Line4 (1,1): expected-tag-name (1,1): expected-doctype-but-got-chars #new-errors -(1:2) unexpected-first-character-of-tag-name +(1:2) unexpected-first-character-of-start-tag-name #document | | @@ -512,7 +512,7 @@ Line1
Line2
Line3
Line4 (1,1): expected-tag-name (1,1): expected-doctype-but-got-chars #new-errors -(1:2) unexpected-first-character-of-tag-name +(1:2) unexpected-first-character-of-start-tag-name #document | | @@ -524,6 +524,8 @@ Line1
Line2
Line3
Line4 #errors (1,2): expected-closing-tag-but-got-eof (1,2): expected-doctype-but-got-chars +#new-errors +(1:3) eof-before-end-tag-name #document | | diff --git a/tree-construction/webkit01.dat b/tree-construction/webkit01.dat index 73a8246a..94b23cc3 100644 --- a/tree-construction/webkit01.dat +++ b/tree-construction/webkit01.dat @@ -202,7 +202,7 @@ console.log("FOOBARBAZ"); (1,8): expected-tag-name (1,12): expected-closing-tag-but-got-eof #new-errors -(1:9) unexpected-first-character-of-tag-name +(1:9) unexpected-first-character-of-start-tag-name #document | | From a1938c257faf7e8168338d173cb3ac838d5dbe19 Mon Sep 17 00:00:00 2001 From: inikulin Date: Tue, 4 Apr 2017 01:18:42 +0300 Subject: [PATCH 10/82] Add Markup declaration open state parse errors. --- tokenizer/domjs.test | 8 ++ tokenizer/test1.test | 10 +- tokenizer/test3.test | 158 +++++++++++++++++++----- tokenizer/test4.test | 9 +- tree-construction/html5test-com.dat | 2 + tree-construction/plain-text-unsafe.dat | Bin 9154 -> 9226 bytes tree-construction/tests1.dat | 8 ++ tree-construction/tests10.dat | 2 + tree-construction/tests21.dat | 4 + tree-construction/tests26.dat | 2 + 10 files changed, 168 insertions(+), 35 deletions(-) diff --git a/tokenizer/domjs.test b/tokenizer/domjs.test index 7769b52a..9942f3af 100644 --- a/tokenizer/domjs.test +++ b/tokenizer/domjs.test @@ -102,6 +102,14 @@ "description":"space EOF after doctype ", "input":"", + "output":[["Comment", "[CDATA[foo]]"]], + "errors":[ + { "code": "cdata-in-html-content", "line": 1, "col": 9 } + ] } ] diff --git a/tokenizer/test1.test b/tokenizer/test1.test index b69ee5c8..e4ff196c 100644 --- a/tokenizer/test1.test +++ b/tokenizer/test1.test @@ -18,7 +18,10 @@ {"description":"Truncated doctype start", "input":"", -"output":["ParseError", ["Comment", "DOC"]]}, +"output":["ParseError", ["Comment", "DOC"]], +"errors":[ + { "code": "malformed-comment", "line": 1, "col": 3 } +]}, {"description":"Doctype in error", "input":"", @@ -95,7 +98,10 @@ {"description":"Start of a comment", "input":"", diff --git a/tokenizer/test3.test b/tokenizer/test3.test index d2e3fa4b..86ed0c8a 100644 --- a/tokenizer/test3.test +++ b/tokenizer/test3.test @@ -132,54 +132,88 @@ {"description":"", "input":"", -"output":["ParseError", ["Comment", ""]]}, +"output":["ParseError", ["Comment", ""]], +"errors":[ + { "code": "malformed-comment", "line": 1, "col": 3 } +]}, {"description":" | diff --git a/tree-construction/plain-text-unsafe.dat b/tree-construction/plain-text-unsafe.dat index 9bfec5c91aea728585e1dae7705498958e2fb474..c84c33aedc212077f3d988b18e0098fa90868f1c 100644 GIT binary patch delta 97 zcmX@)-sQ1?M=3A0TsO6-D8HzfOT*C0Qd1!}F()m*C^t1lH#t8yH#M(hvLmLine2
Line3
Line4 #errors (1,2): expected-dashes-or-doctype (1,2): expected-doctype-but-got-eof +#new-errors +(1:3) malformed-comment #document | | @@ -585,6 +587,8 @@ Line1
Line2
Line3
Line4 #errors (1,2): expected-dashes-or-doctype (1,3): expected-doctype-but-got-eof +#new-errors +(1:3) malformed-comment #document | | @@ -609,6 +613,8 @@ Line1
Line2
Line3
Line4 #errors (1,2): expected-dashes-or-doctype (1,10): expected-doctype-but-got-eof +#new-errors +(1:3) malformed-comment #document | | @@ -644,6 +650,8 @@ Line1
Line2
Line3
Line4 #errors (1,2): expected-dashes-or-doctype (1,12): expected-doctype-but-got-eof +#new-errors +(1:3) malformed-comment #document | | diff --git a/tree-construction/tests10.dat b/tree-construction/tests10.dat index 3e9a9f19..f84e2d54 100644 --- a/tree-construction/tests10.dat +++ b/tree-construction/tests10.dat @@ -12,6 +12,8 @@ #errors (1,28) expected-dashes-or-doctype +#new-errors +(1:35) cdata-in-html-content #document | | diff --git a/tree-construction/tests21.dat b/tree-construction/tests21.dat index d384a555..170945fa 100644 --- a/tree-construction/tests21.dat +++ b/tree-construction/tests21.dat @@ -28,6 +28,8 @@ (1,5): expected-doctype-but-got-start-tag (1,7): expected-dashes-or-doctype (1,20): expected-closing-tag-but-got-eof +#new-errors +(1:14) cdata-in-html-content #document | | @@ -183,6 +185,8 @@ (1,5): expected-doctype-but-got-start-tag (1,27): expected-dashes-or-doctype (1,40): expected-closing-tag-but-got-eof +#new-errors +(1:34) cdata-in-html-content #document | | diff --git a/tree-construction/tests26.dat b/tree-construction/tests26.dat index 8964624a..faadfa35 100644 --- a/tree-construction/tests26.dat +++ b/tree-construction/tests26.dat @@ -364,6 +364,8 @@ #errors (1,28): expected-dashes-or-doctype (1,34): expected-closing-tag-but-got-eof +#new-errors +(1:29) malformed-comment #document | | From 2cada342361c8e3df622ee2563bebc2723b032d3 Mon Sep 17 00:00:00 2001 From: inikulin Date: Thu, 6 Apr 2017 15:38:19 +0300 Subject: [PATCH 11/82] Add Script data escaped state parse errors. --- tokenizer/domjs.test | 19 +++++++++ tree-construction/domjs-unsafe.dat | Bin 9929 -> 10020 bytes tree-construction/scriptdata01.dat | 14 +++++++ tree-construction/tests16.dat | 64 +++++++++++++++++++++++++++++ 4 files changed, 97 insertions(+) diff --git a/tokenizer/domjs.test b/tokenizer/domjs.test index 9942f3af..88b16d45 100644 --- a/tokenizer/domjs.test +++ b/tokenizer/domjs.test @@ -34,6 +34,25 @@ { "code": "unexpected-null-character", "line": 1, "col": 1 } ] }, + { + "description":"NUL in script HTML comment", + "doubleEscaped":true, + "initialStates":["Script data state"], + "input":"", + "output":[["Character", ""]], + "errors":[ + { "code": "unexpected-null-character", "line": 1, "col": 9 } + ] + }, + { + "description":"EOF in script HTML comment", + "initialStates":["Script data state"], + "input":"'BAR (1,3): expected-doctype-but-got-chars (1,61): expected-script-data-but-got-eof (1,61): expected-named-closing-tag-but-got-eof +#new-errors +(1:62) eof-in-script-html-comment #document | | @@ -288,6 +296,8 @@ FOOBAR #errors (1,3): expected-doctype-but-got-chars (1,20): unexpected-character-after-solidus-in-tag +#new-errors +(1:21) self-closing-start-tag #document | | diff --git a/tree-construction/tests16.dat b/tree-construction/tests16.dat index 0325dfca..34a708ec 100644 --- a/tree-construction/tests16.dat +++ b/tree-construction/tests16.dat @@ -631,6 +631,8 @@ #errors (1,53): unexpected-EOF-after-solidus-in-tag (1,53): expected-named-closing-tag-but-got-eof +#new-errors +(1:54) eof-in-tag #document | | @@ -804,6 +806,8 @@ #errors (1,47): unexpected-EOF-after-solidus-in-tag (1,47): expected-named-closing-tag-but-got-eof +#new-errors +(1:48) eof-in-tag #document | | @@ -1927,6 +1931,8 @@ (1,8): expected-doctype-but-got-start-tag (1,38): unexpected-EOF-after-solidus-in-tag (1,38): expected-named-closing-tag-but-got-eof +#new-errors +(1:39) eof-in-tag #document | | @@ -2070,6 +2076,8 @@ (1,8): expected-doctype-but-got-start-tag (1,32): unexpected-EOF-after-solidus-in-tag (1,32): expected-named-closing-tag-but-got-eof +#new-errors +(1:33) eof-in-tag #document | | diff --git a/tree-construction/tests2.dat b/tree-construction/tests2.dat index d2d89eed..bdc5d766 100644 --- a/tree-construction/tests2.dat +++ b/tree-construction/tests2.dat @@ -724,6 +724,10 @@ x { content:" | diff --git a/tree-construction/tests26.dat b/tree-construction/tests26.dat index 2f7e35ab..fff2ff7a 100644 --- a/tree-construction/tests26.dat +++ b/tree-construction/tests26.dat @@ -271,6 +271,7 @@ (2,0): expected-closing-tag-but-got-eof #new-errors (1:11) unexpected-character-in-attribute-name +(1:13) self-closing-start-tag #document | | diff --git a/tree-construction/webkit01.dat b/tree-construction/webkit01.dat index 33cae457..a26e4dfd 100644 --- a/tree-construction/webkit01.dat +++ b/tree-construction/webkit01.dat @@ -191,6 +191,10 @@ console.log("FOOBARBAZ"); (1,16): unexpected-character-after-solidus-in-tag (1,24): expected-doctype-but-got-start-tag (1,24): expected-closing-tag-but-got-eof +#new-errors +(1:8) self-closing-start-tag +(1:9) self-closing-start-tag +(1:17) self-closing-start-tag #document | | From 3f7b0c91bae79ffceced90648d66a1ba4e0f42e6 Mon Sep 17 00:00:00 2001 From: Diego Date: Fri, 14 Apr 2017 08:54:04 -0700 Subject: [PATCH 38/82] Generalize eof-in-tag error, Rename attrValue errors (more accurate) --- tokenizer/test1.test | 4 +- tokenizer/test2.test | 2 +- tokenizer/test3.test | 148 ++++++++++++++-------------- tokenizer/test4.test | 36 +++---- tree-construction/html5test-com.dat | 6 +- tree-construction/scriptdata01.dat | 2 +- tree-construction/tests2.dat | 6 +- tree-construction/tests26.dat | 2 +- tree-construction/webkit01.dat | 6 +- tree-construction/webkit02.dat | 4 +- 10 files changed, 108 insertions(+), 108 deletions(-) diff --git a/tokenizer/test1.test b/tokenizer/test1.test index fa622c1d..e109d575 100644 --- a/tokenizer/test1.test +++ b/tokenizer/test1.test @@ -76,7 +76,7 @@ "input":"", "output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 9 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 9 } ]}, {"description":"Repeated attr", @@ -226,7 +226,7 @@ "input":"
", "output":["ParseError", ["StartTag", "a", {"a":"f<"}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 7 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 } ]} ]} diff --git a/tokenizer/test2.test b/tokenizer/test2.test index 83458d71..dbf08014 100644 --- a/tokenizer/test2.test +++ b/tokenizer/test2.test @@ -108,7 +108,7 @@ "input":"", "output":["ParseError", ["StartTag", "h", { "a":"b" }]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"Double-quoted attribute value", diff --git a/tokenizer/test3.test b/tokenizer/test3.test index ef0c8778..bb7d818c 100644 --- a/tokenizer/test3.test +++ b/tokenizer/test3.test @@ -5916,7 +5916,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 7 } + { "code": "missing-attribute-value", "line": 1, "col": 7 } ]}, {"description":"", @@ -6044,7 +6044,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 6 } + { "code": "missing-attribute-value", "line": 1, "col": 6 } ]}, {"description":"", @@ -6065,14 +6065,14 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 7 } + { "code": "missing-attribute-value", "line": 1, "col": 7 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 2, "col": 1 } + { "code": "missing-attribute-value", "line": 2, "col": 1 } ]}, {"description":"", @@ -6086,14 +6086,14 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 7 } + { "code": "missing-attribute-value", "line": 1, "col": 7 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 2, "col": 1 } + { "code": "missing-attribute-value", "line": 2, "col": 1 } ]}, {"description":"", @@ -6107,7 +6107,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 7 } + { "code": "missing-attribute-value", "line": 1, "col": 7 } ]}, {"description":"", @@ -6326,7 +6326,7 @@ "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\uFFFD":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 }, + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 }, { "code": "unexpected-null-character", "line": 1, "col": 8 } ]}, @@ -6335,7 +6335,7 @@ "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]], "errors":[ { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 8 }, - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", @@ -6351,7 +6351,7 @@ "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]], "errors":[ { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 8 }, - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", @@ -6367,7 +6367,7 @@ "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]], "errors":[ { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 8 }, - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", @@ -6378,14 +6378,14 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "!":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\"":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 }, + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 }, { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 8 } ]}, @@ -6393,14 +6393,14 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "&":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "'":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 }, + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 }, { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 8 } ]}, @@ -6408,14 +6408,14 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "-":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", ".":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", @@ -6426,28 +6426,28 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "0":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "1":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "9":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "<":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 }, + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 }, { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 8 } ]}, @@ -6455,7 +6455,7 @@ "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "=":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 }, + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 }, { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 8 } ]}, @@ -6467,91 +6467,91 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "?":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "@":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "b":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "y":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "z":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "`":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "b":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "y":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "z":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "{":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "\uDBC0\uDC00":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } ]}, {"description":"", @@ -6670,21 +6670,21 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"<"}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 6 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"="}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 6 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 6 } + { "code": "missing-attribute-value", "line": 1, "col": 6 } ]}, {"description":"", @@ -6715,7 +6715,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"`"}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 6 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 } ]}, {"description":"", @@ -6778,7 +6778,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"a\""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 7 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 } ]}, {"description":"", @@ -6797,7 +6797,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"a'"}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 7 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 } ]}, {"description":"", @@ -6828,14 +6828,14 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"a<"}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 7 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"a="}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 7 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 } ]}, {"description":"", @@ -6870,7 +6870,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"a`"}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 7 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 } ]}, {"description":"", @@ -7029,7 +7029,7 @@ "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"\uFFFD":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 }, + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 }, { "code": "unexpected-null-character", "line": 1, "col": 4 } ]}, @@ -7037,14 +7037,14 @@ "input":"", "output":["ParseError", ["StartTag", "a", {}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", @@ -7052,35 +7052,35 @@ "output":["ParseError", "ParseError", ["StartTag", "a", {"\u000B":""}]], "errors":[ { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 4 }, - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"!":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"\"":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 }, + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 }, { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 4 } ]}, @@ -7088,14 +7088,14 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"&":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"'":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 }, + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 }, { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 4 } ]}, @@ -7103,42 +7103,42 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"-":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {}, true]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"0":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"1":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"9":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"<":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 }, + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 }, { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 4 } ]}, @@ -7146,7 +7146,7 @@ "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"=":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 }, + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 }, { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 } ]}, @@ -7158,91 +7158,91 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"?":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"@":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"b":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"y":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"z":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"`":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"b":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"y":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"z":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"{":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"\uDBC0\uDC00":""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 } + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, {"description":"", diff --git a/tokenizer/test4.test b/tokenizer/test4.test index 7302bcc9..ac39270a 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -4,22 +4,22 @@ "input":"", "output":["ParseError", "ParseError", ["StartTag", "z", {"0": "", "<": ""}]], "errors":[ - { "code": "self-closing-start-tag", "line": 1, "col": 4 }, + { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 }, { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 7 } ]}, -{"description":"< in attribute value", +{"description":"", "input":"", "output":["ParseError", ["StartTag", "z", {"x": "<"}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 6 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 } ]}, {"description":"= in unquoted attribute value", "input":"", "output":["ParseError", ["StartTag", "z", {"z": "z=z"}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 7 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 } ]}, {"description":"= attribute", @@ -34,7 +34,7 @@ "output":["ParseError", "ParseError", ["StartTag", "z", {"=": ""}]], "errors":[ { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }, - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 6 } + { "code": "missing-attribute-value", "line": 1, "col": 6 } ]}, {"description":"=== attribute", @@ -42,7 +42,7 @@ "output":["ParseError", "ParseError", ["StartTag", "z", {"=": "="}]], "errors":[ { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }, - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 6 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 } ]}, {"description":"==== attribute", @@ -50,8 +50,8 @@ "output":["ParseError", "ParseError", "ParseError", ["StartTag", "z", {"=": "=="}]], "errors":[ { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 4 }, - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 6 }, - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 7 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 6 }, + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 7 } ]}, {"description":"\" after ampersand in double-quoted attribute value", @@ -110,7 +110,7 @@ "input":"", "output":["ParseError", ["StartTag", "foo", {"a": "b'c"}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 9 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 } ]}, @@ -118,21 +118,21 @@ "input":"", "output":["ParseError", ["StartTag", "foo", {"a": "b\"c"}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 9 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 9 } ]}, {"description":"Double-quoted attribute value not followed by whitespace", "input":"", "output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 11 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 11 } ]}, {"description":"Single-quoted attribute value not followed by whitespace", "input":"", "output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 11 } + { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 11 } ]}, {"description":"Quoted attribute followed by permitted /", @@ -386,7 +386,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"aa`"}]], "errors":[ - { "code": "unexpected-character-in-attribute-value", "line": 1, "col": 8 } + { "code": "unexpected-character-in-unquoted-attribute-value", "line": 1, "col": 8 } ]}, {"description":"EOF in tag name state ", @@ -421,35 +421,35 @@ "input":" #errors (1,10): equals-in-unquoted-attribute-value -(1,14): unexpected-character-in-unquoted-attribute-value +(1,14): missing-whitespace-after-attribute-value (1,15): expected-doctype-but-got-start-tag (1,15): expected-closing-tag-but-got-eof #new-errors -(1:10) unexpected-character-in-attribute-value -(1:14) unexpected-character-in-attribute-value +(1:10) unexpected-character-in-unquoted-attribute-value +(1:14) unexpected-character-in-unquoted-attribute-value #document | | diff --git a/tree-construction/scriptdata01.dat b/tree-construction/scriptdata01.dat index 31f9dc3b..9cb83c75 100644 --- a/tree-construction/scriptdata01.dat +++ b/tree-construction/scriptdata01.dat @@ -56,7 +56,7 @@ FOOBAR (1,3): expected-doctype-but-got-chars (1,20): unexpected-character-after-solidus-in-tag #new-errors -(1:21) self-closing-start-tag +(1:21) abruption-of-tag-self-closure #document | | diff --git a/tree-construction/tests2.dat b/tree-construction/tests2.dat index bdc5d766..83fef840 100644 --- a/tree-construction/tests2.dat +++ b/tree-construction/tests2.dat @@ -725,9 +725,9 @@ x { content:" | diff --git a/tree-construction/tests26.dat b/tree-construction/tests26.dat index fff2ff7a..eb9a5bc3 100644 --- a/tree-construction/tests26.dat +++ b/tree-construction/tests26.dat @@ -271,7 +271,7 @@ (2,0): expected-closing-tag-but-got-eof #new-errors (1:11) unexpected-character-in-attribute-name -(1:13) self-closing-start-tag +(1:13) abruption-of-tag-self-closure #document | | diff --git a/tree-construction/webkit01.dat b/tree-construction/webkit01.dat index a26e4dfd..2a0e52c7 100644 --- a/tree-construction/webkit01.dat +++ b/tree-construction/webkit01.dat @@ -192,9 +192,9 @@ console.log("FOOBARBAZ"); (1,24): expected-doctype-but-got-start-tag (1,24): expected-closing-tag-but-got-eof #new-errors -(1:8) self-closing-start-tag -(1:9) self-closing-start-tag -(1:17) self-closing-start-tag +(1:8) abruption-of-tag-self-closure +(1:9) abruption-of-tag-self-closure +(1:17) abruption-of-tag-self-closure #document | | diff --git a/tree-construction/webkit02.dat b/tree-construction/webkit02.dat index 494fbfab..4dcfe67b 100644 --- a/tree-construction/webkit02.dat +++ b/tree-construction/webkit02.dat @@ -59,9 +59,9 @@ ><div>A</div></body></html>
 #errors
 (1,6): expected-doctype-but-got-start-tag
-(1,67): eof-in-attribute-value-double-quote
+(1,67): eof-in-tag-double-quote
 #new-errors
-(1:68) eof-in-attribute-value
+(1:68) eof-in-tag
 #document
 | <html>
 |   <head>

From 1a2cf4c0a54d6b3bb84ada67028c4ef594f0db67 Mon Sep 17 00:00:00 2001
From: Diego <dval@salesforce.com>
Date: Sat, 15 Apr 2017 11:03:42 -0700
Subject: [PATCH 39/82] Rename for better semantics:
 missing-whitespace-between-attributes

---
 tokenizer/test1.test                |  2 +-
 tokenizer/test3.test                | 56 ++++++++++++++---------------
 tokenizer/test4.test                |  4 +--
 tree-construction/html5test-com.dat |  2 +-
 4 files changed, 32 insertions(+), 32 deletions(-)

diff --git a/tokenizer/test1.test b/tokenizer/test1.test
index e109d575..3b2a872e 100644
--- a/tokenizer/test1.test
+++ b/tokenizer/test1.test
@@ -76,7 +76,7 @@
 ", "output":["ParseError", ["StartTag", "h", {"a":"b", "c":"d"}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 9 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 9 } ]}, {"description":"Repeated attr", diff --git a/tokenizer/test3.test b/tokenizer/test3.test index bb7d818c..5d52c0d7 100644 --- a/tokenizer/test3.test +++ b/tokenizer/test3.test @@ -6326,7 +6326,7 @@ "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\uFFFD":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 }, + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }, { "code": "unexpected-null-character", "line": 1, "col": 8 } ]}, @@ -6335,7 +6335,7 @@ "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]], "errors":[ { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 8 }, - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", @@ -6351,7 +6351,7 @@ "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]], "errors":[ { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 8 }, - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", @@ -6367,7 +6367,7 @@ "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]], "errors":[ { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 8 }, - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", @@ -6378,14 +6378,14 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "!":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\"":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 }, + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }, { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 8 } ]}, @@ -6393,14 +6393,14 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "&":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "'":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 }, + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }, { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 8 } ]}, @@ -6408,14 +6408,14 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "-":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", ".":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", @@ -6426,28 +6426,28 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "0":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "1":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "9":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "<":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 }, + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }, { "code": "unexpected-character-in-attribute-name", "line": 1, "col": 8 } ]}, @@ -6455,7 +6455,7 @@ "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "=":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 }, + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 }, { "code": "unexpected-equals-sign-before-attribute-name", "line": 1, "col": 8 } ]}, @@ -6467,91 +6467,91 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "?":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "@":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "b":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "y":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "z":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "`":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "b":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "y":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "z":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "{":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "\uDBC0\uDC00":""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 8 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, {"description":"", diff --git a/tokenizer/test4.test b/tokenizer/test4.test index ac39270a..794baa49 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -125,14 +125,14 @@ "input":"", "output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 11 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 } ]}, {"description":"Single-quoted attribute value not followed by whitespace", "input":"", "output":["ParseError", ["StartTag", "foo", {"a": "b", "c": ""}]], "errors":[ - { "code": "missing-whitespace-after-attribute-value", "line": 1, "col": 11 } + { "code": "missing-whitespace-between-attributes", "line": 1, "col": 11 } ]}, {"description":"Quoted attribute followed by permitted /", diff --git a/tree-construction/html5test-com.dat b/tree-construction/html5test-com.dat index c27baf38..36b4b774 100644 --- a/tree-construction/html5test-com.dat +++ b/tree-construction/html5test-com.dat @@ -28,7 +28,7 @@
#errors (1,10): equals-in-unquoted-attribute-value -(1,14): missing-whitespace-after-attribute-value +(1,14): missing-whitespace-between-attributes (1,15): expected-doctype-but-got-start-tag (1,15): expected-closing-tag-but-got-eof #new-errors From b9511ae85155bc249049054fc92540be425abf38 Mon Sep 17 00:00:00 2001 From: inikulin Date: Wed, 19 Apr 2017 16:15:01 +0300 Subject: [PATCH 40/82] Add Hexademical character reference start state errors. --- tokenizer/test1.test | 5 ++++- tokenizer/test4.test | 13 ++++++++++++- tree-construction/entities01.dat | 4 ++++ tree-construction/tests2.dat | 4 ++++ 4 files changed, 24 insertions(+), 2 deletions(-) diff --git a/tokenizer/test1.test b/tokenizer/test1.test index 3b2a872e..27e7787d 100644 --- a/tokenizer/test1.test +++ b/tokenizer/test1.test @@ -157,7 +157,10 @@ {"description":"Unfinished numeric entity", "input":"&#x", -"output":["ParseError", ["Character", "&#x"]]}, +"output":["ParseError", ["Character", "&#x"]], +"errors":[ + { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 } +]}, {"description":"Entity with trailing semicolon (1)", "input":"I'm ¬it", diff --git a/tokenizer/test4.test b/tokenizer/test4.test index 794baa49..6a080856 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -179,7 +179,18 @@ {"description":"Empty hex numeric entities", "input":"&#x &#X ", -"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]]}, +"output":["ParseError", ["Character", "&#x "], "ParseError", ["Character", "&#X "]], +"errors":[ + { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 }, + { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 8 } +]}, + +{"description":"Invalid digit in hex numeric entity", +"input":"&#xZ", +"output":[["Character", "&#xZ"]], +"errors":[ + { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 } +]}, {"description":"Empty decimal numeric entities", "input":"&# &#; ", diff --git a/tree-construction/entities01.dat b/tree-construction/entities01.dat index 642c6f2f..4b868a82 100644 --- a/tree-construction/entities01.dat +++ b/tree-construction/entities01.dat @@ -161,6 +161,8 @@ FOO&#xZOO #errors (1,3): expected-doctype-but-got-chars (1,6): expected-numeric-entity +#new-errors +(1:7) absence-of-digits-in-numeric-character-reference #document | | @@ -172,6 +174,8 @@ FOO&#XZOO #errors (1,3): expected-doctype-but-got-chars (1,6): expected-numeric-entity +#new-errors +(1:7) absence-of-digits-in-numeric-character-reference #document | | diff --git a/tree-construction/tests2.dat b/tree-construction/tests2.dat index 83fef840..5ed5ee63 100644 --- a/tree-construction/tests2.dat +++ b/tree-construction/tests2.dat @@ -288,6 +288,8 @@ #errors (1,3): expected-numeric-entity (1,3): expected-doctype-but-got-chars +#new-errors +(1:4) absence-of-digits-in-numeric-character-reference #document | | @@ -299,6 +301,8 @@ #errors (1,3): expected-numeric-entity (1,3): expected-doctype-but-got-chars +#new-errors +(1:4) absence-of-digits-in-numeric-character-reference #document | | From 090cd9c4b8105a14fee4115617d563900705a2d9 Mon Sep 17 00:00:00 2001 From: inikulin Date: Wed, 19 Apr 2017 16:44:32 +0300 Subject: [PATCH 41/82] Add Decimal character reference start state errors. --- tokenizer/test1.test | 5 ++++- tokenizer/test4.test | 13 ++++++++++++- tree-construction/entities01.dat | 4 ++++ tree-construction/tests2.dat | 2 ++ 4 files changed, 22 insertions(+), 2 deletions(-) diff --git a/tokenizer/test1.test b/tokenizer/test1.test index 27e7787d..f618f6f9 100644 --- a/tokenizer/test1.test +++ b/tokenizer/test1.test @@ -153,7 +153,10 @@ {"description":"Ampersand, number sign", "input":"&#", -"output":["ParseError", ["Character", "&#"]]}, +"output":["ParseError", ["Character", "&#"]], +"errors":[ + { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 } +]}, {"description":"Unfinished numeric entity", "input":"&#x", diff --git a/tokenizer/test4.test b/tokenizer/test4.test index 6a080856..1cecd648 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -194,7 +194,18 @@ {"description":"Empty decimal numeric entities", "input":"&# &#; ", -"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]]}, +"output":["ParseError", ["Character", "&# "], "ParseError", ["Character", "&#; "]], +"errors":[ + { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 }, + { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 6 } +]}, + +{"description":"Invalid digit in decimal numeric entity", +"input":"&#A", +"output":[["Character", "&#A"]], +"errors":[ + { "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 } +]}, {"description":"Non-BMP numeric entity", "input":"𐀀", diff --git a/tree-construction/entities01.dat b/tree-construction/entities01.dat index 4b868a82..85ce29bd 100644 --- a/tree-construction/entities01.dat +++ b/tree-construction/entities01.dat @@ -128,6 +128,8 @@ FOO&#BAR #errors (1,3): expected-doctype-but-got-chars (1,5): expected-numeric-entity +#new-errors +(1:6) absence-of-digits-in-numeric-character-reference #document | | @@ -139,6 +141,8 @@ FOO&#ZOO #errors (1,3): expected-doctype-but-got-chars (1,5): expected-numeric-entity +#new-errors +(1:6) absence-of-digits-in-numeric-character-reference #document | | diff --git a/tree-construction/tests2.dat b/tree-construction/tests2.dat index 5ed5ee63..d2dea2a1 100644 --- a/tree-construction/tests2.dat +++ b/tree-construction/tests2.dat @@ -277,6 +277,8 @@ #errors (1,2): expected-numeric-entity (1,2): expected-doctype-but-got-chars +#new-errors +(1:3) absence-of-digits-in-numeric-character-reference #document | | From 741526725498848e5ddadaeffe6f41aa3cc16991 Mon Sep 17 00:00:00 2001 From: inikulin Date: Wed, 19 Apr 2017 17:03:15 +0300 Subject: [PATCH 42/82] Add Hexademical character reference state errors. --- tokenizer/test4.test | 5 ++++- tree-construction/entities01.dat | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/tokenizer/test4.test b/tokenizer/test4.test index 1cecd648..ec320552 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -163,7 +163,10 @@ {"description":"Zero hex numeric entity", "input":"�", -"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", "ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } +]}, {"description":"Zero decimal numeric entity", "input":"�", diff --git a/tree-construction/entities01.dat b/tree-construction/entities01.dat index 85ce29bd..bdb3909e 100644 --- a/tree-construction/entities01.dat +++ b/tree-construction/entities01.dat @@ -154,6 +154,8 @@ FOOºR #errors (1,3): expected-doctype-but-got-chars (1,7): expected-numeric-entity +#new-errors +(1:9) missing-semicolon-after-character-reference #document | | @@ -202,6 +204,8 @@ FOO䆺R #errors (1,3): expected-doctype-but-got-chars (1,10): numeric-entity-without-semicolon +#new-errors +(1:11) missing-semicolon-after-character-reference #document | | @@ -213,6 +217,8 @@ FOOAZOO #errors (1,3): expected-doctype-but-got-chars (1,8): numeric-entity-without-semicolon +#new-errors +(1:9) missing-semicolon-after-character-reference #document | | From 10b8ddd1794c934651ed26befd0f700335bc362f Mon Sep 17 00:00:00 2001 From: inikulin Date: Wed, 19 Apr 2017 17:31:51 +0300 Subject: [PATCH 43/82] Add Decimal character reference state errors. --- tokenizer/entities.test | 20 ++++++++++++++++---- tokenizer/numericEntities.test | 30 ++++++++++++++++++++++++------ tokenizer/test4.test | 5 ++++- tree-construction/entities01.dat | 14 ++++++++++++++ tree-construction/tests2.dat | 2 ++ 5 files changed, 60 insertions(+), 11 deletions(-) diff --git a/tokenizer/entities.test b/tokenizer/entities.test index 27b85a1c..537720e2 100644 --- a/tokenizer/entities.test +++ b/tokenizer/entities.test @@ -266,18 +266,30 @@ {"description": "Decimal numeric entity followed by hex character a.", "input":"aa", -"output": ["ParseError", ["Character", "aa"]]}, +"output": ["ParseError", ["Character", "aa"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } +]}, {"description": "Decimal numeric entity followed by hex character A.", "input":"aA", -"output": ["ParseError", ["Character", "aA"]]}, +"output": ["ParseError", ["Character", "aA"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } +]}, {"description": "Decimal numeric entity followed by hex character f.", "input":"af", -"output": ["ParseError", ["Character", "af"]]}, +"output": ["ParseError", ["Character", "af"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } +]}, {"description": "Decimal numeric entity followed by hex character A.", "input":"aF", -"output": ["ParseError", ["Character", "aF"]]} +"output": ["ParseError", ["Character", "aF"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } +]} ]} diff --git a/tokenizer/numericEntities.test b/tokenizer/numericEntities.test index 43de84b0..445e16a4 100644 --- a/tokenizer/numericEntities.test +++ b/tokenizer/numericEntities.test @@ -2,27 +2,45 @@ {"description": "Invalid unterminated numeric entity character overflow before EOF", "input": "�", -"output": ["ParseError", "ParseError", ["Character", "\uFFFD"]]}, +"output": ["ParseError", "ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 14 } +]}, {"description": "Invalid unterminated numeric entity character overflow before EOF", "input": "�", -"output": ["ParseError", "ParseError", ["Character", "\uFFFD"]]}, +"output": ["ParseError", "ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 13 } +]}, {"description": "Invalid unterminated numeric entity character overflow before EOF", "input": "�", -"output": ["ParseError", "ParseError", ["Character", "\uFFFD"]]}, +"output": ["ParseError", "ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 15 } +]}, {"description": "Invalid unterminated numeric entity character overflow", "input": "�x", -"output": ["ParseError", "ParseError", ["Character", "\uFFFDx"]]}, +"output": ["ParseError", "ParseError", ["Character", "\uFFFDx"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 14 } +]}, {"description": "Invalid unterminated numeric entity character overflow", "input": "�x", -"output": ["ParseError", "ParseError", ["Character", "\uFFFDx"]]}, +"output": ["ParseError", "ParseError", ["Character", "\uFFFDx"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 13 } +]}, {"description": "Invalid unterminated numeric entity character overflow", "input": "�x", -"output": ["ParseError", "ParseError", ["Character", "\uFFFDx"]]}, +"output": ["ParseError", "ParseError", ["Character", "\uFFFDx"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 15 } +]}, {"description": "Invalid numeric entity character overflow", "input": "�", diff --git a/tokenizer/test4.test b/tokenizer/test4.test index ec320552..d00d36c5 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -170,7 +170,10 @@ {"description":"Zero decimal numeric entity", "input":"�", -"output":["ParseError", "ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", "ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 4 } +]}, {"description":"Zero-prefixed hex numeric entity", "input":"A", diff --git a/tree-construction/entities01.dat b/tree-construction/entities01.dat index bdb3909e..d9396fea 100644 --- a/tree-construction/entities01.dat +++ b/tree-construction/entities01.dat @@ -193,6 +193,8 @@ FOO)BAR #errors (1,3): expected-doctype-but-got-chars (1,7): numeric-entity-without-semicolon +#new-errors +(1:8) missing-semicolon-after-character-reference #document | | @@ -742,6 +744,8 @@ FOO� (1,3): expected-doctype-but-got-chars (1,13): illegal-codepoint-for-numeric-entity (1,13): eof-in-numeric-entity +#new-errors +(1:17) missing-semicolon-after-character-reference #document | | @@ -754,6 +758,8 @@ FOO� (1,3): expected-doctype-but-got-chars (1,13): illegal-codepoint-for-numeric-entity (1,13): eof-in-numeric-entity +#new-errors +(1:16) missing-semicolon-after-character-reference #document | | @@ -766,6 +772,8 @@ FOO� (1,3): expected-doctype-but-got-chars (1,13): illegal-codepoint-for-numeric-entity (1,13): eof-in-numeric-entity +#new-errors +(1:18) missing-semicolon-after-character-reference #document | | @@ -778,6 +786,8 @@ FOO�ZOO (1,3): expected-doctype-but-got-chars (1,16): numeric-entity-without-semicolon (1,16): illegal-codepoint-for-numeric-entity +#new-errors +(1:17) missing-semicolon-after-character-reference #document | | @@ -790,6 +800,8 @@ FOO�ZOO (1,3): expected-doctype-but-got-chars (1,15): numeric-entity-without-semicolon (1,15): illegal-codepoint-for-numeric-entity +#new-errors +(1:16) missing-semicolon-after-character-reference #document | | @@ -802,6 +814,8 @@ FOO�ZOO (1,3): expected-doctype-but-got-chars (1,17): numeric-entity-without-semicolon (1,17): illegal-codepoint-for-numeric-entity +#new-errors +(1:18) missing-semicolon-after-character-reference #document | | diff --git a/tree-construction/tests2.dat b/tree-construction/tests2.dat index d2dea2a1..b2ce7169 100644 --- a/tree-construction/tests2.dat +++ b/tree-construction/tests2.dat @@ -316,6 +316,8 @@ #errors (1,4): numeric-entity-without-semicolon (1,4): expected-doctype-but-got-chars +#new-errors +(1:5) missing-semicolon-after-character-reference #document | | From a95baa43f116f718966d9d0a92995095f46f5be3 Mon Sep 17 00:00:00 2001 From: inikulin Date: Thu, 20 Apr 2017 14:38:30 +0300 Subject: [PATCH 44/82] Add Numeric character reference end state errors. --- tokenizer/entities.test | 320 ++++++++++++--- tokenizer/numericEntities.test | 522 +++++++++++++++++++----- tokenizer/test2.test | 26 +- tokenizer/test4.test | 50 ++- tree-construction/entities01.dat | 88 ++++ tree-construction/plain-text-unsafe.dat | Bin 9226 -> 9286 bytes 6 files changed, 821 insertions(+), 185 deletions(-) diff --git a/tokenizer/entities.test b/tokenizer/entities.test index 537720e2..7b997153 100644 --- a/tokenizer/entities.test +++ b/tokenizer/entities.test @@ -10,259 +10,451 @@ {"description": "CR as numeric entity", "input":" ", -"output": ["ParseError", ["Character", "\r"]]}, +"output": ["ParseError", ["Character", "\r"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 7 } +]}, {"description": "CR as hexadecimal numeric entity", "input":" ", -"output": ["ParseError", ["Character", "\r"]]}, +"output": ["ParseError", ["Character", "\r"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 EURO SIGN numeric entity.", "input":"€", -"output": ["ParseError", ["Character", "\u20AC"]]}, +"output": ["ParseError", ["Character", "\u20AC"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", -"output": ["ParseError", ["Character", "\u0081"]]}, +"output": ["ParseError", ["Character", "\u0081"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.", "input":"‚", -"output": ["ParseError", ["Character", "\u201A"]]}, +"output": ["ParseError", ["Character", "\u201A"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.", "input":"ƒ", -"output": ["ParseError", ["Character", "\u0192"]]}, +"output": ["ParseError", ["Character", "\u0192"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.", "input":"„", -"output": ["ParseError", ["Character", "\u201E"]]}, +"output": ["ParseError", ["Character", "\u201E"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.", "input":"…", -"output": ["ParseError", ["Character", "\u2026"]]}, +"output": ["ParseError", ["Character", "\u2026"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 DAGGER numeric entity.", "input":"†", -"output": ["ParseError", ["Character", "\u2020"]]}, +"output": ["ParseError", ["Character", "\u2020"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 DOUBLE DAGGER numeric entity.", "input":"‡", -"output": ["ParseError", ["Character", "\u2021"]]}, +"output": ["ParseError", ["Character", "\u2021"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.", "input":"ˆ", -"output": ["ParseError", ["Character", "\u02C6"]]}, +"output": ["ParseError", ["Character", "\u02C6"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 PER MILLE SIGN numeric entity.", "input":"‰", -"output": ["ParseError", ["Character", "\u2030"]]}, +"output": ["ParseError", ["Character", "\u2030"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.", "input":"Š", -"output": ["ParseError", ["Character", "\u0160"]]}, +"output": ["ParseError", ["Character", "\u0160"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.", "input":"‹", -"output": ["ParseError", ["Character", "\u2039"]]}, +"output": ["ParseError", ["Character", "\u2039"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.", "input":"Œ", -"output": ["ParseError", ["Character", "\u0152"]]}, +"output": ["ParseError", ["Character", "\u0152"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", -"output": ["ParseError", ["Character", "\u008D"]]}, +"output": ["ParseError", ["Character", "\u008D"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.", "input":"Ž", -"output": ["ParseError", ["Character", "\u017D"]]}, +"output": ["ParseError", ["Character", "\u017D"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", -"output": ["ParseError", ["Character", "\u008F"]]}, +"output": ["ParseError", ["Character", "\u008F"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", -"output": ["ParseError", ["Character", "\u0090"]]}, +"output": ["ParseError", ["Character", "\u0090"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.", "input":"‘", -"output": ["ParseError", ["Character", "\u2018"]]}, +"output": ["ParseError", ["Character", "\u2018"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.", "input":"’", -"output": ["ParseError", ["Character", "\u2019"]]}, +"output": ["ParseError", ["Character", "\u2019"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.", "input":"“", -"output": ["ParseError", ["Character", "\u201C"]]}, +"output": ["ParseError", ["Character", "\u201C"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.", "input":"”", -"output": ["ParseError", ["Character", "\u201D"]]}, +"output": ["ParseError", ["Character", "\u201D"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 BULLET numeric entity.", "input":"•", -"output": ["ParseError", ["Character", "\u2022"]]}, +"output": ["ParseError", ["Character", "\u2022"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 EN DASH numeric entity.", "input":"–", -"output": ["ParseError", ["Character", "\u2013"]]}, +"output": ["ParseError", ["Character", "\u2013"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 EM DASH numeric entity.", "input":"—", -"output": ["ParseError", ["Character", "\u2014"]]}, +"output": ["ParseError", ["Character", "\u2014"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 SMALL TILDE numeric entity.", "input":"˜", -"output": ["ParseError", ["Character", "\u02DC"]]}, +"output": ["ParseError", ["Character", "\u02DC"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 TRADE MARK SIGN numeric entity.", "input":"™", -"output": ["ParseError", ["Character", "\u2122"]]}, +"output": ["ParseError", ["Character", "\u2122"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.", "input":"š", -"output": ["ParseError", ["Character", "\u0161"]]}, +"output": ["ParseError", ["Character", "\u0161"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.", "input":"›", -"output": ["ParseError", ["Character", "\u203A"]]}, +"output": ["ParseError", ["Character", "\u203A"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.", "input":"œ", -"output": ["ParseError", ["Character", "\u0153"]]}, +"output": ["ParseError", ["Character", "\u0153"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", -"output": ["ParseError", ["Character", "\u009D"]]}, +"output": ["ParseError", ["Character", "\u009D"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.", "input":"€", -"output": ["ParseError", ["Character", "\u20AC"]]}, +"output": ["ParseError", ["Character", "\u20AC"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", -"output": ["ParseError", ["Character", "\u0081"]]}, +"output": ["ParseError", ["Character", "\u0081"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.", "input":"‚", -"output": ["ParseError", ["Character", "\u201A"]]}, +"output": ["ParseError", ["Character", "\u201A"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.", "input":"ƒ", -"output": ["ParseError", ["Character", "\u0192"]]}, +"output": ["ParseError", ["Character", "\u0192"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.", "input":"„", -"output": ["ParseError", ["Character", "\u201E"]]}, +"output": ["ParseError", ["Character", "\u201E"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.", "input":"…", -"output": ["ParseError", ["Character", "\u2026"]]}, +"output": ["ParseError", ["Character", "\u2026"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 DAGGER hexadecimal numeric entity.", "input":"†", -"output": ["ParseError", ["Character", "\u2020"]]}, +"output": ["ParseError", ["Character", "\u2020"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.", "input":"‡", -"output": ["ParseError", ["Character", "\u2021"]]}, +"output": ["ParseError", ["Character", "\u2021"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.", "input":"ˆ", -"output": ["ParseError", ["Character", "\u02C6"]]}, +"output": ["ParseError", ["Character", "\u02C6"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.", "input":"‰", -"output": ["ParseError", ["Character", "\u2030"]]}, +"output": ["ParseError", ["Character", "\u2030"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.", "input":"Š", -"output": ["ParseError", ["Character", "\u0160"]]}, +"output": ["ParseError", ["Character", "\u0160"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.", "input":"‹", -"output": ["ParseError", ["Character", "\u2039"]]}, +"output": ["ParseError", ["Character", "\u2039"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.", "input":"Œ", -"output": ["ParseError", ["Character", "\u0152"]]}, +"output": ["ParseError", ["Character", "\u0152"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", -"output": ["ParseError", ["Character", "\u008D"]]}, +"output": ["ParseError", ["Character", "\u008D"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.", "input":"Ž", -"output": ["ParseError", ["Character", "\u017D"]]}, +"output": ["ParseError", ["Character", "\u017D"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", -"output": ["ParseError", ["Character", "\u008F"]]}, +"output": ["ParseError", ["Character", "\u008F"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", -"output": ["ParseError", ["Character", "\u0090"]]}, +"output": ["ParseError", ["Character", "\u0090"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.", "input":"‘", -"output": ["ParseError", ["Character", "\u2018"]]}, +"output": ["ParseError", ["Character", "\u2018"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.", "input":"’", -"output": ["ParseError", ["Character", "\u2019"]]}, +"output": ["ParseError", ["Character", "\u2019"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.", "input":"“", -"output": ["ParseError", ["Character", "\u201C"]]}, +"output": ["ParseError", ["Character", "\u201C"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.", "input":"”", -"output": ["ParseError", ["Character", "\u201D"]]}, +"output": ["ParseError", ["Character", "\u201D"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 BULLET hexadecimal numeric entity.", "input":"•", -"output": ["ParseError", ["Character", "\u2022"]]}, +"output": ["ParseError", ["Character", "\u2022"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 EN DASH hexadecimal numeric entity.", "input":"–", -"output": ["ParseError", ["Character", "\u2013"]]}, +"output": ["ParseError", ["Character", "\u2013"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 EM DASH hexadecimal numeric entity.", "input":"—", -"output": ["ParseError", ["Character", "\u2014"]]}, +"output": ["ParseError", ["Character", "\u2014"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.", "input":"˜", -"output": ["ParseError", ["Character", "\u02DC"]]}, +"output": ["ParseError", ["Character", "\u02DC"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.", "input":"™", -"output": ["ParseError", ["Character", "\u2122"]]}, +"output": ["ParseError", ["Character", "\u2122"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.", "input":"š", -"output": ["ParseError", ["Character", "\u0161"]]}, +"output": ["ParseError", ["Character", "\u0161"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.", "input":"›", -"output": ["ParseError", ["Character", "\u203A"]]}, +"output": ["ParseError", ["Character", "\u203A"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.", "input":"œ", -"output": ["ParseError", ["Character", "\u0153"]]}, +"output": ["ParseError", ["Character", "\u0153"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", -"output": ["ParseError", ["Character", "\u009D"]]}, +"output": ["ParseError", ["Character", "\u009D"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.", "input":"ž", -"output": ["ParseError", ["Character", "\u017E"]]}, +"output": ["ParseError", ["Character", "\u017E"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.", "input":"Ÿ", -"output": ["ParseError", ["Character", "\u0178"]]}, +"output": ["ParseError", ["Character", "\u0178"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } +]}, {"description": "Decimal numeric entity followed by hex character a.", "input":"aa", diff --git a/tokenizer/numericEntities.test b/tokenizer/numericEntities.test index 445e16a4..6dceac12 100644 --- a/tokenizer/numericEntities.test +++ b/tokenizer/numericEntities.test @@ -4,443 +4,753 @@ "input": "�", "output": ["ParseError", "ParseError", ["Character", "\uFFFD"]], "errors":[ - { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 14 } + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 14 }, + { "code": "non-unicode-character-reference", "line": 1, "col": 14 } ]}, {"description": "Invalid unterminated numeric entity character overflow before EOF", "input": "�", "output": ["ParseError", "ParseError", ["Character", "\uFFFD"]], "errors":[ - { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 13 } + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 13 }, + { "code": "non-unicode-character-reference", "line": 1, "col": 13 } ]}, {"description": "Invalid unterminated numeric entity character overflow before EOF", "input": "�", "output": ["ParseError", "ParseError", ["Character", "\uFFFD"]], "errors":[ - { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 15 } + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 15 }, + { "code": "non-unicode-character-reference", "line": 1, "col": 15 } ]}, {"description": "Invalid unterminated numeric entity character overflow", "input": "�x", "output": ["ParseError", "ParseError", ["Character", "\uFFFDx"]], "errors":[ - { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 14 } + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 14 }, + { "code": "non-unicode-character-reference", "line": 1, "col": 14 } ]}, {"description": "Invalid unterminated numeric entity character overflow", "input": "�x", "output": ["ParseError", "ParseError", ["Character", "\uFFFDx"]], "errors":[ - { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 13 } + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 13 }, + { "code": "non-unicode-character-reference", "line": 1, "col": 13 } ]}, {"description": "Invalid unterminated numeric entity character overflow", "input": "�x", "output": ["ParseError", "ParseError", ["Character", "\uFFFDx"]], "errors":[ - { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 15 } + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 15 }, + { "code": "non-unicode-character-reference", "line": 1, "col": 15 } ]}, {"description": "Invalid numeric entity character overflow", "input": "�", -"output": ["ParseError", ["Character", "\uFFFD"]]}, +"output": ["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 15 } +]}, {"description": "Invalid numeric entity character overflow", "input": "�", -"output": ["ParseError", ["Character", "\uFFFD"]]}, +"output": ["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 14 } +]}, {"description": "Invalid numeric entity character overflow", "input": "�", -"output": ["ParseError", ["Character", "\uFFFD"]]}, +"output": ["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 16 } +]}, {"description": "Invalid numeric entity character U+0000", "input": "�", -"output": ["ParseError", ["Character", "\uFFFD"]]}, +"output": ["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "null-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0001", "input": "", -"output": ["ParseError", ["Character", "\u0001"]]}, +"output": ["ParseError", ["Character", "\u0001"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0002", "input": "", -"output": ["ParseError", ["Character", "\u0002"]]}, +"output": ["ParseError", ["Character", "\u0002"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, + {"description": "Invalid numeric entity character U+0003", "input": "", -"output": ["ParseError", ["Character", "\u0003"]]}, +"output": ["ParseError", ["Character", "\u0003"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, + {"description": "Invalid numeric entity character U+0004", "input": "", -"output": ["ParseError", ["Character", "\u0004"]]}, +"output": ["ParseError", ["Character", "\u0004"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, + {"description": "Invalid numeric entity character U+0005", "input": "", -"output": ["ParseError", ["Character", "\u0005"]]}, +"output": ["ParseError", ["Character", "\u0005"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, + {"description": "Invalid numeric entity character U+0006", "input": "", -"output": ["ParseError", ["Character", "\u0006"]]}, +"output": ["ParseError", ["Character", "\u0006"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0007", "input": "", -"output": ["ParseError", ["Character", "\u0007"]]}, +"output": ["ParseError", ["Character", "\u0007"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0008", "input": "", -"output": ["ParseError", ["Character", "\u0008"]]}, +"output": ["ParseError", ["Character", "\u0008"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+000B", "input": " ", -"output": ["ParseError", ["Character", "\u000b"]]}, +"output": ["ParseError", ["Character", "\u000b"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+000E", "input": "", -"output": ["ParseError", ["Character", "\u000e"]]}, +"output": ["ParseError", ["Character", "\u000e"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+000F", "input": "", -"output": ["ParseError", ["Character", "\u000f"]]}, +"output": ["ParseError", ["Character", "\u000f"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0010", "input": "", -"output": ["ParseError", ["Character", "\u0010"]]}, +"output": ["ParseError", ["Character", "\u0010"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0011", "input": "", -"output": ["ParseError", ["Character", "\u0011"]]}, +"output": ["ParseError", ["Character", "\u0011"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0012", "input": "", -"output": ["ParseError", ["Character", "\u0012"]]}, +"output": ["ParseError", ["Character", "\u0012"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0013", "input": "", -"output": ["ParseError", ["Character", "\u0013"]]}, +"output": ["ParseError", ["Character", "\u0013"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0014", "input": "", -"output": ["ParseError", ["Character", "\u0014"]]}, +"output": ["ParseError", ["Character", "\u0014"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0015", "input": "", -"output": ["ParseError", ["Character", "\u0015"]]}, +"output": ["ParseError", ["Character", "\u0015"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0016", "input": "", -"output": ["ParseError", ["Character", "\u0016"]]}, +"output": ["ParseError", ["Character", "\u0016"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0017", "input": "", -"output": ["ParseError", ["Character", "\u0017"]]}, +"output": ["ParseError", ["Character", "\u0017"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0018", "input": "", -"output": ["ParseError", ["Character", "\u0018"]]}, +"output": ["ParseError", ["Character", "\u0018"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+0019", "input": "", -"output": ["ParseError", ["Character", "\u0019"]]}, +"output": ["ParseError", ["Character", "\u0019"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+001A", "input": "", -"output": ["ParseError", ["Character", "\u001a"]]}, +"output": ["ParseError", ["Character", "\u001a"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+001B", "input": "", -"output": ["ParseError", ["Character", "\u001b"]]}, +"output": ["ParseError", ["Character", "\u001b"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+001C", "input": "", -"output": ["ParseError", ["Character", "\u001c"]]}, +"output": ["ParseError", ["Character", "\u001c"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+001D", "input": "", -"output": ["ParseError", ["Character", "\u001d"]]}, +"output": ["ParseError", ["Character", "\u001d"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+001E", "input": "", -"output": ["ParseError", ["Character", "\u001e"]]}, +"output": ["ParseError", ["Character", "\u001e"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+001F", "input": "", -"output": ["ParseError", ["Character", "\u001f"]]}, +"output": ["ParseError", ["Character", "\u001f"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+007F", "input": "", -"output": ["ParseError", ["Character", "\u007f"]]}, +"output": ["ParseError", ["Character", "\u007f"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+D800", "input": "�", -"output": ["ParseError", ["Character", "\uFFFD"]]}, +"output": ["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+DFFF", "input": "�", -"output": ["ParseError", ["Character", "\uFFFD"]]}, +"output": ["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDD0", "input": "﷐", -"output": ["ParseError", ["Character", "\ufdd0"]]}, +"output": ["ParseError", ["Character", "\ufdd0"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDD1", "input": "﷑", -"output": ["ParseError", ["Character", "\ufdd1"]]}, +"output": ["ParseError", ["Character", "\ufdd1"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDD2", "input": "﷒", -"output": ["ParseError", ["Character", "\ufdd2"]]}, +"output": ["ParseError", ["Character", "\ufdd2"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDD3", "input": "﷓", -"output": ["ParseError", ["Character", "\ufdd3"]]}, +"output": ["ParseError", ["Character", "\ufdd3"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDD4", "input": "﷔", -"output": ["ParseError", ["Character", "\ufdd4"]]}, +"output": ["ParseError", ["Character", "\ufdd4"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDD5", "input": "﷕", -"output": ["ParseError", ["Character", "\ufdd5"]]}, +"output": ["ParseError", ["Character", "\ufdd5"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDD6", "input": "﷖", -"output": ["ParseError", ["Character", "\ufdd6"]]}, +"output": ["ParseError", ["Character", "\ufdd6"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDD7", "input": "﷗", -"output": ["ParseError", ["Character", "\ufdd7"]]}, +"output": ["ParseError", ["Character", "\ufdd7"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDD8", "input": "﷘", -"output": ["ParseError", ["Character", "\ufdd8"]]}, +"output": ["ParseError", ["Character", "\ufdd8"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDD9", "input": "﷙", -"output": ["ParseError", ["Character", "\ufdd9"]]}, +"output": ["ParseError", ["Character", "\ufdd9"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDDA", "input": "﷚", -"output": ["ParseError", ["Character", "\ufdda"]]}, +"output": ["ParseError", ["Character", "\ufdda"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDDB", "input": "﷛", -"output": ["ParseError", ["Character", "\ufddb"]]}, +"output": ["ParseError", ["Character", "\ufddb"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDDC", "input": "﷜", -"output": ["ParseError", ["Character", "\ufddc"]]}, +"output": ["ParseError", ["Character", "\ufddc"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDDD", "input": "﷝", -"output": ["ParseError", ["Character", "\ufddd"]]}, +"output": ["ParseError", ["Character", "\ufddd"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDDE", "input": "﷞", -"output": ["ParseError", ["Character", "\ufdde"]]}, +"output": ["ParseError", ["Character", "\ufdde"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDDF", "input": "﷟", -"output": ["ParseError", ["Character", "\ufddf"]]}, +"output": ["ParseError", ["Character", "\ufddf"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDE0", "input": "﷠", -"output": ["ParseError", ["Character", "\ufde0"]]}, +"output": ["ParseError", ["Character", "\ufde0"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDE1", "input": "﷡", -"output": ["ParseError", ["Character", "\ufde1"]]}, +"output": ["ParseError", ["Character", "\ufde1"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDE2", "input": "﷢", -"output": ["ParseError", ["Character", "\ufde2"]]}, +"output": ["ParseError", ["Character", "\ufde2"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDE3", "input": "﷣", -"output": ["ParseError", ["Character", "\ufde3"]]}, +"output": ["ParseError", ["Character", "\ufde3"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDE4", "input": "﷤", -"output": ["ParseError", ["Character", "\ufde4"]]}, +"output": ["ParseError", ["Character", "\ufde4"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDE5", "input": "﷥", -"output": ["ParseError", ["Character", "\ufde5"]]}, +"output": ["ParseError", ["Character", "\ufde5"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDE6", "input": "﷦", -"output": ["ParseError", ["Character", "\ufde6"]]}, +"output": ["ParseError", ["Character", "\ufde6"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDE7", "input": "﷧", -"output": ["ParseError", ["Character", "\ufde7"]]}, +"output": ["ParseError", ["Character", "\ufde7"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDE8", "input": "﷨", -"output": ["ParseError", ["Character", "\ufde8"]]}, +"output": ["ParseError", ["Character", "\ufde8"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDE9", "input": "﷩", -"output": ["ParseError", ["Character", "\ufde9"]]}, +"output": ["ParseError", ["Character", "\ufde9"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDEA", "input": "﷪", -"output": ["ParseError", ["Character", "\ufdea"]]}, +"output": ["ParseError", ["Character", "\ufdea"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDEB", "input": "﷫", -"output": ["ParseError", ["Character", "\ufdeb"]]}, +"output": ["ParseError", ["Character", "\ufdeb"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDEC", "input": "﷬", -"output": ["ParseError", ["Character", "\ufdec"]]}, +"output": ["ParseError", ["Character", "\ufdec"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDED", "input": "﷭", -"output": ["ParseError", ["Character", "\ufded"]]}, +"output": ["ParseError", ["Character", "\ufded"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDEE", "input": "﷮", -"output": ["ParseError", ["Character", "\ufdee"]]}, +"output": ["ParseError", ["Character", "\ufdee"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FDEF", "input": "﷯", -"output": ["ParseError", ["Character", "\ufdef"]]}, +"output": ["ParseError", ["Character", "\ufdef"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FFFE", "input": "￾", -"output": ["ParseError", ["Character", "\ufffe"]]}, +"output": ["ParseError", ["Character", "\ufffe"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+FFFF", "input": "￿", -"output": ["ParseError", ["Character", "\uffff"]]}, +"output": ["ParseError", ["Character", "\uffff"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } +]}, {"description": "Invalid numeric entity character U+1FFFE", "input": "🿾", -"output": ["ParseError", ["Character", "\uD83F\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uD83F\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+1FFFF", "input": "🿿", -"output": ["ParseError", ["Character", "\uD83F\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uD83F\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+2FFFE", "input": "𯿾", -"output": ["ParseError", ["Character", "\uD87F\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uD87F\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+2FFFF", "input": "𯿿", -"output": ["ParseError", ["Character", "\uD87F\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uD87F\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+3FFFE", "input": "𿿾", -"output": ["ParseError", ["Character", "\uD8BF\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uD8BF\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+3FFFF", "input": "𿿿", -"output": ["ParseError", ["Character", "\uD8BF\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uD8BF\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+4FFFE", "input": "񏿾", -"output": ["ParseError", ["Character", "\uD8FF\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uD8FF\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+4FFFF", "input": "񏿿", -"output": ["ParseError", ["Character", "\uD8FF\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uD8FF\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+5FFFE", "input": "񟿾", -"output": ["ParseError", ["Character", "\uD93F\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uD93F\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+5FFFF", "input": "񟿿", -"output": ["ParseError", ["Character", "\uD93F\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uD93F\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+6FFFE", "input": "񯿾", -"output": ["ParseError", ["Character", "\uD97F\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uD97F\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+6FFFF", "input": "񯿿", -"output": ["ParseError", ["Character", "\uD97F\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uD97F\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+7FFFE", "input": "񿿾", -"output": ["ParseError", ["Character", "\uD9BF\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uD9BF\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+7FFFF", "input": "񿿿", -"output": ["ParseError", ["Character", "\uD9BF\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uD9BF\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+8FFFE", "input": "򏿾", -"output": ["ParseError", ["Character", "\uD9FF\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uD9FF\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+8FFFF", "input": "򏿿", -"output": ["ParseError", ["Character", "\uD9FF\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uD9FF\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+9FFFE", "input": "򟿾", -"output": ["ParseError", ["Character", "\uDA3F\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uDA3F\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+9FFFF", "input": "򟿿", -"output": ["ParseError", ["Character", "\uDA3F\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uDA3F\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+AFFFE", "input": "򯿾", -"output": ["ParseError", ["Character", "\uDA7F\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uDA7F\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+AFFFF", "input": "򯿿", -"output": ["ParseError", ["Character", "\uDA7F\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uDA7F\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+BFFFE", "input": "򿿾", -"output": ["ParseError", ["Character", "\uDABF\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uDABF\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+BFFFF", "input": "򿿿", -"output": ["ParseError", ["Character", "\uDABF\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uDABF\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+CFFFE", "input": "󏿾", -"output": ["ParseError", ["Character", "\uDAFF\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uDAFF\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+CFFFF", "input": "󏿿", -"output": ["ParseError", ["Character", "\uDAFF\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uDAFF\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+DFFFE", "input": "󟿾", -"output": ["ParseError", ["Character", "\uDB3F\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uDB3F\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+DFFFF", "input": "󟿿", -"output": ["ParseError", ["Character", "\uDB3F\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uDB3F\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+EFFFE", "input": "󯿾", -"output": ["ParseError", ["Character", "\uDB7F\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uDB7F\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+EFFFF", "input": "󯿿", -"output": ["ParseError", ["Character", "\uDB7F\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uDB7F\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+FFFFE", "input": "󿿾", -"output": ["ParseError", ["Character", "\uDBBF\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uDBBF\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+FFFFF", "input": "󿿿", -"output": ["ParseError", ["Character", "\uDBBF\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uDBBF\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } +]}, {"description": "Invalid numeric entity character U+10FFFE", "input": "􏿾", -"output": ["ParseError", ["Character", "\uDBFF\uDFFE"]]}, +"output": ["ParseError", ["Character", "\uDBFF\uDFFE"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 11 } +]}, {"description": "Invalid numeric entity character U+10FFFF", "input": "􏿿", -"output": ["ParseError", ["Character", "\uDBFF\uDFFF"]]}, +"output": ["ParseError", ["Character", "\uDBFF\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 11 } +]}, {"description": "Valid numeric entity character U+0009", "input": " ", diff --git a/tokenizer/test2.test b/tokenizer/test2.test index dbf08014..b1a40363 100644 --- a/tokenizer/test2.test +++ b/tokenizer/test2.test @@ -58,23 +58,39 @@ {"description":"Numeric entity representing the NUL character", "input":"�", -"output":["ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "null-character-reference", "line": 1, "col": 8 } +]}, {"description":"Hexadecimal entity representing the NUL character", "input":"�", -"output":["ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "null-character-reference", "line": 1, "col": 9 } +]}, {"description":"Numeric entity representing a codepoint after 1114111 (U+10FFFF)", "input":"�", -"output":["ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 11 } +]}, {"description":"Hexadecimal entity representing a codepoint after 1114111 (U+10FFFF)", "input":"�", -"output":["ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 13 } +]}, {"description":"Hexadecimal entity pair representing a surrogate pair", "input":"��", -"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 9 }, + { "code": "non-unicode-character-reference", "line": 1, "col": 17 } +]}, {"description":"Hexadecimal entity with mixed uppercase and lowercase", "input":"ꯍ", diff --git a/tokenizer/test4.test b/tokenizer/test4.test index d00d36c5..66ea9b3a 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -165,14 +165,16 @@ "input":"�", "output":["ParseError", "ParseError", ["Character", "\uFFFD"]], "errors":[ - { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 } + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 5 }, + { "code": "null-character-reference", "line": 1, "col": 5 } ]}, {"description":"Zero decimal numeric entity", "input":"�", "output":["ParseError", "ParseError", ["Character", "\uFFFD"]], "errors":[ - { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 4 } + { "code": "missing-semicolon-after-character-reference", "line": 1, "col": 4 }, + { "code": "null-character-reference", "line": 1, "col": 4 } ]}, {"description":"Zero-prefixed hex numeric entity", @@ -219,35 +221,63 @@ {"description":"Maximum non-BMP numeric entity", "input":"􏿿", -"output":["ParseError", ["Character", "\uDBFF\uDFFF"]]}, +"output":["ParseError", ["Character", "\uDBFF\uDFFF"]], +"errors":[ + { "code": "control-or-undefined-character-reference", "line": 1, "col": 11 } +]}, + {"description":"Above maximum numeric entity", "input":"�", -"output":["ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 11 } +]}, {"description":"32-bit hex numeric entity", "input":"�", -"output":["ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 13 } +]}, {"description":"33-bit hex numeric entity", "input":"�", -"output":["ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 14 } +]}, {"description":"33-bit decimal numeric entity", "input":"�", -"output":["ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 14 } +]}, {"description":"65-bit hex numeric entity", "input":"�", -"output":["ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 22 } +]}, {"description":"65-bit decimal numeric entity", "input":"�", -"output":["ParseError", ["Character", "\uFFFD"]]}, +"output":["ParseError", ["Character", "\uFFFD"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 24 } +]}, {"description":"Surrogate code point edge cases", "input":"퟿����", -"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]]}, +"output":[["Character", "\uD7FF"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD"], "ParseError", ["Character", "\uFFFD\uE000"]], +"errors":[ + { "code": "non-unicode-character-reference", "line": 1, "col": 17 }, + { "code": "non-unicode-character-reference", "line": 1, "col": 25 }, + { "code": "non-unicode-character-reference", "line": 1, "col": 33 }, + { "code": "non-unicode-character-reference", "line": 1, "col": 41 } +]}, {"description":"Uppercase start tag name", "input":"", diff --git a/tree-construction/entities01.dat b/tree-construction/entities01.dat index d9396fea..c3dad356 100644 --- a/tree-construction/entities01.dat +++ b/tree-construction/entities01.dat @@ -232,6 +232,8 @@ FOO�ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) null-character-reference #document | | @@ -263,6 +265,8 @@ FOO€ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -274,6 +278,8 @@ FOOZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -285,6 +291,8 @@ FOO‚ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -296,6 +304,8 @@ FOOƒZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -307,6 +317,8 @@ FOO„ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -318,6 +330,8 @@ FOO…ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -329,6 +343,8 @@ FOO†ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -340,6 +356,8 @@ FOO‡ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -351,6 +369,8 @@ FOOˆZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -362,6 +382,8 @@ FOO‰ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -373,6 +395,8 @@ FOOŠZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -384,6 +408,8 @@ FOO‹ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -395,6 +421,8 @@ FOOŒZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -406,6 +434,8 @@ FOOZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -417,6 +447,8 @@ FOOŽZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -428,6 +460,8 @@ FOOZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -439,6 +473,8 @@ FOOZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -450,6 +486,8 @@ FOO‘ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -461,6 +499,8 @@ FOO’ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -472,6 +512,8 @@ FOO“ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -483,6 +525,8 @@ FOO”ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -494,6 +538,8 @@ FOO•ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -505,6 +551,8 @@ FOO–ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -516,6 +564,8 @@ FOO—ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -527,6 +577,8 @@ FOO˜ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -538,6 +590,8 @@ FOO™ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -549,6 +603,8 @@ FOOšZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -560,6 +616,8 @@ FOO›ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -571,6 +629,8 @@ FOOœZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -582,6 +642,8 @@ FOOZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -593,6 +655,8 @@ FOOžZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -604,6 +668,8 @@ FOOŸZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) control-or-undefined-character-reference #document | | @@ -635,6 +701,8 @@ FOO�ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) non-unicode-character-reference #document | | @@ -646,6 +714,8 @@ FOO�ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) non-unicode-character-reference #document | | @@ -657,6 +727,8 @@ FOO�ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) non-unicode-character-reference #document | | @@ -668,6 +740,8 @@ FOO�ZOO #errors (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity +#new-errors +(1:12) non-unicode-character-reference #document | | @@ -689,6 +763,8 @@ FOO􏿾ZOO #errors (1,3): expected-doctype-but-got-chars (1,13): illegal-codepoint-for-numeric-entity +#new-errors +(1:14) control-or-undefined-character-reference #document | | @@ -710,6 +786,8 @@ FOO􏿿ZOO #errors (1,3): expected-doctype-but-got-chars (1,13): illegal-codepoint-for-numeric-entity +#new-errors +(1:14) control-or-undefined-character-reference #document | | @@ -721,6 +799,8 @@ FOO�ZOO #errors (1,3): expected-doctype-but-got-chars (1,13): illegal-codepoint-for-numeric-entity +#new-errors +(1:14) non-unicode-character-reference #document | | @@ -732,6 +812,8 @@ FOO�ZOO #errors (1,3): expected-doctype-but-got-chars (1,13): illegal-codepoint-for-numeric-entity +#new-errors +(1:14) non-unicode-character-reference #document | | @@ -746,6 +828,7 @@ FOO� (1,13): eof-in-numeric-entity #new-errors (1:17) missing-semicolon-after-character-reference +(1:17) non-unicode-character-reference #document | | @@ -760,6 +843,7 @@ FOO� (1,13): eof-in-numeric-entity #new-errors (1:16) missing-semicolon-after-character-reference +(1:16) non-unicode-character-reference #document | | @@ -774,6 +858,7 @@ FOO� (1,13): eof-in-numeric-entity #new-errors (1:18) missing-semicolon-after-character-reference +(1:18) non-unicode-character-reference #document | | @@ -788,6 +873,7 @@ FOO�ZOO (1,16): illegal-codepoint-for-numeric-entity #new-errors (1:17) missing-semicolon-after-character-reference +(1:17) non-unicode-character-reference #document | | @@ -802,6 +888,7 @@ FOO�ZOO (1,15): illegal-codepoint-for-numeric-entity #new-errors (1:16) missing-semicolon-after-character-reference +(1:16) non-unicode-character-reference #document | | @@ -816,6 +903,7 @@ FOO�ZOO (1,17): illegal-codepoint-for-numeric-entity #new-errors (1:18) missing-semicolon-after-character-reference +(1:18) non-unicode-character-reference #document | | diff --git a/tree-construction/plain-text-unsafe.dat b/tree-construction/plain-text-unsafe.dat index c84c33aedc212077f3d988b18e0098fa90868f1c..7aaf3c9d3f11d2f16ed432079758e361058725c5 100644 GIT binary patch delta 73 zcmeD3IOZ{-&?+yrTsO6-D8HzfOT*C0&`47uIX|zYC_hIxzeu+ Date: Fri, 21 Apr 2017 15:55:33 +0300 Subject: [PATCH 45/82] Add DOCTYPE state errors. --- tokenizer/test2.test | 10 +- tokenizer/test3.test | 1911 +++++++++++++++++++++++++------ tree-construction/doctype01.dat | 2 + tree-construction/tests2.dat | 10 + 4 files changed, 1551 insertions(+), 382 deletions(-) diff --git a/tokenizer/test2.test b/tokenizer/test2.test index b1a40363..b0729ee8 100644 --- a/tokenizer/test2.test +++ b/tokenizer/test2.test @@ -6,11 +6,17 @@ {"description":"DOCTYPE without space before name", "input":"", -"output":["ParseError", ["DOCTYPE", "html", null, null, true]]}, +"output":["ParseError", ["DOCTYPE", "html", null, null, true]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"Incorrect DOCTYPE without a space before name", "input":"", -"output":["ParseError", ["DOCTYPE", "foo", null, null, true]]}, +"output":["ParseError", ["DOCTYPE", "foo", null, null, true]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"DOCTYPE with publicId", "input":"", diff --git a/tokenizer/test3.test b/tokenizer/test3.test index 5d52c0d7..99028035 100644 --- a/tokenizer/test3.test +++ b/tokenizer/test3.test @@ -1526,17 +1526,24 @@ {"description":"", "input":"", @@ -3329,1612 +3371,2721 @@ {"description":"", "input":"", -"output":["ParseError", ["DOCTYPE", "a", null, null, true]]}, +"output":["ParseError", ["DOCTYPE", "a", null, null, true]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]}, +"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, true]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, true]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]}, +"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]}, +"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]}, +"output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"", "input":"", -"output":["ParseError", ["DOCTYPE", "a", null, null, true]]}, +"output":["ParseError", ["DOCTYPE", "a", null, null, true]], +"errors":[ + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } +]}, {"description":"Hello #errors (1,9): need-space-after-doctype +#new-errors +(1:10) missing-whitespace-before-doctype-name #document | | diff --git a/tree-construction/tests2.dat b/tree-construction/tests2.dat index b2ce7169..cc7ae370 100644 --- a/tree-construction/tests2.dat +++ b/tree-construction/tests2.dat @@ -253,6 +253,8 @@ #errors (1,9): need-space-after-doctype (1,54): expected-named-closing-tag-but-got-eof +#new-errors +(1:10) missing-whitespace-before-doctype-name #document | | @@ -338,6 +340,8 @@

  • #errors (1,9): need-space-after-doctype +#new-errors +(1:10) missing-whitespace-before-doctype-name #document | | @@ -350,6 +354,8 @@

    #errors (1,9): need-space-after-doctype +#new-errors +(1:10) missing-whitespace-before-doctype-name #document | | @@ -362,6 +368,8 @@

    #errors (1,9): need-space-after-doctype +#new-errors +(1:10) missing-whitespace-before-doctype-name #document | | @@ -375,6 +383,8 @@ #errors (1,9): need-space-after-doctype (1,23): expected-closing-tag-but-got-eof +#new-errors +(1:10) missing-whitespace-before-doctype-name #document | | From 99400c48cdb430ccd225c3b76f60b44f4a341df8 Mon Sep 17 00:00:00 2001 From: inikulin Date: Fri, 21 Apr 2017 16:28:09 +0300 Subject: [PATCH 46/82] Add Before DOCTYPE name state errors. --- tokenizer/test2.test | 5 ++- tokenizer/test3.test | 68 ++++++++++++++++++++++++++------- tree-construction/doctype01.dat | 4 ++ tree-construction/tests6.dat | 2 + 4 files changed, 64 insertions(+), 15 deletions(-) diff --git a/tokenizer/test2.test b/tokenizer/test2.test index b0729ee8..725af25d 100644 --- a/tokenizer/test2.test +++ b/tokenizer/test2.test @@ -2,7 +2,10 @@ {"description":"DOCTYPE without name", "input":"", -"output":["ParseError", "ParseError", ["DOCTYPE", null, null, null, false]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", null, null, null, false]], +"errors":[ + { "code": "missing-doctype-name", "line": 1, "col": 10 } +]}, {"description":"DOCTYPE without space before name", "input":"", diff --git a/tokenizer/test3.test b/tokenizer/test3.test index 99028035..e03e986d 100644 --- a/tokenizer/test3.test +++ b/tokenizer/test3.test @@ -1535,7 +1535,8 @@ "input":"", "input":"", -"output":["ParseError", ["DOCTYPE", null, null, null, false]]}, +"output":["ParseError", ["DOCTYPE", null, null, null, false]], +"errors":[ + { "code": "missing-doctype-name", "line": 1, "col": 11 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", ["DOCTYPE", null, null, null, false]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", null, null, null, false]], +"errors":[ + { "code": "missing-doctype-name", "line": 1, "col": 10 } +]}, {"description":" | @@ -49,6 +51,8 @@ #errors (1,11): expected-doctype-name-but-got-right-bracket (1,11): unknown-doctype +#new-errors +(1:11) missing-doctype-name #document | | diff --git a/tree-construction/tests6.dat b/tree-construction/tests6.dat index 3ad5bfed..f3991232 100644 --- a/tree-construction/tests6.dat +++ b/tree-construction/tests6.dat @@ -51,6 +51,8 @@ (1,9): need-space-after-doctype (1,10): expected-doctype-name-but-got-right-bracket (1,10): unknown-doctype +#new-errors +(1:10) missing-doctype-name #document | | From fd209adf751fd37da9c31a7f2d7809934061fa9c Mon Sep 17 00:00:00 2001 From: inikulin Date: Fri, 21 Apr 2017 17:41:32 +0300 Subject: [PATCH 47/82] Add DOCTYPE name state errors. --- tokenizer/test1.test | 6 +- tokenizer/test3.test | 449 ++++++++++++++++++++++++++++++++----------- 2 files changed, 339 insertions(+), 116 deletions(-) diff --git a/tokenizer/test1.test b/tokenizer/test1.test index f618f6f9..1bf4f1c4 100644 --- a/tokenizer/test1.test +++ b/tokenizer/test1.test @@ -4,6 +4,7 @@ "input":"", "output":[["DOCTYPE", "html", null, null, true]]}, + {"description":"Correct Doctype uppercase", "input":"", "output":[["DOCTYPE", "html", null, null, true]]}, @@ -14,7 +15,10 @@ {"description":"Correct Doctype case with EOF", "input":"", diff --git a/tokenizer/test3.test b/tokenizer/test3.test index e03e986d..435660b0 100644 --- a/tokenizer/test3.test +++ b/tokenizer/test3.test @@ -1536,7 +1536,8 @@ "output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "\uFFFD", null, null, false]], "errors":[ { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }, - { "code": "unexpected-null-character", "line": 1, "col": 10 } + { "code": "unexpected-null-character", "line": 1, "col": 10 }, + { "code": "eof-in-doctype", "line": 1, "col": 11 } ]}, {"description":"", "input":"", @@ -1714,49 +1755,81 @@ {"description":"", "input":"", @@ -3251,155 +3359,223 @@ {"description":"", @@ -3413,63 +3589,73 @@ "input":"", @@ -5998,133 +6198,152 @@ "input":" Date: Sat, 22 Apr 2017 00:47:04 +0300 Subject: [PATCH 48/82] Add After DOCTYPE name state errors. --- tokenizer/domjs.test | 5 +- tokenizer/test2.test | 5 +- tokenizer/test3.test | 540 ++++++++++++++++++++++++-------- tokenizer/test4.test | 30 +- tree-construction/doctype01.dat | 8 + tree-construction/tests2.dat | 1 + 6 files changed, 445 insertions(+), 144 deletions(-) diff --git a/tokenizer/domjs.test b/tokenizer/domjs.test index 2ea76785..d1992bae 100644 --- a/tokenizer/domjs.test +++ b/tokenizer/domjs.test @@ -162,7 +162,10 @@ { "description":"space EOF after doctype ", "input":"", "input":"", @@ -1963,19 +2032,31 @@ {"description":"", "input":"", -"output":["ParseError", ["DOCTYPE", "a", null, null, false]]}, +"output":["ParseError", ["DOCTYPE", "a", null, null, false]], +"errors":[ + { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 13 } +]}, {"description":"", @@ -3874,28 +4098,32 @@ "input":"", "input":"", "output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]], "errors":[ - { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }, + { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 12 } ]}, {"description":"", -"output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, +"output":["ParseError", ["DOCTYPE", "html", null, null, false]], +"errors":[ + { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 } +]}, {"description":"Doctype publi", "input":"", -"output":["ParseError", ["DOCTYPE", "html", null, null, false]]}, +"output":["ParseError", ["DOCTYPE", "html", null, null, false]], +"errors":[ + { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 } +]}, {"description":"Doctype sys", "input":"text", "input":"text", -"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]]}, +"output":["ParseError", ["DOCTYPE", "html", null, null, false], ["Character", "text"]], +"errors":[ + { "code": "invalid-character-sequence-after-doctype-name", "line": 1, "col": 16 } +]}, {"description":"Grave accent in unquoted attribute", "input":"", diff --git a/tree-construction/doctype01.dat b/tree-construction/doctype01.dat index 656886db..2c67398f 100644 --- a/tree-construction/doctype01.dat +++ b/tree-construction/doctype01.dat @@ -87,6 +87,8 @@ #errors (1,17): expected-space-or-right-bracket-in-doctype (1,22): unknown-doctype +#new-errors +(1:18) invalid-character-sequence-after-doctype-name #document | | @@ -99,6 +101,8 @@ #errors (1,17): expected-space-or-right-bracket-in-doctype (1,27): unknown-doctype +#new-errors +(1:18) invalid-character-sequence-after-doctype-name #document | | @@ -204,6 +208,8 @@ #errors (1,17): expected-space-or-right-bracket-in-doctype (1,35): unknown-doctype +#new-errors +(1:18) invalid-character-sequence-after-doctype-name #document | | @@ -356,6 +362,8 @@ #errors (1,23): expected-space-or-right-bracket-in-doctype (2,30): unknown-doctype +#new-errors +(1:24) invalid-character-sequence-after-doctype-name #document | | diff --git a/tree-construction/tests2.dat b/tree-construction/tests2.dat index cc7ae370..efc5da37 100644 --- a/tree-construction/tests2.dat +++ b/tree-construction/tests2.dat @@ -792,6 +792,7 @@ x { content:" From 4d0d34bcdd10bdbf1bcb82d630b98e9600a36e15 Mon Sep 17 00:00:00 2001 From: inikulin Date: Sat, 22 Apr 2017 01:06:23 +0300 Subject: [PATCH 49/82] Add Script data double escaped dash state --- tokenizer/domjs.test | 3 ++- tree-construction/domjs-unsafe.dat | Bin 10246 -> 10291 bytes tree-construction/tests16.dat | 4 ++++ 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/tokenizer/domjs.test b/tokenizer/domjs.test index d1992bae..5fcf1125 100644 --- a/tokenizer/domjs.test +++ b/tokenizer/domjs.test @@ -53,7 +53,8 @@ "input":"", "output":[["Character", ""]], "errors":[ - { "code": "unexpected-null-character", "line": 1, "col": 13 } + { "code": "unexpected-null-character", "line": 1, "col": 13 }, + { "code": "unexpected-null-character", "line": 1, "col": 30 } ] }, { diff --git a/tree-construction/domjs-unsafe.dat b/tree-construction/domjs-unsafe.dat index 9ba293bb73145db9c5c9091fd59fe418081ce25c..4ae248bba4987c8a995ed0bdbe2cbf625d685576 100644 GIT binary patch delta 27 jcmZn**c`B74&UUpEPPDHCX@HFWKF)n!?F1zpQt1Nm9`25 delta 24 gcmdlS&=#;^4&P)JImO8VBAS!m3Gi%|=NFL#0C8{!)c^nh diff --git a/tree-construction/tests16.dat b/tree-construction/tests16.dat index 34a708ec..d31ae8b4 100644 --- a/tree-construction/tests16.dat +++ b/tree-construction/tests16.dat @@ -657,6 +657,8 @@ #errors (1,36): eof-in-script-in-script (1,36): expected-named-closing-tag-but-got-eof +#new-errors +(1:37) eof-in-script-html-comment #document | | @@ -1957,6 +1959,8 @@ (1,8): expected-doctype-but-got-start-tag (1,21): eof-in-script-in-script (1,21): expected-named-closing-tag-but-got-eof +#new-errors +(1:22) eof-in-script-html-comment #document | | From 16c9a83f450fce5f21c93f1023abd7d385cf7f32 Mon Sep 17 00:00:00 2001 From: inikulin Date: Sat, 22 Apr 2017 01:17:59 +0300 Subject: [PATCH 50/82] Add Script data double escaped dash dash state errors. --- tokenizer/domjs.test | 21 ++++++++++++++++++++- tree-construction/domjs-unsafe.dat | Bin 10291 -> 10336 bytes tree-construction/tests16.dat | 4 ++++ 3 files changed, 24 insertions(+), 1 deletion(-) diff --git a/tokenizer/domjs.test b/tokenizer/domjs.test index 5fcf1125..d69cfb0b 100644 --- a/tokenizer/domjs.test +++ b/tokenizer/domjs.test @@ -54,7 +54,8 @@ "output":[["Character", ""]], "errors":[ { "code": "unexpected-null-character", "line": 1, "col": 13 }, - { "code": "unexpected-null-character", "line": 1, "col": 30 } + { "code": "unexpected-null-character", "line": 1, "col": 30 }, + { "code": "unexpected-null-character", "line": 1, "col": 48 } ] }, { @@ -84,6 +85,24 @@ { "code": "eof-in-script-html-comment", "line": 1, "col": 11 } ] }, + { + "description":"EOF in script HTML comment double escaped after dash", + "initialStates":["Script data state"], + "input":"baz", "output":[["Character", "foo"], ["EndTag", "xmp"], "ParseError", ["Comment", ""], ["Character", "baz"], ["EndTag", "xmp"]], "errors":[ - { "code": "abrupt-comment", "line": 1, "col": 19 } + { "code": "abrupt-closing-of-comment", "line": 1, "col": 19 } ]}, {"description":"Commented entities in RCDATA", diff --git a/tokenizer/test1.test b/tokenizer/test1.test index 1bf4f1c4..5949abea 100644 --- a/tokenizer/test1.test +++ b/tokenizer/test1.test @@ -117,7 +117,7 @@ "input":"", "output":["ParseError", ["Comment", ""]], "errors":[ - { "code": "abrupt-comment", "line": 1, "col": 5 } + { "code": "abrupt-closing-of-comment", "line": 1, "col": 5 } ]}, @@ -125,7 +125,7 @@ "input":"", "output":["ParseError", ["Comment", ""]], "errors":[ - { "code": "abrupt-comment", "line": 1, "col": 6 } + { "code": "abrupt-closing-of-comment", "line": 1, "col": 6 } ]}, {"description":"Short comment three", diff --git a/tree-construction/comments01.dat b/tree-construction/comments01.dat index b6d07334..153dcdcc 100644 --- a/tree-construction/comments01.dat +++ b/tree-construction/comments01.dat @@ -106,7 +106,7 @@ FOOBAZ (1,3): expected-doctype-but-got-chars (1,9): incorrect-comment #new-errors -(1:9) abrupt-comment +(1:9) abrupt-closing-of-comment #document | | @@ -121,7 +121,7 @@ FOOBAZ (1,3): expected-doctype-but-got-chars (1,8): incorrect-comment #new-errors -(1:8) abrupt-comment +(1:8) abrupt-closing-of-comment #document | | diff --git a/tree-construction/tests1.dat b/tree-construction/tests1.dat index 53b31b50..09cc9bda 100644 --- a/tree-construction/tests1.dat +++ b/tree-construction/tests1.dat @@ -328,8 +328,8 @@ Line1
    Line2
    Line3
    Line4 (1,17): incorrect-comment (1,17): expected-closing-tag-but-got-eof #new-errors -(1:5) abrupt-comment -(1:17) abrupt-comment +(1:5) abrupt-closing-of-comment +(1:17) abrupt-closing-of-comment #document | | From 03a008d9ad20dda36c05c26c1e5f50b45c3720a2 Mon Sep 17 00:00:00 2001 From: inikulin Date: Tue, 9 May 2017 22:27:12 +0300 Subject: [PATCH 55/82] Split control and undefined character errors for input stream --- tokenizer/test3.test | 172 ++++++++++++++++----------------- tokenizer/test4.test | 6 +- tokenizer/unicodeChars.test | 188 ++++++++++++++++++------------------ 3 files changed, 183 insertions(+), 183 deletions(-) diff --git a/tokenizer/test3.test b/tokenizer/test3.test index 8f567c8d..42f4567e 100644 --- a/tokenizer/test3.test +++ b/tokenizer/test3.test @@ -16,7 +16,7 @@ "input":"\u000B", "output":["ParseError", ["Character", "\u000B"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description":"\\u000C", @@ -112,7 +112,7 @@ "input":"<\u000B", "output":["ParseError", "ParseError", ["Character", "<\u000B"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 2 }, + { "code": "control-character-in-input-stream", "line": 1, "col": 2 }, { "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 } ]}, @@ -162,7 +162,7 @@ "input":"", "output":["ParseError", ["StartTag", "a\u0008", {}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 3 } + { "code": "control-character-in-input-stream", "line": 1, "col": 3 } ]}, {"description":"", @@ -7255,7 +7255,7 @@ "input":"", "output":["ParseError", ["StartTag", "a\u000B", {}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 3 } + { "code": "control-character-in-input-stream", "line": 1, "col": 3 } ]}, {"description":"", @@ -7270,7 +7270,7 @@ "input":"", "output":["ParseError", ["StartTag", "a\u001F", {}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 3 } + { "code": "control-character-in-input-stream", "line": 1, "col": 3 } ]}, {"description":"
    ", @@ -7288,7 +7288,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"\u0008":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 4 } + { "code": "control-character-in-input-stream", "line": 1, "col": 4 } ]}, {"description":"", @@ -7303,7 +7303,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"\u000B":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 4 } + { "code": "control-character-in-input-stream", "line": 1, "col": 4 } ]}, {"description":"", @@ -7318,7 +7318,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"\u001F":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 4 } + { "code": "control-character-in-input-stream", "line": 1, "col": 4 } ]}, {"description":"", @@ -7444,7 +7444,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a\u0008":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 5 } + { "code": "control-character-in-input-stream", "line": 1, "col": 5 } ]}, {"description":"", @@ -7459,7 +7459,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a\u000B":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 5 } + { "code": "control-character-in-input-stream", "line": 1, "col": 5 } ]}, {"description":"", @@ -7474,7 +7474,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a\u001F":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 5 } + { "code": "control-character-in-input-stream", "line": 1, "col": 5 } ]}, {"description":"", @@ -7492,7 +7492,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } + { "code": "control-character-in-input-stream", "line": 1, "col": 6 } ]}, {"description":"", @@ -7507,7 +7507,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } + { "code": "control-character-in-input-stream", "line": 1, "col": 6 } ]}, {"description":"", @@ -7522,7 +7522,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } + { "code": "control-character-in-input-stream", "line": 1, "col": 6 } ]}, {"description":"", @@ -7736,7 +7736,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"\u0008"}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } + { "code": "control-character-in-input-stream", "line": 1, "col": 6 } ]}, {"description":"", @@ -7757,7 +7757,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } + { "code": "control-character-in-input-stream", "line": 1, "col": 6 } ]}, {"description":"", @@ -7778,7 +7778,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"\u001F"}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } + { "code": "control-character-in-input-stream", "line": 1, "col": 6 } ]}, {"description":"", @@ -7815,7 +7815,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 7 } + { "code": "control-character-in-input-stream", "line": 1, "col": 7 } ]}, {"description":"", @@ -7969,7 +7969,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"\u000B"}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 7 } + { "code": "control-character-in-input-stream", "line": 1, "col": 7 } ]}, {"description":"", @@ -8012,7 +8012,7 @@ "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u0008":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 8 }, + { "code": "control-character-in-input-stream", "line": 1, "col": 8 }, { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, @@ -8028,7 +8028,7 @@ "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u000B":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 8 }, + { "code": "control-character-in-input-stream", "line": 1, "col": 8 }, { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, @@ -8044,7 +8044,7 @@ "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"a":"", "\u001F":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 8 }, + { "code": "control-character-in-input-stream", "line": 1, "col": 8 }, { "code": "missing-whitespace-between-attributes", "line": 1, "col": 8 } ]}, @@ -8411,7 +8411,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"a\u0008"}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 7 } + { "code": "control-character-in-input-stream", "line": 1, "col": 7 } ]}, {"description":"", @@ -8426,7 +8426,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"a\u000B"}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 7 } + { "code": "control-character-in-input-stream", "line": 1, "col": 7 } ]}, {"description":"", @@ -8441,7 +8441,7 @@ "input":"", "output":["ParseError", ["StartTag", "a", {"a":"a\u001F"}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 7 } + { "code": "control-character-in-input-stream", "line": 1, "col": 7 } ]}, {"description":"", @@ -8729,7 +8729,7 @@ "input":"", "output":["ParseError", "ParseError", ["StartTag", "a", {"\u000B":""}]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 4 }, + { "code": "control-character-in-input-stream", "line": 1, "col": 4 }, { "code": "abruption-of-tag-self-closure", "line": 1, "col": 4 } ]}, diff --git a/tokenizer/test4.test b/tokenizer/test4.test index 67a75dc7..fe23df7b 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -370,7 +370,7 @@ "ignoreErrorOrder":true, "errors":[ { "code": "malformed-comment", "line": 1, "col": 3 }, - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } + { "code": "control-character-in-input-stream", "line": 1, "col": 6 } ]}, {"description":"U+FDD1 in lookahead region", @@ -379,7 +379,7 @@ "ignoreErrorOrder":true, "errors":[ { "code": "malformed-comment", "line": 1, "col": 3 }, - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 6 } ]}, {"description":"U+1FFFF in lookahead region", @@ -388,7 +388,7 @@ "ignoreErrorOrder":true, "errors":[ { "code": "malformed-comment", "line": 1, "col": 3 }, - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 6 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 6 } ]}, {"description":"CR followed by non-LF", diff --git a/tokenizer/unicodeChars.test b/tokenizer/unicodeChars.test index e691aa96..e871219c 100644 --- a/tokenizer/unicodeChars.test +++ b/tokenizer/unicodeChars.test @@ -4,658 +4,658 @@ "input": "\u0001", "output": ["ParseError", ["Character", "\u0001"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0002", "input": "\u0002", "output": ["ParseError", ["Character", "\u0002"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0003", "input": "\u0003", "output": ["ParseError", ["Character", "\u0003"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0004", "input": "\u0004", "output": ["ParseError", ["Character", "\u0004"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0005", "input": "\u0005", "output": ["ParseError", ["Character", "\u0005"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0006", "input": "\u0006", "output": ["ParseError", ["Character", "\u0006"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0007", "input": "\u0007", "output": ["ParseError", ["Character", "\u0007"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0008", "input": "\u0008", "output": ["ParseError", ["Character", "\u0008"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+000B", "input": "\u000B", "output": ["ParseError", ["Character", "\u000B"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+000E", "input": "\u000E", "output": ["ParseError", ["Character", "\u000E"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+000F", "input": "\u000F", "output": ["ParseError", ["Character", "\u000F"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0010", "input": "\u0010", "output": ["ParseError", ["Character", "\u0010"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0011", "input": "\u0011", "output": ["ParseError", ["Character", "\u0011"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0012", "input": "\u0012", "output": ["ParseError", ["Character", "\u0012"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0013", "input": "\u0013", "output": ["ParseError", ["Character", "\u0013"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0014", "input": "\u0014", "output": ["ParseError", ["Character", "\u0014"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0015", "input": "\u0015", "output": ["ParseError", ["Character", "\u0015"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0016", "input": "\u0016", "output": ["ParseError", ["Character", "\u0016"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0017", "input": "\u0017", "output": ["ParseError", ["Character", "\u0017"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0018", "input": "\u0018", "output": ["ParseError", ["Character", "\u0018"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+0019", "input": "\u0019", "output": ["ParseError", ["Character", "\u0019"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+001A", "input": "\u001A", "output": ["ParseError", ["Character", "\u001A"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+001B", "input": "\u001B", "output": ["ParseError", ["Character", "\u001B"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+001C", "input": "\u001C", "output": ["ParseError", ["Character", "\u001C"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+001D", "input": "\u001D", "output": ["ParseError", ["Character", "\u001D"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+001E", "input": "\u001E", "output": ["ParseError", ["Character", "\u001E"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+001F", "input": "\u001F", "output": ["ParseError", ["Character", "\u001F"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+007F", "input": "\u007F", "output": ["ParseError", ["Character", "\u007F"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "control-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDD0", "input": "\uFDD0", "output": ["ParseError", ["Character", "\uFDD0"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDD1", "input": "\uFDD1", "output": ["ParseError", ["Character", "\uFDD1"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDD2", "input": "\uFDD2", "output": ["ParseError", ["Character", "\uFDD2"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDD3", "input": "\uFDD3", "output": ["ParseError", ["Character", "\uFDD3"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDD4", "input": "\uFDD4", "output": ["ParseError", ["Character", "\uFDD4"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDD5", "input": "\uFDD5", "output": ["ParseError", ["Character", "\uFDD5"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDD6", "input": "\uFDD6", "output": ["ParseError", ["Character", "\uFDD6"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDD7", "input": "\uFDD7", "output": ["ParseError", ["Character", "\uFDD7"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDD8", "input": "\uFDD8", "output": ["ParseError", ["Character", "\uFDD8"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDD9", "input": "\uFDD9", "output": ["ParseError", ["Character", "\uFDD9"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDDA", "input": "\uFDDA", "output": ["ParseError", ["Character", "\uFDDA"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDDB", "input": "\uFDDB", "output": ["ParseError", ["Character", "\uFDDB"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDDC", "input": "\uFDDC", "output": ["ParseError", ["Character", "\uFDDC"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDDD", "input": "\uFDDD", "output": ["ParseError", ["Character", "\uFDDD"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDDE", "input": "\uFDDE", "output": ["ParseError", ["Character", "\uFDDE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDDF", "input": "\uFDDF", "output": ["ParseError", ["Character", "\uFDDF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDE0", "input": "\uFDE0", "output": ["ParseError", ["Character", "\uFDE0"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDE1", "input": "\uFDE1", "output": ["ParseError", ["Character", "\uFDE1"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDE2", "input": "\uFDE2", "output": ["ParseError", ["Character", "\uFDE2"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDE3", "input": "\uFDE3", "output": ["ParseError", ["Character", "\uFDE3"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDE4", "input": "\uFDE4", "output": ["ParseError", ["Character", "\uFDE4"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDE5", "input": "\uFDE5", "output": ["ParseError", ["Character", "\uFDE5"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDE6", "input": "\uFDE6", "output": ["ParseError", ["Character", "\uFDE6"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDE7", "input": "\uFDE7", "output": ["ParseError", ["Character", "\uFDE7"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDE8", "input": "\uFDE8", "output": ["ParseError", ["Character", "\uFDE8"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDE9", "input": "\uFDE9", "output": ["ParseError", ["Character", "\uFDE9"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDEA", "input": "\uFDEA", "output": ["ParseError", ["Character", "\uFDEA"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDEB", "input": "\uFDEB", "output": ["ParseError", ["Character", "\uFDEB"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDEC", "input": "\uFDEC", "output": ["ParseError", ["Character", "\uFDEC"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDED", "input": "\uFDED", "output": ["ParseError", ["Character", "\uFDED"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDEE", "input": "\uFDEE", "output": ["ParseError", ["Character", "\uFDEE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FDEF", "input": "\uFDEF", "output": ["ParseError", ["Character", "\uFDEF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FFFE", "input": "\uFFFE", "output": ["ParseError", ["Character", "\uFFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FFFF", "input": "\uFFFF", "output": ["ParseError", ["Character", "\uFFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+1FFFE", "input": "\uD83F\uDFFE", "output": ["ParseError", ["Character", "\uD83F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+1FFFF", "input": "\uD83F\uDFFF", "output": ["ParseError", ["Character", "\uD83F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+2FFFE", "input": "\uD87F\uDFFE", "output": ["ParseError", ["Character", "\uD87F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+2FFFF", "input": "\uD87F\uDFFF", "output": ["ParseError", ["Character", "\uD87F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+3FFFE", "input": "\uD8BF\uDFFE", "output": ["ParseError", ["Character", "\uD8BF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+3FFFF", "input": "\uD8BF\uDFFF", "output": ["ParseError", ["Character", "\uD8BF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+4FFFE", "input": "\uD8FF\uDFFE", "output": ["ParseError", ["Character", "\uD8FF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+4FFFF", "input": "\uD8FF\uDFFF", "output": ["ParseError", ["Character", "\uD8FF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+5FFFE", "input": "\uD93F\uDFFE", "output": ["ParseError", ["Character", "\uD93F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+5FFFF", "input": "\uD93F\uDFFF", "output": ["ParseError", ["Character", "\uD93F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+6FFFE", "input": "\uD97F\uDFFE", "output": ["ParseError", ["Character", "\uD97F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+6FFFF", "input": "\uD97F\uDFFF", "output": ["ParseError", ["Character", "\uD97F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+7FFFE", "input": "\uD9BF\uDFFE", "output": ["ParseError", ["Character", "\uD9BF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+7FFFF", "input": "\uD9BF\uDFFF", "output": ["ParseError", ["Character", "\uD9BF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+8FFFE", "input": "\uD9FF\uDFFE", "output": ["ParseError", ["Character", "\uD9FF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+8FFFF", "input": "\uD9FF\uDFFF", "output": ["ParseError", ["Character", "\uD9FF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+9FFFE", "input": "\uDA3F\uDFFE", "output": ["ParseError", ["Character", "\uDA3F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+9FFFF", "input": "\uDA3F\uDFFF", "output": ["ParseError", ["Character", "\uDA3F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+AFFFE", "input": "\uDA7F\uDFFE", "output": ["ParseError", ["Character", "\uDA7F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+AFFFF", "input": "\uDA7F\uDFFF", "output": ["ParseError", ["Character", "\uDA7F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+BFFFE", "input": "\uDABF\uDFFE", "output": ["ParseError", ["Character", "\uDABF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+BFFFF", "input": "\uDABF\uDFFF", "output": ["ParseError", ["Character", "\uDABF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+CFFFE", "input": "\uDAFF\uDFFE", "output": ["ParseError", ["Character", "\uDAFF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+CFFFF", "input": "\uDAFF\uDFFF", "output": ["ParseError", ["Character", "\uDAFF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+DFFFE", "input": "\uDB3F\uDFFE", "output": ["ParseError", ["Character", "\uDB3F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+DFFFF", "input": "\uDB3F\uDFFF", "output": ["ParseError", ["Character", "\uDB3F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+EFFFE", "input": "\uDB7F\uDFFE", "output": ["ParseError", ["Character", "\uDB7F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+EFFFF", "input": "\uDB7F\uDFFF", "output": ["ParseError", ["Character", "\uDB7F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FFFFE", "input": "\uDBBF\uDFFE", "output": ["ParseError", ["Character", "\uDBBF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+FFFFF", "input": "\uDBBF\uDFFF", "output": ["ParseError", ["Character", "\uDBBF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+10FFFE", "input": "\uDBFF\uDFFE", "output": ["ParseError", ["Character", "\uDBFF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Invalid Unicode character U+10FFFF", "input": "\uDBFF\uDFFF", "output": ["ParseError", ["Character", "\uDBFF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-in-input-stream", "line": 1, "col": 1 } + { "code": "undefined-character-in-input-stream", "line": 1, "col": 1 } ]}, {"description": "Valid Unicode character U+0009", From 7648171932c61ab89a2ba8e65623cec8ecfa9651 Mon Sep 17 00:00:00 2001 From: inikulin Date: Wed, 10 May 2017 01:25:16 +0300 Subject: [PATCH 56/82] Split numeric character errors. --- tokenizer/numericEntities.test | 132 +++++++++++++++---------------- tokenizer/test4.test | 2 +- tree-construction/entities01.dat | 4 +- 3 files changed, 69 insertions(+), 69 deletions(-) diff --git a/tokenizer/numericEntities.test b/tokenizer/numericEntities.test index 6dceac12..eaa5ca5e 100644 --- a/tokenizer/numericEntities.test +++ b/tokenizer/numericEntities.test @@ -294,462 +294,462 @@ "input": "﷐", "output": ["ParseError", ["Character", "\ufdd0"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDD1", "input": "﷑", "output": ["ParseError", ["Character", "\ufdd1"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDD2", "input": "﷒", "output": ["ParseError", ["Character", "\ufdd2"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDD3", "input": "﷓", "output": ["ParseError", ["Character", "\ufdd3"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDD4", "input": "﷔", "output": ["ParseError", ["Character", "\ufdd4"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDD5", "input": "﷕", "output": ["ParseError", ["Character", "\ufdd5"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDD6", "input": "﷖", "output": ["ParseError", ["Character", "\ufdd6"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDD7", "input": "﷗", "output": ["ParseError", ["Character", "\ufdd7"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDD8", "input": "﷘", "output": ["ParseError", ["Character", "\ufdd8"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDD9", "input": "﷙", "output": ["ParseError", ["Character", "\ufdd9"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDDA", "input": "﷚", "output": ["ParseError", ["Character", "\ufdda"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDDB", "input": "﷛", "output": ["ParseError", ["Character", "\ufddb"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDDC", "input": "﷜", "output": ["ParseError", ["Character", "\ufddc"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDDD", "input": "﷝", "output": ["ParseError", ["Character", "\ufddd"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDDE", "input": "﷞", "output": ["ParseError", ["Character", "\ufdde"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDDF", "input": "﷟", "output": ["ParseError", ["Character", "\ufddf"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDE0", "input": "﷠", "output": ["ParseError", ["Character", "\ufde0"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDE1", "input": "﷡", "output": ["ParseError", ["Character", "\ufde1"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDE2", "input": "﷢", "output": ["ParseError", ["Character", "\ufde2"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDE3", "input": "﷣", "output": ["ParseError", ["Character", "\ufde3"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDE4", "input": "﷤", "output": ["ParseError", ["Character", "\ufde4"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDE5", "input": "﷥", "output": ["ParseError", ["Character", "\ufde5"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDE6", "input": "﷦", "output": ["ParseError", ["Character", "\ufde6"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDE7", "input": "﷧", "output": ["ParseError", ["Character", "\ufde7"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDE8", "input": "﷨", "output": ["ParseError", ["Character", "\ufde8"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDE9", "input": "﷩", "output": ["ParseError", ["Character", "\ufde9"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDEA", "input": "﷪", "output": ["ParseError", ["Character", "\ufdea"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDEB", "input": "﷫", "output": ["ParseError", ["Character", "\ufdeb"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDEC", "input": "﷬", "output": ["ParseError", ["Character", "\ufdec"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDED", "input": "﷭", "output": ["ParseError", ["Character", "\ufded"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDEE", "input": "﷮", "output": ["ParseError", ["Character", "\ufdee"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FDEF", "input": "﷯", "output": ["ParseError", ["Character", "\ufdef"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FFFE", "input": "￾", "output": ["ParseError", ["Character", "\ufffe"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+FFFF", "input": "￿", "output": ["ParseError", ["Character", "\uffff"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "undefined-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+1FFFE", "input": "🿾", "output": ["ParseError", ["Character", "\uD83F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+1FFFF", "input": "🿿", "output": ["ParseError", ["Character", "\uD83F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+2FFFE", "input": "𯿾", "output": ["ParseError", ["Character", "\uD87F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+2FFFF", "input": "𯿿", "output": ["ParseError", ["Character", "\uD87F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+3FFFE", "input": "𿿾", "output": ["ParseError", ["Character", "\uD8BF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+3FFFF", "input": "𿿿", "output": ["ParseError", ["Character", "\uD8BF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+4FFFE", "input": "񏿾", "output": ["ParseError", ["Character", "\uD8FF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+4FFFF", "input": "񏿿", "output": ["ParseError", ["Character", "\uD8FF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+5FFFE", "input": "񟿾", "output": ["ParseError", ["Character", "\uD93F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+5FFFF", "input": "񟿿", "output": ["ParseError", ["Character", "\uD93F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+6FFFE", "input": "񯿾", "output": ["ParseError", ["Character", "\uD97F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+6FFFF", "input": "񯿿", "output": ["ParseError", ["Character", "\uD97F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+7FFFE", "input": "񿿾", "output": ["ParseError", ["Character", "\uD9BF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+7FFFF", "input": "񿿿", "output": ["ParseError", ["Character", "\uD9BF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+8FFFE", "input": "򏿾", "output": ["ParseError", ["Character", "\uD9FF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+8FFFF", "input": "򏿿", "output": ["ParseError", ["Character", "\uD9FF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+9FFFE", "input": "򟿾", "output": ["ParseError", ["Character", "\uDA3F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+9FFFF", "input": "򟿿", "output": ["ParseError", ["Character", "\uDA3F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+AFFFE", "input": "򯿾", "output": ["ParseError", ["Character", "\uDA7F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+AFFFF", "input": "򯿿", "output": ["ParseError", ["Character", "\uDA7F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+BFFFE", "input": "򿿾", "output": ["ParseError", ["Character", "\uDABF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+BFFFF", "input": "򿿿", "output": ["ParseError", ["Character", "\uDABF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+CFFFE", "input": "󏿾", "output": ["ParseError", ["Character", "\uDAFF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+CFFFF", "input": "󏿿", "output": ["ParseError", ["Character", "\uDAFF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+DFFFE", "input": "󟿾", "output": ["ParseError", ["Character", "\uDB3F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+DFFFF", "input": "󟿿", "output": ["ParseError", ["Character", "\uDB3F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+EFFFE", "input": "󯿾", "output": ["ParseError", ["Character", "\uDB7F\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+EFFFF", "input": "󯿿", "output": ["ParseError", ["Character", "\uDB7F\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+FFFFE", "input": "󿿾", "output": ["ParseError", ["Character", "\uDBBF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+FFFFF", "input": "󿿿", "output": ["ParseError", ["Character", "\uDBBF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 10 } + { "code": "undefined-character-reference", "line": 1, "col": 10 } ]}, {"description": "Invalid numeric entity character U+10FFFE", "input": "􏿾", "output": ["ParseError", ["Character", "\uDBFF\uDFFE"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 11 } + { "code": "undefined-character-reference", "line": 1, "col": 11 } ]}, {"description": "Invalid numeric entity character U+10FFFF", "input": "􏿿", "output": ["ParseError", ["Character", "\uDBFF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 11 } + { "code": "undefined-character-reference", "line": 1, "col": 11 } ]}, {"description": "Valid numeric entity character U+0009", diff --git a/tokenizer/test4.test b/tokenizer/test4.test index fe23df7b..5af76dbd 100644 --- a/tokenizer/test4.test +++ b/tokenizer/test4.test @@ -226,7 +226,7 @@ "input":"􏿿", "output":["ParseError", ["Character", "\uDBFF\uDFFF"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 11 } + { "code": "undefined-character-reference", "line": 1, "col": 11 } ]}, diff --git a/tree-construction/entities01.dat b/tree-construction/entities01.dat index c3dad356..f1a65a52 100644 --- a/tree-construction/entities01.dat +++ b/tree-construction/entities01.dat @@ -764,7 +764,7 @@ FOO􏿾ZOO (1,3): expected-doctype-but-got-chars (1,13): illegal-codepoint-for-numeric-entity #new-errors -(1:14) control-or-undefined-character-reference +(1:14) undefined-character-reference #document | | @@ -787,7 +787,7 @@ FOO􏿿ZOO (1,3): expected-doctype-but-got-chars (1,13): illegal-codepoint-for-numeric-entity #new-errors -(1:14) control-or-undefined-character-reference +(1:14) undefined-character-reference #document | | From a6fa87828aa4f658c76aa9a9a3825a8f17a282ef Mon Sep 17 00:00:00 2001 From: inikulin Date: Wed, 10 May 2017 02:16:46 +0300 Subject: [PATCH 57/82] Fix error code. --- tokenizer/entities.test | 128 ++++++++++++------------ tokenizer/numericEntities.test | 56 +++++------ tree-construction/entities01.dat | 64 ++++++------ tree-construction/plain-text-unsafe.dat | Bin 9286 -> 9273 bytes 4 files changed, 124 insertions(+), 124 deletions(-) diff --git a/tokenizer/entities.test b/tokenizer/entities.test index 7b997153..a0614997 100644 --- a/tokenizer/entities.test +++ b/tokenizer/entities.test @@ -12,448 +12,448 @@ "input":" ", "output": ["ParseError", ["Character", "\r"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 7 } + { "code": "control-character-reference", "line": 1, "col": 7 } ]}, {"description": "CR as hexadecimal numeric entity", "input":" ", "output": ["ParseError", ["Character", "\r"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 EURO SIGN numeric entity.", "input":"€", "output": ["ParseError", ["Character", "\u20AC"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", "output": ["ParseError", ["Character", "\u0081"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK numeric entity.", "input":"‚", "output": ["ParseError", ["Character", "\u201A"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK numeric entity.", "input":"ƒ", "output": ["ParseError", ["Character", "\u0192"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK numeric entity.", "input":"„", "output": ["ParseError", ["Character", "\u201E"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 HORIZONTAL ELLIPSIS numeric entity.", "input":"…", "output": ["ParseError", ["Character", "\u2026"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 DAGGER numeric entity.", "input":"†", "output": ["ParseError", ["Character", "\u2020"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 DOUBLE DAGGER numeric entity.", "input":"‡", "output": ["ParseError", ["Character", "\u2021"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT numeric entity.", "input":"ˆ", "output": ["ParseError", ["Character", "\u02C6"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 PER MILLE SIGN numeric entity.", "input":"‰", "output": ["ParseError", ["Character", "\u2030"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON numeric entity.", "input":"Š", "output": ["ParseError", ["Character", "\u0160"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK numeric entity.", "input":"‹", "output": ["ParseError", ["Character", "\u2039"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LIGATURE OE numeric entity.", "input":"Œ", "output": ["ParseError", ["Character", "\u0152"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", "output": ["ParseError", ["Character", "\u008D"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON numeric entity.", "input":"Ž", "output": ["ParseError", ["Character", "\u017D"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", "output": ["ParseError", ["Character", "\u008F"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", "output": ["ParseError", ["Character", "\u0090"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LEFT SINGLE QUOTATION MARK numeric entity.", "input":"‘", "output": ["ParseError", ["Character", "\u2018"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK numeric entity.", "input":"’", "output": ["ParseError", ["Character", "\u2019"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK numeric entity.", "input":"“", "output": ["ParseError", ["Character", "\u201C"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK numeric entity.", "input":"”", "output": ["ParseError", ["Character", "\u201D"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 BULLET numeric entity.", "input":"•", "output": ["ParseError", ["Character", "\u2022"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 EN DASH numeric entity.", "input":"–", "output": ["ParseError", ["Character", "\u2013"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 EM DASH numeric entity.", "input":"—", "output": ["ParseError", ["Character", "\u2014"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SMALL TILDE numeric entity.", "input":"˜", "output": ["ParseError", ["Character", "\u02DC"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 TRADE MARK SIGN numeric entity.", "input":"™", "output": ["ParseError", ["Character", "\u2122"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON numeric entity.", "input":"š", "output": ["ParseError", ["Character", "\u0161"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK numeric entity.", "input":"›", "output": ["ParseError", ["Character", "\u203A"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL LIGATURE OE numeric entity.", "input":"œ", "output": ["ParseError", ["Character", "\u0153"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR numeric entity.", "input":"", "output": ["ParseError", ["Character", "\u009D"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 EURO SIGN hexadecimal numeric entity.", "input":"€", "output": ["ParseError", ["Character", "\u20AC"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", "output": ["ParseError", ["Character", "\u0081"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SINGLE LOW-9 QUOTATION MARK hexadecimal numeric entity.", "input":"‚", "output": ["ParseError", ["Character", "\u201A"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL LETTER F WITH HOOK hexadecimal numeric entity.", "input":"ƒ", "output": ["ParseError", ["Character", "\u0192"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 DOUBLE LOW-9 QUOTATION MARK hexadecimal numeric entity.", "input":"„", "output": ["ParseError", ["Character", "\u201E"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 HORIZONTAL ELLIPSIS hexadecimal numeric entity.", "input":"…", "output": ["ParseError", ["Character", "\u2026"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 DAGGER hexadecimal numeric entity.", "input":"†", "output": ["ParseError", ["Character", "\u2020"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 DOUBLE DAGGER hexadecimal numeric entity.", "input":"‡", "output": ["ParseError", ["Character", "\u2021"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 MODIFIER LETTER CIRCUMFLEX ACCENT hexadecimal numeric entity.", "input":"ˆ", "output": ["ParseError", ["Character", "\u02C6"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 PER MILLE SIGN hexadecimal numeric entity.", "input":"‰", "output": ["ParseError", ["Character", "\u2030"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LETTER S WITH CARON hexadecimal numeric entity.", "input":"Š", "output": ["ParseError", ["Character", "\u0160"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SINGLE LEFT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.", "input":"‹", "output": ["ParseError", ["Character", "\u2039"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LIGATURE OE hexadecimal numeric entity.", "input":"Œ", "output": ["ParseError", ["Character", "\u0152"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", "output": ["ParseError", ["Character", "\u008D"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LETTER Z WITH CARON hexadecimal numeric entity.", "input":"Ž", "output": ["ParseError", ["Character", "\u017D"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", "output": ["ParseError", ["Character", "\u008F"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", "output": ["ParseError", ["Character", "\u0090"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LEFT SINGLE QUOTATION MARK hexadecimal numeric entity.", "input":"‘", "output": ["ParseError", ["Character", "\u2018"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 RIGHT SINGLE QUOTATION MARK hexadecimal numeric entity.", "input":"’", "output": ["ParseError", ["Character", "\u2019"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LEFT DOUBLE QUOTATION MARK hexadecimal numeric entity.", "input":"“", "output": ["ParseError", ["Character", "\u201C"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 RIGHT DOUBLE QUOTATION MARK hexadecimal numeric entity.", "input":"”", "output": ["ParseError", ["Character", "\u201D"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 BULLET hexadecimal numeric entity.", "input":"•", "output": ["ParseError", ["Character", "\u2022"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 EN DASH hexadecimal numeric entity.", "input":"–", "output": ["ParseError", ["Character", "\u2013"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 EM DASH hexadecimal numeric entity.", "input":"—", "output": ["ParseError", ["Character", "\u2014"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SMALL TILDE hexadecimal numeric entity.", "input":"˜", "output": ["ParseError", ["Character", "\u02DC"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 TRADE MARK SIGN hexadecimal numeric entity.", "input":"™", "output": ["ParseError", ["Character", "\u2122"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL LETTER S WITH CARON hexadecimal numeric entity.", "input":"š", "output": ["ParseError", ["Character", "\u0161"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 SINGLE RIGHT-POINTING ANGLE QUOTATION MARK hexadecimal numeric entity.", "input":"›", "output": ["ParseError", ["Character", "\u203A"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL LIGATURE OE hexadecimal numeric entity.", "input":"œ", "output": ["ParseError", ["Character", "\u0153"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 REPLACEMENT CHAR hexadecimal numeric entity.", "input":"", "output": ["ParseError", ["Character", "\u009D"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN SMALL LETTER Z WITH CARON hexadecimal numeric entity.", "input":"ž", "output": ["ParseError", ["Character", "\u017E"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Windows-1252 LATIN CAPITAL LETTER Y WITH DIAERESIS hexadecimal numeric entity.", "input":"Ÿ", "output": ["ParseError", ["Character", "\u0178"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 8 } + { "code": "control-character-reference", "line": 1, "col": 8 } ]}, {"description": "Decimal numeric entity followed by hex character a.", diff --git a/tokenizer/numericEntities.test b/tokenizer/numericEntities.test index eaa5ca5e..bcb550fe 100644 --- a/tokenizer/numericEntities.test +++ b/tokenizer/numericEntities.test @@ -80,14 +80,14 @@ "input": "", "output": ["ParseError", ["Character", "\u0001"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0002", "input": "", "output": ["ParseError", ["Character", "\u0002"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, @@ -95,7 +95,7 @@ "input": "", "output": ["ParseError", ["Character", "\u0003"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, @@ -103,7 +103,7 @@ "input": "", "output": ["ParseError", ["Character", "\u0004"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, @@ -111,7 +111,7 @@ "input": "", "output": ["ParseError", ["Character", "\u0005"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, @@ -119,161 +119,161 @@ "input": "", "output": ["ParseError", ["Character", "\u0006"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0007", "input": "", "output": ["ParseError", ["Character", "\u0007"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0008", "input": "", "output": ["ParseError", ["Character", "\u0008"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+000B", "input": " ", "output": ["ParseError", ["Character", "\u000b"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+000E", "input": "", "output": ["ParseError", ["Character", "\u000e"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+000F", "input": "", "output": ["ParseError", ["Character", "\u000f"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0010", "input": "", "output": ["ParseError", ["Character", "\u0010"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0011", "input": "", "output": ["ParseError", ["Character", "\u0011"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0012", "input": "", "output": ["ParseError", ["Character", "\u0012"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0013", "input": "", "output": ["ParseError", ["Character", "\u0013"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0014", "input": "", "output": ["ParseError", ["Character", "\u0014"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0015", "input": "", "output": ["ParseError", ["Character", "\u0015"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0016", "input": "", "output": ["ParseError", ["Character", "\u0016"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0017", "input": "", "output": ["ParseError", ["Character", "\u0017"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0018", "input": "", "output": ["ParseError", ["Character", "\u0018"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+0019", "input": "", "output": ["ParseError", ["Character", "\u0019"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+001A", "input": "", "output": ["ParseError", ["Character", "\u001a"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+001B", "input": "", "output": ["ParseError", ["Character", "\u001b"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+001C", "input": "", "output": ["ParseError", ["Character", "\u001c"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+001D", "input": "", "output": ["ParseError", ["Character", "\u001d"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+001E", "input": "", "output": ["ParseError", ["Character", "\u001e"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+001F", "input": "", "output": ["ParseError", ["Character", "\u001f"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+007F", "input": "", "output": ["ParseError", ["Character", "\u007f"]], "errors":[ - { "code": "control-or-undefined-character-reference", "line": 1, "col": 9 } + { "code": "control-character-reference", "line": 1, "col": 9 } ]}, {"description": "Invalid numeric entity character U+D800", diff --git a/tree-construction/entities01.dat b/tree-construction/entities01.dat index f1a65a52..dd6dfc86 100644 --- a/tree-construction/entities01.dat +++ b/tree-construction/entities01.dat @@ -266,7 +266,7 @@ FOO€ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -279,7 +279,7 @@ FOOZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -292,7 +292,7 @@ FOO‚ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -305,7 +305,7 @@ FOOƒZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -318,7 +318,7 @@ FOO„ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -331,7 +331,7 @@ FOO…ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -344,7 +344,7 @@ FOO†ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -357,7 +357,7 @@ FOO‡ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -370,7 +370,7 @@ FOOˆZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -383,7 +383,7 @@ FOO‰ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -396,7 +396,7 @@ FOOŠZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -409,7 +409,7 @@ FOO‹ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -422,7 +422,7 @@ FOOŒZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -435,7 +435,7 @@ FOOZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -448,7 +448,7 @@ FOOŽZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -461,7 +461,7 @@ FOOZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -474,7 +474,7 @@ FOOZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -487,7 +487,7 @@ FOO‘ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -500,7 +500,7 @@ FOO’ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -513,7 +513,7 @@ FOO“ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -526,7 +526,7 @@ FOO”ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -539,7 +539,7 @@ FOO•ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -552,7 +552,7 @@ FOO–ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -565,7 +565,7 @@ FOO—ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -578,7 +578,7 @@ FOO˜ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -591,7 +591,7 @@ FOO™ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -604,7 +604,7 @@ FOOšZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -617,7 +617,7 @@ FOO›ZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -630,7 +630,7 @@ FOOœZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -643,7 +643,7 @@ FOOZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -656,7 +656,7 @@ FOOžZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | @@ -669,7 +669,7 @@ FOOŸZOO (1,3): expected-doctype-but-got-chars (1,11): illegal-codepoint-for-numeric-entity #new-errors -(1:12) control-or-undefined-character-reference +(1:12) control-character-reference #document | | diff --git a/tree-construction/plain-text-unsafe.dat b/tree-construction/plain-text-unsafe.dat index 7aaf3c9d3f11d2f16ed432079758e361058725c5..18793e8a5b41c3907ec51095d3e046b2e9656e2f 100644 GIT binary patch delta 10 RcmX@+vD0Hh_r@9PlmQ+v1w{Y= delta 23 ecmdn#am-^vH&1?%ZfRahYFcJqYRbm0^~wNwh6)4# From 55146bd35d3c501f6eac0ec534d26f654a9523f7 Mon Sep 17 00:00:00 2001 From: Diego Date: Sun, 30 Apr 2017 22:14:21 -0700 Subject: [PATCH 58/82] Fixing DOCTYPE parse errors --- tokenizer/test2.test | 35 +- tokenizer/test3.test | 2562 ++++++++++++++++++++++++------- tree-construction/doctype01.dat | 32 + 3 files changed, 2077 insertions(+), 552 deletions(-) diff --git a/tokenizer/test2.test b/tokenizer/test2.test index e9bb0680..a9db2b4e 100644 --- a/tokenizer/test2.test +++ b/tokenizer/test2.test @@ -27,15 +27,24 @@ {"description":"DOCTYPE with EOF after PUBLIC", "input":"", @@ -47,19 +56,31 @@ {"description":"DOCTYPE with > in double-quoted publicId", "input":"x", -"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]}, +"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]], +"errors": [ + { "code": "abrupt-closing-of-doctype", "col": 24, "line": 1 } +]}, {"description":"DOCTYPE with > in single-quoted publicId", "input":"x", -"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]]}, +"output":["ParseError", ["DOCTYPE", "html", "", null, false], ["Character", "x"]], +"errors": [ + { "code": "abrupt-closing-of-doctype", "col": 24, "line": 1 } +]}, {"description":"DOCTYPE with > in double-quoted systemId", "input":"x", -"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]}, +"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]], +"errors": [ + { "code": "abrupt-closing-of-doctype", "col": 30, "line": 1 } +]}, {"description":"DOCTYPE with > in single-quoted systemId", "input":"x", -"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]]}, +"output":["ParseError", ["DOCTYPE", "html", "foo", "", false], ["Character", "x"]], +"errors": [ + { "code": "abrupt-closing-of-doctype", "col": 30, "line": 1 } +]}, {"description":"Incomplete doctype", "input":"", "input":"", -"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]], +"errors": [ + { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 }, + { "code": "abrupt-closing-of-doctype", "col": 20, "line": 1 } +]}, {"description":"", "input":"", -"output":["ParseError", ["DOCTYPE", "a", "", null, true]]}, +"output":["ParseError", ["DOCTYPE", "a", "", null, true]], +"errors": [ + { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]], +"errors": [ + { "code": "missing-whitespace-after-doctype-public-keyword", "col": 19, "line": 1 }, + { "code": "abrupt-closing-of-doctype", "col": 20, "line": 1 } +]}, {"description":"", "input":"", -"output":["ParseError", ["DOCTYPE", "a", null, null, false]]}, +"output":["ParseError", ["DOCTYPE", "a", null, null, false]], +"errors": [ + { "code": "abrupt-closing-of-doctype", "col": 19, "line": 1 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]], +"errors":[ + { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 }, + { "code": "abrupt-closing-of-doctype", "col": 20, "line": 1 } +]}, {"description":"", "input":"", -"output":["ParseError", ["DOCTYPE", "a", null, "", true]]}, +"output":["ParseError", ["DOCTYPE", "a", null, "", true]], +"errors":[ + { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 } +]}, {"description":"", "input":"", -"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]]}, +"output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]], +"errors":[ + { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 19 }, + { "code": "abrupt-closing-of-doctype", "col": 20, "line": 1 } +]}, {"description":"", "input":"", -"output":["ParseError", ["DOCTYPE", "a", null, null, false]]}, +"output":["ParseError", ["DOCTYPE", "a", null, null, false]], +"errors":[ + { "code": "abrupt-closing-of-doctype", "line": 1, "col": 19 } +]}, {"description":"", "input":"", "output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]], "errors":[ - { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }, + { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 }, + { "code": "abrupt-closing-of-doctype", "line": 1, "col": 19 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["DOCTYPE", "a", "", null, true]], "errors":[ - { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }, + { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", "", null, false]], "errors":[ - { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }, + { "code": "missing-whitespace-after-doctype-public-keyword", "line": 1, "col": 18 }, + { "code": "abrupt-closing-of-doctype", "line": 1, "col": 19 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]], "errors":[ - { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }, + { "code": "abrupt-closing-of-doctype", "line": 1, "col": 18 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]], "errors":[ - { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }, + { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 }, + { "code": "abrupt-closing-of-doctype", "line": 1, "col": 19 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["DOCTYPE", "a", null, "", true]], "errors":[ - { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }, + { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", "ParseError", ["DOCTYPE", "a", null, "", false]], "errors":[ - { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }, + { "code": "missing-whitespace-after-doctype-system-keyword", "line": 1, "col": 18 }, + { "code": "abrupt-closing-of-doctype", "line": 1, "col": 19 } ]}, {"description":"", "input":"", "output":["ParseError", "ParseError", ["DOCTYPE", "a", null, null, false]], "errors":[ - { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 } + { "code": "missing-whitespace-before-doctype-name", "line": 1, "col": 10 }, + { "code": "abrupt-closing-of-doctype", "line": 1, "col": 18 } ]}, {"description":" | @@ -127,6 +129,8 @@ #errors (1,28): unexpected-char-in-doctype (1,28): unknown-doctype +#new-errors +(1:28) abrupt-closing-of-doctype #document | | @@ -139,6 +143,8 @@ #errors (1,34): unexpected-char-in-doctype (1,37): unknown-doctype +#new-errors +(1:34) missing-quote-before-doctype-system-identifier #document | | @@ -151,6 +157,8 @@ #errors (1,25): unexpected-char-in-doctype (1,31): unknown-doctype +#new-errors +(1:25) missing-quote-before-doctype-system-identifier #document | | @@ -196,6 +204,8 @@ #errors (1,24): unexpected-char-in-doctype (1,34): unknown-doctype +#new-errors +(1:24) missing-quote-before-doctype-system-identifier #document | | @@ -222,6 +232,8 @@ #errors (1,24): unexpected-end-of-doctype (1,24): unknown-doctype +#new-errors +(1:24) abrupt-closing-of-doctype #document | | @@ -234,6 +246,8 @@ #errors (1,25): unexpected-end-of-doctype (1,25): unknown-doctype +#new-errors +(1:25) abrupt-closing-of-doctype #document | | @@ -246,6 +260,8 @@ #errors (1,24): unexpected-char-in-doctype (1,28): unknown-doctype +#new-errors +(1:24) missing-quote-before-doctype-public-identifier #document | | @@ -258,6 +274,8 @@ #errors (1,25): unexpected-char-in-doctype (1,29): unknown-doctype +#new-errors +(1:25) missing-quote-before-doctype-public-identifier #document | | @@ -281,6 +299,8 @@ #errors (1,29): unexpected-char-in-doctype (1,32): unknown-doctype +#new-errors +(1:29) missing-quote-before-doctype-system-identifier #document | | @@ -304,6 +324,8 @@ #errors (1,38): unexpected-char-in-doctype (1,48): unknown-doctype +#new-errors +(1:38) missing-quote-before-doctype-system-identifier #document | | @@ -399,6 +421,8 @@ #errors (1,50): unexpected-char-in-doctype +#new-errors +(1:50) missing-whitespace-between-doctype-public-and-system-identifiers #document | | @@ -409,6 +433,8 @@ #errors (1,50): unexpected-char-in-doctype +#new-errors +(1:50) missing-whitespace-between-doctype-public-and-system-identifiers #document | | @@ -420,6 +446,9 @@ #errors (1,21): unexpected-char-in-doctype (1,49): unexpected-char-in-doctype +#new-errors +(1:22) missing-whitespace-after-doctype-public-keyword +(1:49) missing-whitespace-between-doctype-public-and-system-identifiers #document | | @@ -431,6 +460,9 @@ #errors (1,21): unexpected-char-in-doctype (1,49): unexpected-char-in-doctype +#new-errors +(1:22) missing-whitespace-after-doctype-public-keyword +(1:49) missing-whitespace-between-doctype-public-and-system-identifiers #document | | From 49d6fa3365d269bf7a4785c93525e77ff780545d Mon Sep 17 00:00:00 2001 From: Simon Pieters Date: Wed, 5 Apr 2017 15:30:57 +0200 Subject: [PATCH 59/82] Test "block" elements that should close p (#91) --- tree-construction/blocks.dat | 719 +++++++++++++++++++++++++++++++++++ 1 file changed, 719 insertions(+) create mode 100644 tree-construction/blocks.dat diff --git a/tree-construction/blocks.dat b/tree-construction/blocks.dat new file mode 100644 index 00000000..5d3871ea --- /dev/null +++ b/tree-construction/blocks.dat @@ -0,0 +1,719 @@ +#data +

    foo

    bar

    baz +#errors +(1,39): expected-closing-tag-but-got-eof +30: Unclosed element “address”. +#document +| +| +| +| +|

    +| "foo" +|

    +| "bar" +|

    +| "baz" + +#data +

    foo

    bar +#errors +#document +| +| +| +| +|
    +|

    +| "foo" +| "bar" + +#data +

    foo

    bar

    baz +#errors +(1,39): expected-closing-tag-but-got-eof +30: Unclosed element “article”. +#document +| +| +| +| +|

    +| "foo" +|

    +| "bar" +|

    +| "baz" + +#data +

    foo

    bar +#errors +#document +| +| +| +| +|
    +|

    +| "foo" +| "bar" + +#data +

    foo