Skip to content

Commit 0d35eb6

Browse files
authored
Merge pull request #98 from RReverser/master
Concatenate character tokens
2 parents c9816cf + 8e19e7a commit 0d35eb6

File tree

5 files changed

+12
-12
lines changed

5 files changed

+12
-12
lines changed

tokenizer/test1.test

+2-2
Original file line number | Diff line number | Diff line change
@@ -182,14 +182,14 @@
182182

183183
{"description":"Entity without trailing semicolon (1)",
184184
"input":"I'm &notit",
185-
"output":[["Character","I'm "], ["Character", "\u00ACit"]],
185+
"output":[["Character","I'm \u00ACit"]],
186186
"errors": [
187187
{"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
188188
]},
189189

190190
{"description":"Entity without trailing semicolon (2)",
191191
"input":"I'm &notin",
192-
"output":[["Character","I'm "], ["Character", "\u00ACin"]],
192+
"output":[["Character","I'm \u00ACin"]],
193193
"errors": [
194194
{"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
195195
]},

tokenizer/test2.test

+3-3
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,7 @@
119119

120120
{"description":"Hexadecimal entity pair representing a surrogate pair",
121121
"input":"&#xD869;&#xDED6;",
122-
"output":[["Character", "\uFFFD"], ["Character", "\uFFFD"]],
122+
"output":[["Character", "\uFFFD\uFFFD"]],
123123
"errors":[
124124
{ "code": "surrogate-character-reference", "line": 1, "col": 9 },
125125
{ "code": "surrogate-character-reference", "line": 1, "col": 17 }
@@ -195,7 +195,7 @@
195195

196196
{"description":"Unescaped <",
197197
"input":"foo < bar",
198-
"output":[["Character", "foo "], ["Character", "< bar"]],
198+
"output":[["Character", "foo < bar"]],
199199
"errors":[
200200
{ "code": "invalid-first-character-of-tag-name", "line": 1, "col": 6 }
201201
]},
@@ -242,7 +242,7 @@
242242

243243
{"description":"Empty end tag with following characters",
244244
"input":"a</>bc",
245-
"output":[["Character", "a"], ["Character", "bc"]],
245+
"output":[["Character", "abc"]],
246246
"errors":[
247247
{ "code": "missing-end-tag-name", "line": 1, "col": 4 }
248248
]},

tokenizer/test3.test

+2-2
Original file line number | Diff line number | Diff line change
@@ -88,7 +88,7 @@
8888

8989
{"description":"<\\u0000",
9090
"input":"<\u0000",
91-
"output":[["Character", "<"], ["Character", "\u0000"]],
91+
"output":[["Character", "<\u0000"]],
9292
"errors":[
9393
{ "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 },
9494
{ "code": "unexpected-null-character", "line": 1, "col": 2 }
@@ -8415,7 +8415,7 @@
84158415

84168416
{"description":"<<",
84178417
"input":"<<",
8418-
"output":[["Character", "<"], ["Character", "<"]],
8418+
"output":[["Character", "<<"]],
84198419
"errors":[
84208420
{ "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 },
84218421
{ "code": "eof-before-tag-name", "line": 1, "col": 3 }

tokenizer/test4.test

+3-3
Original file line number | Diff line number | Diff line change
@@ -190,7 +190,7 @@
190190

191191
{"description":"Empty hex numeric entities",
192192
"input":"&#x &#X ",
193-
"output":[["Character", "&#x "], ["Character", "&#X "]],
193+
"output":[["Character", "&#x &#X "]],
194194
"errors":[
195195
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 },
196196
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 8 }
@@ -205,7 +205,7 @@
205205

206206
{"description":"Empty decimal numeric entities",
207207
"input":"&# &#; ",
208-
"output":[["Character", "&# "], ["Character", "&#; "]],
208+
"output":[["Character", "&# &#; "]],
209209
"errors":[
210210
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 },
211211
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 6 }
@@ -274,7 +274,7 @@
274274

275275
{"description":"Surrogate code point edge cases",
276276
"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
277-
"output":[["Character", "\uD7FF"], ["Character", "\uFFFD"], ["Character", "\uFFFD"], ["Character", "\uFFFD"], ["Character", "\uFFFD\uE000"]],
277+
"output":[["Character", "\uD7FF\uFFFD\uFFFD\uFFFD\uFFFD\uE000"]],
278278
"errors":[
279279
{ "code": "surrogate-character-reference", "line": 1, "col": 17 },
280280
{ "code": "surrogate-character-reference", "line": 1, "col": 25 },

tokenizer/unicodeCharsProblematic.test

+2-2
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@
1818
{"description": "Invalid Unicode character U+DFFF with valid preceding character",
1919
"doubleEscaped":true,
2020
"input": "a\\uDFFF",
21-
"output":[["Character", "a"], ["Character", "\\uDFFF"]],
21+
"output":[["Character", "a\\uDFFF"]],
2222
"errors":[
2323
{ "code": "surrogate-in-input-stream", "line": 1, "col": 2 }
2424
]},
@@ -33,7 +33,7 @@
3333

3434
{"description":"CR followed by U+0000",
3535
"input":"\r\u0000",
36-
"output":[["Character", "\n"], ["Character", "\u0000"]],
36+
"output":[["Character", "\n\u0000"]],
3737
"errors":[
3838
{ "code": "unexpected-null-character", "line": 2, "col": 1 }
3939
]}

0 commit comments

Comments
 (0)