Skip to content

Commit 8e19e7a

Browse files
committed
Concatenate character tokens
Looks like these few places were missed when ParseError token type was removed. This PR fixes them to restore the state promised in the README: > All adjacent character tokens are coalesced into a single ["Character", data] token.
1 parent c9816cf commit 8e19e7a

File tree

5 files changed

+12
-12
lines changed

5 files changed

+12
-12
lines changed

tokenizer/test1.test

+2-2
Original file line numberDiff line numberDiff line change
@@ -182,14 +182,14 @@
182182

183183
{"description":"Entity without trailing semicolon (1)",
184184
"input":"I'm &notit",
185-
"output":[["Character","I'm "], ["Character", "\u00ACit"]],
185+
"output":[["Character","I'm \u00ACit"]],
186186
"errors": [
187187
{"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
188188
]},
189189

190190
{"description":"Entity without trailing semicolon (2)",
191191
"input":"I'm &notin",
192-
"output":[["Character","I'm "], ["Character", "\u00ACin"]],
192+
"output":[["Character","I'm \u00ACin"]],
193193
"errors": [
194194
{"code" : "missing-semicolon-after-character-reference", "line": 1, "col": 9 }
195195
]},

tokenizer/test2.test

+3-3
Original file line numberDiff line numberDiff line change
@@ -119,7 +119,7 @@
119119

120120
{"description":"Hexadecimal entity pair representing a surrogate pair",
121121
"input":"��",
122-
"output":[["Character", "\uFFFD"], ["Character", "\uFFFD"]],
122+
"output":[["Character", "\uFFFD\uFFFD"]],
123123
"errors":[
124124
{ "code": "surrogate-character-reference", "line": 1, "col": 9 },
125125
{ "code": "surrogate-character-reference", "line": 1, "col": 17 }
@@ -195,7 +195,7 @@
195195

196196
{"description":"Unescaped <",
197197
"input":"foo < bar",
198-
"output":[["Character", "foo "], ["Character", "< bar"]],
198+
"output":[["Character", "foo < bar"]],
199199
"errors":[
200200
{ "code": "invalid-first-character-of-tag-name", "line": 1, "col": 6 }
201201
]},
@@ -242,7 +242,7 @@
242242

243243
{"description":"Empty end tag with following characters",
244244
"input":"a</>bc",
245-
"output":[["Character", "a"], ["Character", "bc"]],
245+
"output":[["Character", "abc"]],
246246
"errors":[
247247
{ "code": "missing-end-tag-name", "line": 1, "col": 4 }
248248
]},

tokenizer/test3.test

+2-2
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@
8888

8989
{"description":"<\\u0000",
9090
"input":"<\u0000",
91-
"output":[["Character", "<"], ["Character", "\u0000"]],
91+
"output":[["Character", "<\u0000"]],
9292
"errors":[
9393
{ "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 },
9494
{ "code": "unexpected-null-character", "line": 1, "col": 2 }
@@ -8415,7 +8415,7 @@
84158415

84168416
{"description":"<<",
84178417
"input":"<<",
8418-
"output":[["Character", "<"], ["Character", "<"]],
8418+
"output":[["Character", "<<"]],
84198419
"errors":[
84208420
{ "code": "invalid-first-character-of-tag-name", "line": 1, "col": 2 },
84218421
{ "code": "eof-before-tag-name", "line": 1, "col": 3 }

tokenizer/test4.test

+3-3
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,7 @@
190190

191191
{"description":"Empty hex numeric entities",
192192
"input":"&#x &#X ",
193-
"output":[["Character", "&#x "], ["Character", "&#X "]],
193+
"output":[["Character", "&#x &#X "]],
194194
"errors":[
195195
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 4 },
196196
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 8 }
@@ -205,7 +205,7 @@
205205

206206
{"description":"Empty decimal numeric entities",
207207
"input":"&# &#; ",
208-
"output":[["Character", "&# "], ["Character", "&#; "]],
208+
"output":[["Character", "&# &#; "]],
209209
"errors":[
210210
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 3 },
211211
{ "code": "absence-of-digits-in-numeric-character-reference", "line": 1, "col": 6 }
@@ -274,7 +274,7 @@
274274

275275
{"description":"Surrogate code point edge cases",
276276
"input":"&#xD7FF;&#xD800;&#xD801;&#xDFFE;&#xDFFF;&#xE000;",
277-
"output":[["Character", "\uD7FF"], ["Character", "\uFFFD"], ["Character", "\uFFFD"], ["Character", "\uFFFD"], ["Character", "\uFFFD\uE000"]],
277+
"output":[["Character", "\uD7FF\uFFFD\uFFFD\uFFFD\uFFFD\uE000"]],
278278
"errors":[
279279
{ "code": "surrogate-character-reference", "line": 1, "col": 17 },
280280
{ "code": "surrogate-character-reference", "line": 1, "col": 25 },

tokenizer/unicodeCharsProblematic.test

+2-2
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@
1818
{"description": "Invalid Unicode character U+DFFF with valid preceding character",
1919
"doubleEscaped":true,
2020
"input": "a\\uDFFF",
21-
"output":[["Character", "a"], ["Character", "\\uDFFF"]],
21+
"output":[["Character", "a\\uDFFF"]],
2222
"errors":[
2323
{ "code": "surrogate-in-input-stream", "line": 1, "col": 2 }
2424
]},
@@ -33,7 +33,7 @@
3333

3434
{"description":"CR followed by U+0000",
3535
"input":"\r\u0000",
36-
"output":[["Character", "\n"], ["Character", "\u0000"]],
36+
"output":[["Character", "\n\u0000"]],
3737
"errors":[
3838
{ "code": "unexpected-null-character", "line": 2, "col": 1 }
3939
]}

0 commit comments

Comments
 (0)