Skip to content

Commit 75823c6

Browse files
paulp authored and adriaanm committed
More work consolidating the XML code needlessly...
More work consolidating the XML code needlessly duplicated between the compiler and the library. Having to fix #2354 in two completely different places was, I found, very motivating.
1 parent 92b270f commit 75823c6

File tree

2 files changed

+192
-166
lines changed

2 files changed

+192
-166
lines changed

src/library/scala/xml/parsing/MarkupParser.scala

+18 -160
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,13 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
3232
self: MarkupParser with MarkupHandler =>
3333

3434
type PositionType = Int
35-
type InputType = Source
35+
type InputType = Source
36+
type ElementType = NodeSeq
37+
type AttributesType = (MetaData, NamespaceBinding)
38+
type NamespaceType = NamespaceBinding
39+
40+
def truncatedError(msg: String): Nothing = throw FatalError(msg)
41+
def errorNoEnd(tag: String) = throw FatalError("expected closing tag of " + tag)
3642

3743
def xHandleError(that: Char, msg: String) = reportSyntaxError(msg)
3844

@@ -106,8 +112,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
106112
* // this is a bit more lenient than necessary...
107113
*/
108114
def prolog(): Tuple3[Option[String], Option[String], Option[Boolean]] = {
109-
110-
//Console.println("(DEBUG) prolog")
111115
var n = 0
112116
var info_ver: Option[String] = None
113117
var info_enc: Option[String] = None
@@ -176,7 +180,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
176180
if (m.length - n != 0) {
177181
reportSyntaxError("VersionInfo EncodingDecl? or '?>' expected!");
178182
}
179-
//Console.println("[MarkupParser::textDecl] finished parsing textdecl");
180183
Tuple2(info_ver, info_enc);
181184
}
182185

@@ -190,8 +193,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
190193
*/
191194

192195
def document(): Document = {
193-
194-
//Console.println("(DEBUG) document")
195196
doc = new Document()
196197

197198
this.dtd = null
@@ -204,18 +205,15 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
204205
nextch // is prolog ?
205206
var children: NodeSeq = null
206207
if ('?' == ch) {
207-
//Console.println("[MarkupParser::document] starts with xml declaration");
208208
nextch;
209209
info_prolog = prolog()
210210
doc.version = info_prolog._1
211211
doc.encoding = info_prolog._2
212212
doc.standAlone = info_prolog._3
213213

214214
children = content(TopScope) // DTD handled as side effect
215-
} else {
216-
//Console.println("[MarkupParser::document] does not start with xml declaration");
217-
//
218-
215+
}
216+
else {
219217
val ts = new NodeBuffer();
220218
content1(TopScope, ts); // DTD handled as side effect
221219
ts &+ content(TopScope);
@@ -257,6 +255,14 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
257255
this
258256
}
259257

258+
def ch_returning_nextch = { val res = ch ; nextch ; res }
259+
def mkProcInstr(position: Int, name: String, text: String): NodeSeq =
260+
handle.procInstr(position, name, text)
261+
262+
def mkAttributes(name: String, pscope: NamespaceBinding) =
263+
if (isNameStart (ch)) xAttributes(pscope)
264+
else (Null, pscope)
265+
260266
/** this method assign the next character to ch and advances in input */
261267
def nextch = {
262268
if (curInput.hasNext) {
@@ -315,27 +321,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
315321
(aMap,scope)
316322
}
317323

318-
/** attribute value, terminated by either ' or ". value may not contain <.
319-
* AttValue ::= `'` { _ } `'`
320-
* | `"` { _ } `"`
321-
*/
322-
def xAttributeValue(): String = {
323-
val endch = ch
324-
nextch
325-
while (ch != endch) {
326-
if ('<' == ch)
327-
reportSyntaxError( "'<' not allowed in attrib value" );
328-
putChar(ch)
329-
nextch
330-
}
331-
nextch
332-
val str = cbuf.toString()
333-
cbuf.length = 0
334-
335-
// well-formedness constraint
336-
normalizeAttributeValue(str)
337-
}
338-
339324
/** entity value, terminated by either ' or ". value may not contain &lt;.
340325
* AttValue ::= `'` { _ } `'`
341326
* | `"` { _ } `"`
@@ -353,35 +338,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
353338
str
354339
}
355340

356-
357-
/** parse a start or empty tag.
358-
* [40] STag ::= '&lt;' Name { S Attribute } [S]
359-
* [44] EmptyElemTag ::= '&lt;' Name { S Attribute } [S]
360-
*/
361-
protected def xTag(pscope:NamespaceBinding): (String, MetaData, NamespaceBinding) = {
362-
val qname = xName
363-
364-
xSpaceOpt
365-
val (aMap: MetaData, scope: NamespaceBinding) = {
366-
if (isNameStart(ch))
367-
xAttributes(pscope)
368-
else
369-
(Null, pscope)
370-
}
371-
(qname, aMap, scope)
372-
}
373-
374-
/** [42] '&lt;' xmlEndTag ::= '&lt;' '/' Name S? '&gt;'
375-
*/
376-
def xEndTag(n: String) = {
377-
xToken('/')
378-
val m = xName
379-
if (n != m)
380-
reportSyntaxError("expected closing tag of " + n/* +", not "+m*/);
381-
xSpaceOpt
382-
xToken('>')
383-
}
384-
385341
/** '&lt;! CharData ::= [CDATA[ ( {char} - {char}"]]&gt;"{char} ) ']]&gt;'
386342
*
387343
* see [15]
@@ -392,14 +348,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
392348
xTakeUntil(mkResult, () => pos, "]]>")
393349
}
394350

395-
/** CharRef ::= "&amp;#" '0'..'9' {'0'..'9'} ";"
396-
* | "&amp;#x" '0'..'9'|'A'..'F'|'a'..'f' { hexdigit } ";"
397-
*
398-
* see [66]
399-
*/
400-
def xCharRef(ch: () => Char, nextch: () => Unit): String =
401-
Utility.parseCharRef(ch, nextch, reportSyntaxError _)
402-
403351
/** Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* '--&gt;'
404352
*
405353
* see [15]
@@ -576,7 +524,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
576524
*/
577525
def element1(pscope: NamespaceBinding): NodeSeq = {
578526
val pos = this.pos
579-
val (qname, aMap, scope) = xTag(pscope)
527+
val (qname, (aMap, scope)) = xTag(pscope)
580528
val (pre, local) = Utility.prefix(qname) match {
581529
case Some(p) => (p, qname drop p.length+1)
582530
case _ => (null, qname)
@@ -600,50 +548,6 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
600548
res
601549
}
602550

603-
//def xEmbeddedExpr: MarkupType;
604-
605-
/** Name ::= (Letter | '_' | ':') (NameChar)*
606-
*
607-
* see [5] of XML 1.0 specification
608-
*/
609-
def xName: String = {
610-
if (isNameStart(ch)) {
611-
while (isNameChar(ch)) {
612-
putChar(ch)
613-
nextch
614-
}
615-
val n = cbuf.toString().intern()
616-
cbuf.length = 0
617-
n
618-
} else {
619-
reportSyntaxError("name expected")
620-
""
621-
}
622-
}
623-
624-
/** '&lt;?' ProcInstr ::= Name [S ({Char} - ({Char}'&gt;?' {Char})]'?&gt;'
625-
*
626-
* see [15]
627-
*/
628-
def xProcInstr: NodeSeq = {
629-
val sb:StringBuilder = new StringBuilder()
630-
val n = xName
631-
if (isSpace(ch)) {
632-
xSpace
633-
while (true) {
634-
if (ch == '?' && { sb.append( ch ); nextch; ch == '>' }) {
635-
sb.length = sb.length - 1;
636-
nextch;
637-
return handle.procInstr(tmppos, n, sb.toString);
638-
} else
639-
sb.append(ch);
640-
nextch
641-
}
642-
};
643-
xToken("?>")
644-
handle.procInstr(tmppos, n, sb.toString)
645-
}
646-
647551
/** parse character data.
648552
* precondition: xEmbeddedBlock == false (we are not in a scala block)
649553
*/
@@ -996,50 +900,4 @@ trait MarkupParser extends MarkupParserCommon with TokenTests
996900
pos = curInput.pos
997901
eof = false // must be false, because of places where entity refs occur
998902
}
999-
1000-
/** for the moment, replace only character references
1001-
* see spec 3.3.3
1002-
* precond: cbuf empty
1003-
*/
1004-
def normalizeAttributeValue(attval: String): String = {
1005-
val s: Seq[Char] = attval
1006-
val it = s.iterator
1007-
while (it.hasNext) {
1008-
it.next match {
1009-
case ' '|'\t'|'\n'|'\r' =>
1010-
cbuf.append(' ');
1011-
case '&' => it.next match {
1012-
case '#' =>
1013-
var c = it.next
1014-
val s = xCharRef ({ () => c }, { () => c = it.next })
1015-
cbuf.append(s)
1016-
case nchar =>
1017-
val nbuf = new StringBuilder()
1018-
var d = nchar
1019-
do {
1020-
nbuf.append(d)
1021-
d = it.next
1022-
} while(d != ';');
1023-
nbuf.toString() match {
1024-
case "lt" => cbuf.append('<')
1025-
case "gt" => cbuf.append('>')
1026-
case "amp" => cbuf.append('&')
1027-
case "apos" => cbuf.append('\'')
1028-
case "quot" => cbuf.append('"')
1029-
case "quote" => cbuf.append('"')
1030-
case name =>
1031-
cbuf.append('&')
1032-
cbuf.append(name)
1033-
cbuf.append(';')
1034-
}
1035-
}
1036-
case c =>
1037-
cbuf.append(c)
1038-
}
1039-
}
1040-
val name = cbuf.toString()
1041-
cbuf.length = 0
1042-
name
1043-
}
1044-
1045903
}

0 commit comments

Comments
 (0)