Skip to content

Commit ea92325

Browse files
committed
Fix OOM on malformed input.
This commit makes up for what scala#35 lacked: specifically, handling of truncated CDATA sections, comments, and DTDs.
1 parent d89b64d commit ea92325

File tree

3 files changed

+121
-28
lines changed

3 files changed

+121
-28
lines changed

src/main/scala/scala/xml/parsing/MarkupParser.scala

+9-9
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
265265
}
266266
if (1 != elemCount) {
267267
reportSyntaxError("document must contain exactly one element")
268-
Console.println(children.toList)
268+
//Console.println(children.toList)
269269
}
270270

271271
doc.children = children
@@ -389,7 +389,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
389389
def xComment: NodeSeq = {
390390
val sb: StringBuilder = new StringBuilder()
391391
xToken("--")
392-
while (true) {
392+
while (!eof) {
393393
if (ch == '-' && { sb.append(ch); nextch(); ch == '-' }) {
394394
sb.length = sb.length - 1
395395
nextch()
@@ -398,7 +398,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
398398
} else sb.append(ch)
399399
nextch()
400400
}
401-
throw FatalError("this cannot happen")
401+
throw truncatedError("broken comment")
402402
}
403403

404404
/* todo: move this into the NodeBuilder class */
@@ -678,10 +678,10 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
678678

679679
def markupDecl1() = {
680680
def doInclude() = {
681-
xToken('['); while (']' != ch) markupDecl(); nextch() // ']'
681+
xToken('['); while (']' != ch && !eof) markupDecl(); nextch() // ']'
682682
}
683683
def doIgnore() = {
684-
xToken('['); while (']' != ch) nextch(); nextch() // ']'
684+
xToken('['); while (']' != ch && !eof) nextch(); nextch() // ']'
685685
}
686686
if ('?' == ch) {
687687
nextch()
@@ -747,7 +747,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
747747

748748
case _ =>
749749
curInput.reportError(pos, "unexpected character '" + ch + "', expected some markupdecl")
750-
while (ch != '>')
750+
while (ch != '>' && !eof)
751751
nextch()
752752
}
753753
}
@@ -780,7 +780,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
780780
def intSubset() {
781781
//Console.println("(DEBUG) intSubset()")
782782
xSpace()
783-
while (']' != ch)
783+
while (']' != ch && !eof)
784784
markupDecl()
785785
}
786786

@@ -792,7 +792,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
792792
xSpace()
793793
val n = xName
794794
xSpace()
795-
while ('>' != ch) {
795+
while ('>' != ch && !eof) {
796796
//Console.println("["+ch+"]")
797797
putChar(ch)
798798
nextch()
@@ -817,7 +817,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
817817
var attList: List[AttrDecl] = Nil
818818

819819
// later: find the elemDecl for n
820-
while ('>' != ch) {
820+
while ('>' != ch && !eof) {
821821
val aname = xName
822822
xSpace()
823823
// could be enumeration (foo,bar) parse this later :-/

src/main/scala/scala/xml/parsing/MarkupParserCommon.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ private[scala] trait MarkupParserCommon extends TokenTests {
247247
while (true) {
248248
if (ch == head && peek(rest))
249249
return handler(positioner(), sb.toString)
250-
else if (ch == SU)
250+
else if (ch == SU || eof)
251251
truncatedError("") // throws TruncatedXMLControl in compiler
252252

253253
sb append ch

src/test/scala/scala/xml/pull/XMLEventReaderTest.scala

+111-18
Original file line numberDiff line numberDiff line change
@@ -2,19 +2,19 @@ package scala.xml
22
package pull
33

44
import org.junit.Test
5-
import org.junit.Ignore
6-
import org.junit.runner.RunWith
7-
import org.junit.runners.JUnit4
8-
import org.junit.Assert.assertTrue
9-
import org.junit.Assert.assertFalse
10-
import org.junit.Assert.assertEquals
5+
import org.junit.Assert.{assertFalse, assertTrue}
116

127
import scala.io.Source
138

149
class XMLEventReaderTest {
1510

1611
val src = Source.fromString("<hello><world/>!</hello>")
1712

13+
private def toSource(s: String) = new Source {
14+
val iter = s.iterator
15+
override def reportError(pos: Int, msg: String, out: java.io.PrintStream = Console.err) {}
16+
}
17+
1818
@Test
1919
def pull: Unit = {
2020
val er = new XMLEventReader(src)
@@ -44,17 +44,110 @@ class XMLEventReaderTest {
4444
@Test
4545
def issue35: Unit = {
4646
val broken = "<broken attribute='is truncated"
47-
val x = new Source {
48-
val iter = broken.iterator
49-
override def reportError(pos: Int, msg: String, out: java.io.PrintStream = Console.err) {}
50-
}
51-
val r = new XMLEventReader(x)
47+
val r = new XMLEventReader(toSource(broken))
48+
49+
assertTrue(r.next.isInstanceOf[EvElemStart])
50+
}
51+
52+
@Test
53+
def malformedCDATA: Unit = {
54+
val data = "<broken><![CDATA[A"
55+
val r = new XMLEventReader(toSource(data))
56+
57+
assertTrue(r.next.isInstanceOf[EvElemStart])
58+
}
59+
60+
@Test
61+
def malformedComment1: Unit = {
62+
val data = "<broken><!"
63+
val r = new XMLEventReader(toSource(data))
64+
5265
assertTrue(r.next.isInstanceOf[EvElemStart])
5366
}
5467

55-
@Test(expected = classOf[Exception])
56-
def missingTagTest: Unit = {
57-
val data=
68+
@Test
69+
def malformedComment2: Unit = {
70+
val data = "<broken><!-- comment "
71+
val r = new XMLEventReader(toSource(data))
72+
73+
assertTrue(r.next.isInstanceOf[EvElemStart])
74+
}
75+
76+
@Test
77+
def malformedDTD1: Unit = {
78+
// broken ELEMENT
79+
val data =
80+
"""<?xml version="1.0" encoding="utf-8"?>
81+
|<!DOCTYPE broken [
82+
| <!ELE
83+
""".stripMargin
84+
val r = new XMLEventReader(toSource(data))
85+
86+
assertFalse(r.hasNext)
87+
}
88+
89+
@Test
90+
def malformedDTD2: Unit = {
91+
val data =
92+
"""<!DOCTYPE broken [
93+
| <!ELEMENT data (#PCDATA)>
94+
""".stripMargin
95+
val r = new XMLEventReader(toSource(data))
96+
97+
assertFalse(r.hasNext)
98+
}
99+
100+
@Test
101+
def malformedDTD3: Unit = {
102+
// broken ATTLIST
103+
val data =
104+
"""<!DOCTYPE broken [
105+
| <!ATTL
106+
""".stripMargin
107+
val r = new XMLEventReader(toSource(data))
108+
109+
assertFalse(r.hasNext)
110+
}
111+
112+
@Test
113+
def malformedDTD4: Unit = {
114+
// unexpected declaration
115+
val data =
116+
"""<!DOCTYPE broken [
117+
| <!UNEXPECTED
118+
""".stripMargin
119+
val r = new XMLEventReader(toSource(data))
120+
121+
assertFalse(r.hasNext)
122+
}
123+
124+
@Test
125+
def malformedDTD5: Unit = {
126+
val data =
127+
"""<!DOCTYPE broken [
128+
| <!ENTITY % foo 'INCLUDE'>
129+
| <![%foo;[
130+
""".stripMargin
131+
val r = new XMLEventReader(toSource(data))
132+
133+
assertFalse(r.hasNext)
134+
}
135+
136+
@Test
137+
def malformedDTD6: Unit = {
138+
val data =
139+
"""<!DOCTYPE broken [
140+
| <!ENTITY % foo 'IGNORE'>
141+
| <![%foo;[
142+
""".stripMargin
143+
val r = new XMLEventReader(toSource(data))
144+
145+
assertFalse(r.hasNext)
146+
}
147+
148+
@Test(expected = classOf[Exception])
149+
def missingTagTest: Unit = {
150+
val data=
58151
"""<?xml version="1.0" ?>
59152
|<verbosegc xmlns="http://www.ibm.com/j9/verbosegc">
60153
|
@@ -66,8 +159,8 @@ class XMLEventReaderTest {
66159
|</exclusive-start>
67160
|""".stripMargin
68161

69-
val er = new XMLEventReader(Source.fromString(data))
70-
while(er.hasNext) er.next()
71-
er.stop()
72-
}
162+
val er = new XMLEventReader(toSource(data))
163+
while(er.hasNext) er.next()
164+
er.stop()
165+
}
73166
}

0 commit comments

Comments
 (0)