Skip to content

Commit 55008bd

Browse files
authored
Merge pull request #99 from shimamoto/oom-malformed
Fix OOM on malformed input
2 parents d89b64d + ca3e8ce commit 55008bd

File tree

3 files changed

+126
-28
lines changed

3 files changed

+126
-28
lines changed

src/main/scala/scala/xml/parsing/MarkupParser.scala

+9-9
Original file line number | Diff line number | Diff line change
@@ -265,7 +265,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
265265
}
266266
if (1 != elemCount) {
267267
reportSyntaxError("document must contain exactly one element")
268-
Console.println(children.toList)
268+
//Console.println(children.toList)
269269
}
270270

271271
doc.children = children
@@ -389,7 +389,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
389389
def xComment: NodeSeq = {
390390
val sb: StringBuilder = new StringBuilder()
391391
xToken("--")
392-
while (true) {
392+
while (!eof) {
393393
if (ch == '-' && { sb.append(ch); nextch(); ch == '-' }) {
394394
sb.length = sb.length - 1
395395
nextch()
@@ -398,7 +398,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
398398
} else sb.append(ch)
399399
nextch()
400400
}
401-
throw FatalError("this cannot happen")
401+
throw truncatedError("broken comment")
402402
}
403403

404404
/* todo: move this into the NodeBuilder class */
@@ -678,10 +678,10 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
678678

679679
def markupDecl1() = {
680680
def doInclude() = {
681-
xToken('['); while (']' != ch) markupDecl(); nextch() // ']'
681+
xToken('['); while (']' != ch && !eof) markupDecl(); nextch() // ']'
682682
}
683683
def doIgnore() = {
684-
xToken('['); while (']' != ch) nextch(); nextch() // ']'
684+
xToken('['); while (']' != ch && !eof) nextch(); nextch() // ']'
685685
}
686686
if ('?' == ch) {
687687
nextch()
@@ -747,7 +747,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
747747

748748
case _ =>
749749
curInput.reportError(pos, "unexpected character '" + ch + "', expected some markupdecl")
750-
while (ch != '>')
750+
while (ch != '>' && !eof)
751751
nextch()
752752
}
753753
}
@@ -780,7 +780,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
780780
def intSubset() {
781781
//Console.println("(DEBUG) intSubset()")
782782
xSpace()
783-
while (']' != ch)
783+
while (']' != ch && !eof)
784784
markupDecl()
785785
}
786786

@@ -792,7 +792,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
792792
xSpace()
793793
val n = xName
794794
xSpace()
795-
while ('>' != ch) {
795+
while ('>' != ch && !eof) {
796796
//Console.println("["+ch+"]")
797797
putChar(ch)
798798
nextch()
@@ -817,7 +817,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
817817
var attList: List[AttrDecl] = Nil
818818

819819
// later: find the elemDecl for n
820-
while ('>' != ch) {
820+
while ('>' != ch && !eof) {
821821
val aname = xName
822822
xSpace()
823823
// could be enumeration (foo,bar) parse this later :-/

src/main/scala/scala/xml/parsing/MarkupParserCommon.scala

+1-1
Original file line number | Diff line number | Diff line change
@@ -247,7 +247,7 @@ private[scala] trait MarkupParserCommon extends TokenTests {
247247
while (true) {
248248
if (ch == head && peek(rest))
249249
return handler(positioner(), sb.toString)
250-
else if (ch == SU)
250+
else if (ch == SU || eof)
251251
truncatedError("") // throws TruncatedXMLControl in compiler
252252

253253
sb append ch

src/test/scala/scala/xml/pull/XMLEventReaderTest.scala

+116-18
Original file line number | Diff line number | Diff line change
@@ -2,19 +2,20 @@ package scala.xml
22
package pull
33

44
import org.junit.Test
5-
import org.junit.Ignore
6-
import org.junit.runner.RunWith
7-
import org.junit.runners.JUnit4
8-
import org.junit.Assert.assertTrue
9-
import org.junit.Assert.assertFalse
10-
import org.junit.Assert.assertEquals
5+
import org.junit.Assert.{assertFalse, assertTrue}
116

127
import scala.io.Source
8+
import scala.xml.parsing.FatalError
139

1410
class XMLEventReaderTest {
1511

1612
val src = Source.fromString("<hello><world/>!</hello>")
1713

14+
private def toSource(s: String) = new Source {
15+
val iter = s.iterator
16+
override def reportError(pos: Int, msg: String, out: java.io.PrintStream = Console.err) {}
17+
}
18+
1819
@Test
1920
def pull: Unit = {
2021
val er = new XMLEventReader(src)
@@ -44,17 +45,114 @@ class XMLEventReaderTest {
4445
@Test
4546
def issue35: Unit = {
4647
val broken = "<broken attribute='is truncated"
47-
val x = new Source {
48-
val iter = broken.iterator
49-
override def reportError(pos: Int, msg: String, out: java.io.PrintStream = Console.err) {}
50-
}
51-
val r = new XMLEventReader(x)
48+
val r = new XMLEventReader(toSource(broken))
49+
50+
assertTrue(r.next.isInstanceOf[EvElemStart])
51+
}
52+
53+
@Test(expected = classOf[FatalError])
54+
def malformedCDATA: Unit = {
55+
val data = "<broken><![CDATA[A"
56+
val r = new XMLEventReader(toSource(data))
57+
5258
assertTrue(r.next.isInstanceOf[EvElemStart])
59+
// error when returning EvText of CDATA
60+
r.next
61+
}
62+
63+
@Test(expected = classOf[FatalError])
64+
def malformedComment1: Unit = {
65+
val data = "<!"
66+
val r = new XMLEventReader(toSource(data))
67+
68+
// error when returning EvComment
69+
r.next
70+
}
71+
72+
@Test(expected = classOf[FatalError])
73+
def malformedComment2: Unit = {
74+
val data = "<!-- comment "
75+
val r = new XMLEventReader(toSource(data))
76+
77+
// error when returning EvComment
78+
r.next
79+
}
80+
81+
@Test
82+
def malformedDTD1: Unit = {
83+
// broken ELEMENT
84+
val data =
85+
"""<?xml version="1.0" encoding="utf-8"?>
86+
|<!DOCTYPE broken [
87+
| <!ELE
88+
""".stripMargin
89+
val r = new XMLEventReader(toSource(data))
90+
91+
assertFalse(r.hasNext)
92+
}
93+
94+
@Test
95+
def malformedDTD2: Unit = {
96+
val data =
97+
"""<!DOCTYPE broken [
98+
| <!ELEMENT data (#PCDATA)>
99+
""".stripMargin
100+
val r = new XMLEventReader(toSource(data))
101+
102+
assertFalse(r.hasNext)
103+
}
104+
105+
@Test
106+
def malformedDTD3: Unit = {
107+
// broken ATTLIST
108+
val data =
109+
"""<!DOCTYPE broken [
110+
| <!ATTL
111+
""".stripMargin
112+
val r = new XMLEventReader(toSource(data))
113+
114+
assertFalse(r.hasNext)
115+
}
116+
117+
@Test
118+
def malformedDTD4: Unit = {
119+
// unexpected declaration
120+
val data =
121+
"""<!DOCTYPE broken [
122+
| <!UNEXPECTED
123+
""".stripMargin
124+
val r = new XMLEventReader(toSource(data))
125+
126+
assertFalse(r.hasNext)
53127
}
54128

55-
@Test(expected = classOf[Exception])
56-
def missingTagTest: Unit = {
57-
val data=
129+
@Test
130+
def malformedDTD5: Unit = {
131+
val data =
132+
"""<!DOCTYPE broken [
133+
| <!ENTITY % foo 'INCLUDE'>
134+
| <![%foo;[
135+
""".stripMargin
136+
val r = new XMLEventReader(toSource(data))
137+
138+
assertFalse(r.hasNext)
139+
}
140+
141+
@Test
142+
def malformedDTD6: Unit = {
143+
val data =
144+
"""<!DOCTYPE broken [
145+
| <!ENTITY % foo 'IGNORE'>
146+
| <![%foo;[
147+
""".stripMargin
148+
val r = new XMLEventReader(toSource(data))
149+
150+
assertFalse(r.hasNext)
151+
}
152+
153+
@Test(expected = classOf[Exception])
154+
def missingTagTest: Unit = {
155+
val data=
58156
"""<?xml version="1.0" ?>
59157
|<verbosegc xmlns="http://www.ibm.com/j9/verbosegc">
60158
|
@@ -66,8 +164,8 @@ class XMLEventReaderTest {
66164
|</exclusive-start>
67165
|""".stripMargin
68166

69-
val er = new XMLEventReader(Source.fromString(data))
70-
while(er.hasNext) er.next()
71-
er.stop()
72-
}
167+
val er = new XMLEventReader(toSource(data))
168+
while(er.hasNext) er.next()
169+
er.stop()
170+
}
73171
}

0 commit comments

Comments
 (0)