Skip to content

Commit 2d362d6

Browse files
committed
Fix exception in XhtmlParser with HTML5 doctype
Make characters, including whitespace, after the `html` identifier in `<!DOCTYPE html>` optional. Otherwise, the following errors are emitted:

    file.xml:2:15: whitespace expected
    <html xmlns="http://www.w3.org/1999/xhtml">
    ^
    file.xml:2:15: document must contain exactly one element

and a java.lang.NullPointerException is thrown. Unfortunately, testing with Source.fromString doesn't reproduce the NullPointerException that occurs with Source.fromFile.
1 parent 9033cad commit 2d362d6

File tree

3 files changed

+114
-1
lines changed

3 files changed

+114
-1
lines changed

jvm/src/test/scala/scala/xml/parsing/ConstructingParserTest.scala

+52
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ package parsing
44
import scala.io.Source
55
import org.junit.Test
66
import scala.xml.JUnitAssertsForXML.{ assertEquals => assertXml }
7+
import org.junit.Assert.assertEquals
78

89
class ConstructingParserTest {
910

@@ -19,4 +20,55 @@ class ConstructingParserTest {
1920

2021
}
2122

23+
/* Example of using SYSTEM in DOCTYPE */
24+
@Test
25+
def docbookTest = {
26+
val xml =
27+
"""|<!DOCTYPE docbook SYSTEM 'docbook.dtd'>
28+
|<book>
29+
| <title>Book</title>
30+
| <chapter>
31+
| <title>Chapter</title>
32+
| <para>Text</para>
33+
| </chapter>
34+
|</book>""".stripMargin
35+
36+
val expected = <book>
37+
<title>Book</title>
38+
<chapter>
39+
<title>Chapter</title>
40+
<para>Text</para>
41+
</chapter>
42+
</book>
43+
44+
val source = new Source {
45+
val iter = xml.iterator
46+
override def reportError(pos: Int, msg: String, out: java.io.PrintStream = Console.err) = {}
47+
}
48+
49+
val doc = ConstructingParser.fromSource(source, true).document
50+
51+
assertEquals(expected, doc.theSeq)
52+
}
53+
54+
/* Unsupported use of lowercase DOCTYPE and SYSTEM */
55+
@Test(expected = classOf[scala.xml.parsing.FatalError])
56+
def docbookFail: Unit = {
57+
val xml =
58+
"""|<!doctype docbook system 'docbook.dtd'>
59+
|<book>
60+
|<title>Book</title>
61+
|<chapter>
62+
|<title>Chapter</title>
63+
|<para>Text</para>
64+
|</chapter>
65+
|</book>""".stripMargin
66+
67+
val source = new Source {
68+
val iter = xml.iterator
69+
override def reportError(pos: Int, msg: String, out: java.io.PrintStream = Console.err) = {}
70+
}
71+
72+
ConstructingParser.fromSource(source, true).content(TopScope)
73+
}
2274
}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
package scala.xml
2+
package parsing
3+
4+
import scala.io.Source
5+
6+
import org.junit.Test
7+
import org.junit.Assert.assertEquals
8+
9+
class XhtmlParserTest {
10+
11+
@Test
12+
def issue259: Unit = {
13+
val xml =
14+
"""|<!DOCTYPE html>
15+
|<html xmlns="http://www.w3.org/1999/xhtml">
16+
| <head>
17+
| <meta charset="utf-8"/>
18+
| </head>
19+
| <body>
20+
| <p>Text</p>
21+
| </body>
22+
|</html>""".stripMargin
23+
24+
val expected = <html xmlns="http://www.w3.org/1999/xhtml">
25+
<head>
26+
<meta charset="utf-8"/>
27+
</head>
28+
<body>
29+
<p>Text</p>
30+
</body>
31+
</html>
32+
33+
assertEquals(expected, XhtmlParser(Source.fromString(xml)).theSeq)
34+
}
35+
36+
@Test
37+
def html4Strict: Unit = {
38+
val xml =
39+
"""|<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
40+
| "http://www.w3.org/TR/html4/strict.dtd">
41+
|<html>
42+
| <head>
43+
| <title>Title</title>
44+
| </head>
45+
| <body>
46+
| <p>Text</p>
47+
| </body>
48+
|</html>""".stripMargin
49+
50+
val expected = <html xmlns="http://www.w3.org/1999/xhtml">
51+
<head>
52+
<title>Title</title>
53+
</head>
54+
<body>
55+
<p>Text</p>
56+
</body>
57+
</html>
58+
59+
assertEquals(expected, XhtmlParser(Source.fromString(xml)).theSeq)
60+
}
61+
}

shared/src/main/scala/scala/xml/parsing/MarkupParser.scala

+1-1
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests {
518518
xToken("DOCTYPE")
519519
xSpace()
520520
val n = xName
521-
xSpace()
521+
xSpaceOpt()
522522
//external ID
523523
if ('S' == ch || 'P' == ch) {
524524
extID = externalID()

0 commit comments

Comments
 (0)