From 5f2cfadeb9e8574ed66f37dc7a7a868eb129a8a9 Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Mon, 22 Dec 2014 23:34:21 -0800 Subject: [PATCH 1/3] SI-9060 Robust character handling Enforce that attribute value is bounded by a quote. By the way, stop on EOF. --- src/main/scala/scala/xml/parsing/MarkupParserCommon.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/scala/scala/xml/parsing/MarkupParserCommon.scala b/src/main/scala/scala/xml/parsing/MarkupParserCommon.scala index 31d81d343..7bd4a4804 100644 --- a/src/main/scala/scala/xml/parsing/MarkupParserCommon.scala +++ b/src/main/scala/scala/xml/parsing/MarkupParserCommon.scala @@ -62,8 +62,9 @@ private[scala] trait MarkupParserCommon extends TokenTests { * @param endCh either `'` or `"` */ def xAttributeValue(endCh: Char): String = { + require(endCh == '\'' || endCh == '"', s"Expected single or double quote, found $endCh") val buf = new StringBuilder - while (ch != endCh) { + while (ch != endCh && !eof) { // well-formedness constraint if (ch == '<') return errorAndResult("'<' not allowed in attrib value", "") else if (ch == SU) truncatedError("") From 968f7bd94e934c781c19e25847ab09ac98cfbaf6 Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Tue, 23 Dec 2014 13:08:38 -0800 Subject: [PATCH 2/3] SI-9060 Accept 5th-edition names Valid name chars list is updated. Only 16-bit Char is supported, not arbitrary code points. Previous logic to exclude ':' as start char is deleted. --- src/main/scala/scala/xml/Utility.scala | 8 +++ .../scala/xml/parsing/MarkupParser.scala | 5 +- .../scala/scala/xml/parsing/TokenTests.scala | 26 ++++++++- src/test/scala/scala/xml/UtilityTest.scala | 4 +- .../xml/parsing/ConstructingParserTest.scala | 55 +++++++++++++++++++ 5 files changed, 91 insertions(+), 7 deletions(-) create mode 100644 src/test/scala/scala/xml/parsing/ConstructingParserTest.scala diff --git a/src/main/scala/scala/xml/Utility.scala b/src/main/scala/scala/xml/Utility.scala index e9aa88de6..9bcb15c98 100755 --- a/src/main/scala/scala/xml/Utility.scala +++ b/src/main/scala/scala/xml/Utility.scala @@ -273,6 +273,14 @@ object Utility extends AnyRef with parsing.TokenTests { case i => Some(name.substring(0, i)) } + object Qualified { + /** No ':' yields null namespace; leading or trailing yields empty string. */ + def unapply(name: String): Option[(String, String)] = name indexOf ':' match { + case -1 => Some(null, name) + case i => Some(name.substring(0, i), name.substring(i + 1)) // start out of range OK + } + } + /** * Returns a hashcode for the given constituents of a node */ diff --git a/src/main/scala/scala/xml/parsing/MarkupParser.scala b/src/main/scala/scala/xml/parsing/MarkupParser.scala index 612cc88e3..1cdaccb9c 100755 --- a/src/main/scala/scala/xml/parsing/MarkupParser.scala +++ b/src/main/scala/scala/xml/parsing/MarkupParser.scala @@ -573,10 +573,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests { def element1(pscope: NamespaceBinding): NodeSeq = { val pos = this.pos val (qname, (aMap, scope)) = xTag(pscope) - val (pre, local) = Utility.prefix(qname) match { - case Some(p) => (p, qname drop p.length + 1) - case _ => (null, qname) - } + val Utility.Qualified(pre, local) = qname val ts = { if (ch == '/') { // empty element xToken("/>") diff --git a/src/main/scala/scala/xml/parsing/TokenTests.scala b/src/main/scala/scala/xml/parsing/TokenTests.scala index f528f7db0..e500b3a49 100644 --- a/src/main/scala/scala/xml/parsing/TokenTests.scala +++ b/src/main/scala/scala/xml/parsing/TokenTests.scala @@ -35,7 +35,28 @@ trait TokenTests { def isAlpha(c: Char) = (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') def isAlphaDigit(c: Char) = isAlpha(c) || (c >= '0' && c <= '9') - /** + def isNameChar(c: Char): Boolean = ( + isNameStart(c) || + (c >= '0' && c <= '9') || + c == '-' || + c == '.' || + c == 0xB7 || + (c >= 0x300 && c <= 0x36F) || + (c >= 0x203F && c <= 0x2040) + ) + def isNameStart(c: Char): Boolean = ( + if (c < 0x00C0) isAlpha(c) || c == ':' || c == '_' + else if (c < 0x0300) c != 0xD7 && c != 0xF7 + else if (c < 0x2000) c >= 0x370 && c != 0x37E + else if (c < 0x3001) c == 0x200C || c == 0x200D || (0x2070 to 0x218F contains c) || + (0x2C00 to 0x2FEF contains c) + else if (c < 0xD800) true + else if (c < 0x10000) (0xF900 to 0xFDCF contains c) || (0xFDF0 to 0xFFFD contains c) + else false // codepoint < 0xF0000 + ) + + /* Documenting that Appendix B is obsolete. And nested comments rock. + /* * {{{ * NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' * | CombiningChar | Extender @@ -54,7 +75,7 @@ trait TokenTests { }) } - /** + /* * {{{ * NameStart ::= ( Letter | '_' ) * }}} @@ -74,6 +95,7 @@ trait TokenTests { case _ => ch == '_' } } + */ /** * {{{ diff --git a/src/test/scala/scala/xml/UtilityTest.scala b/src/test/scala/scala/xml/UtilityTest.scala index eba631efb..80a2db765 100644 --- a/src/test/scala/scala/xml/UtilityTest.scala +++ b/src/test/scala/scala/xml/UtilityTest.scala @@ -13,7 +13,9 @@ class UtilityTest { @Test def isNameStart: Unit = { assertTrue(Utility.isNameStart('b')) - assertFalse(Utility.isNameStart(':')) + // No indication of who relied on this behavior. + //assertFalse(Utility.isNameStart(':')) + assertTrue(Utility.isNameStart(':')) } @Test diff --git a/src/test/scala/scala/xml/parsing/ConstructingParserTest.scala b/src/test/scala/scala/xml/parsing/ConstructingParserTest.scala new file mode 100644 index 000000000..8acf56681 --- /dev/null +++ b/src/test/scala/scala/xml/parsing/ConstructingParserTest.scala @@ -0,0 +1,55 @@ +package scala.xml +package parsing + +import org.junit.Test +import org.junit.Ignore +import org.junit.runner.RunWith +import org.junit.runners.JUnit4 +import org.junit.Assert +import Assert._ +import scala.xml.JUnitAssertsForXML.assertEquals + +class ConstructingParserTest { + + import scala.io.Source.fromString + import ConstructingParser.fromSource + //import scala.xml.{ NodeSeq, TopScope } + import XML.loadString + + private def parse(s:String) = fromSource(fromString(s), false).element(TopScope) + + // fifth edition + // ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | + // [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | + // [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] + // and + // "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] + // + @Test def `SI-9060: valid chars in names`(): Unit = { + // promote chars to ints + def c(a: Int) = a to a + def cs(a: Int, b: Int) = a to b + val ranges = List( + c(':'), cs('A','Z'), cs('a','z'), cs('_','_'), + 0xC0 to 0x2FF filter { case 0xD7 | 0xF7 => false case _ => true }, + 0x370 to 0x37D, 0x37F to 0x1FFF, 0x200C to 0x200D, 0x2070 to 0x218F, + 0x2C00 to 0x2FEF, 0x3001 to 0xD7FF, 0xF900 to 0xFDCF, 0xFDF0 to 0xFFFD + // , 0x10000 to 0xEFFFF // TODO, code points + ) + val others = List[Range]( + c('-'), c('.'), '0'.toInt to '9', 0xB7 to 0xB7, 0x300 to 0x36F, 0x203F to 0x2040 + ) + + val tester = new TokenTests { } + for (r <- ranges ; i <- r) assertTrue(f"NameStart: $i%#x (${i.toChar})", tester.isNameStart(i.toChar)) + for (r <- others ; i <- r) assertTrue(f"NameChar: $i%#x (${i.toChar})", tester.isNameChar(i.toChar)) + } + + // notice the middle dot + @Test def `SI-9060: problematic char in name`(): Unit = { + val problematic = """""" + val expected = problematic + assertEquals(expected, parse(problematic)) + assertEquals(expected, loadString(problematic)) + } +} From 88c2facb817fe9ef94c945c3db12341cf9ec4c7f Mon Sep 17 00:00:00 2001 From: Som Snytt Date: Tue, 23 Dec 2014 13:25:27 -0800 Subject: [PATCH 3/3] SI-9060 Refrain from println and other test improv Also avoid deprecation messages and other distractions. Make sure threads are cut. (Exploring new computing metaphors.) --- .../scala/xml/parsing/MarkupParser.scala | 3 +- src/test/scala/scala/xml/ShouldCompile.scala | 4 +- src/test/scala/scala/xml/XMLTest.scala | 44 +++++++++++-------- .../scala/xml/pull/XMLEventReaderTest.scala | 36 +++++++++------ 4 files changed, 51 insertions(+), 36 deletions(-) diff --git a/src/main/scala/scala/xml/parsing/MarkupParser.scala b/src/main/scala/scala/xml/parsing/MarkupParser.scala index 1cdaccb9c..016fe62f7 100755 --- a/src/main/scala/scala/xml/parsing/MarkupParser.scala +++ b/src/main/scala/scala/xml/parsing/MarkupParser.scala @@ -264,8 +264,7 @@ trait MarkupParser extends MarkupParserCommon with TokenTests { theNode = m } if (1 != elemCount) { - reportSyntaxError("document must contain exactly one element") - Console.println(children.toList) + reportSyntaxError(s"document must contain exactly one element but has $elemCount") } doc.children = children diff --git a/src/test/scala/scala/xml/ShouldCompile.scala b/src/test/scala/scala/xml/ShouldCompile.scala index f2ddffce5..e586fb7ca 100644 --- a/src/test/scala/scala/xml/ShouldCompile.scala +++ b/src/test/scala/scala/xml/ShouldCompile.scala @@ -64,7 +64,9 @@ class B { // SI-5858 object SI_5858 { - new Elem(null, null, Null, TopScope, Nil: _*) // was ambiguous + @deprecated("ignore me", since="") + def e = + new Elem(null, null, Null, TopScope, Nil: _*) // was ambiguous } class Floozy { diff --git a/src/test/scala/scala/xml/XMLTest.scala b/src/test/scala/scala/xml/XMLTest.scala index 01431baf1..a5c70bf9b 100644 --- a/src/test/scala/scala/xml/XMLTest.scala +++ b/src/test/scala/scala/xml/XMLTest.scala @@ -1,19 +1,21 @@ package scala.xml -import org.junit.{Test => UnitTest} +import org.junit.{ Test => UnitTest } import org.junit.Ignore import org.junit.runner.RunWith import org.junit.runners.JUnit4 -import org.junit.Assert.assertTrue -import org.junit.Assert.assertFalse -import org.junit.Assert.assertEquals -import scala.xml.parsing.ConstructingParser +import org.junit.Assert._ import java.io.StringWriter import java.io.BufferedOutputStream import java.io.ByteArrayOutputStream import java.io.StringReader +import java.io.PrintStream import scala.collection.Iterable +import scala.io.{ Source => Input } import scala.xml.Utility.sort +import scala.xml.parsing.{ ConstructingParser, FatalError } +import scala.util.{ Try, Failure } +import scala.language.postfixOps object XMLTest { val e: scala.xml.MetaData = Null //Node.NoAttributes @@ -469,23 +471,27 @@ Ours is the portal of hope, come as you are." } @UnitTest - def ioPosition = { - val out = new ByteArrayOutputStream - Console.withErr(out) { - Console.withOut(out) { - try { - xml.parsing.ConstructingParser.fromSource(io.Source.fromString(""), false).document() - } catch { - case e: Exception => println(e.getMessage) - } + def ioPosition(): Unit = { + // io.Source that captures errors + class TestInput(text: String) extends Input { + val out = new ByteArrayOutputStream + val ps = new PrintStream(out, true, "UTF8") + override val iter = Input fromString "" + override def report(pos: Int, msg: String, out: PrintStream) = super.report(pos, msg, ps) + def result: String = { + ps.flush() + out.toString("UTF8").replaceAllLiterally(0.toChar.toString, "*") } } - out.flush() + val src = new TestInput("") + Try(ConstructingParser.fromSource(src, false).document()) match { + case Failure(e: FatalError) => assertEquals("expected closing tag of foo", e.getMessage) + case _ => fail("expected exception") + } assertEquals( - """:1:5: '/' expected instead of '' ^ -:1:5: name expected, but char '' cannot start a name ^ -expected closing tag of foo -""", out.toString) + """:1:5: '/' expected instead of '*' ^ +:1:5: name expected, but char '*' cannot start a name ^ +""", src.result) } def f(s: String) = { diff --git a/src/test/scala/scala/xml/pull/XMLEventReaderTest.scala b/src/test/scala/scala/xml/pull/XMLEventReaderTest.scala index be55938ca..c2209bee7 100644 --- a/src/test/scala/scala/xml/pull/XMLEventReaderTest.scala +++ b/src/test/scala/scala/xml/pull/XMLEventReaderTest.scala @@ -8,41 +8,49 @@ import org.junit.Assert.assertTrue import org.junit.Assert.assertFalse import org.junit.Assert.assertEquals +import java.io.PrintStream import scala.io.Source +import scala.xml.parsing.FatalError class XMLEventReaderTest { - val src = Source.fromString("!") + def reading(text: String)(f: XMLEventReader => Unit): Unit = { + val s = new Source { + override protected val iter = Source fromString text + override def report(pos: Int, msg: String, out: PrintStream) = () + } + val r = new XMLEventReader(s) + try f(r) + finally r.stop() + } @Test - def pull: Unit = { - val er = new XMLEventReader(src) - assertTrue(er.next match { + def pull() = reading("!") { r => + assertTrue(r.next() match { case EvElemStart(_, "hello", _, _) => true case _ => false }) - assertTrue(er.next match { + assertTrue(r.next() match { case EvElemStart(_, "world", _, _) => true case _ => false }) - assertTrue(er.next match { + assertTrue(r.next() match { case EvElemEnd(_, "world") => true case _ => false }) - assertTrue(er.next match { + assertTrue(r.next() match { case EvText("!") => true case _ => false }) - assertTrue(er.next match { + assertTrue(r.next() match { case EvElemEnd(_, "hello") => true case _ => false }) - er.stop // allow thread to be garbage-collected } - @Test(expected = classOf[Exception]) + @Test(expected = classOf[FatalError]) //expected closing tag of verbosegc def missingTagTest: Unit = { - val data= + val data = """ | | @@ -54,8 +62,8 @@ class XMLEventReaderTest { | |""".stripMargin - val er = new XMLEventReader(Source.fromString(data)) - while(er.hasNext) er.next() - er.stop() + reading(data) { r => + while (r.hasNext) r.next() + } } }