From 7b97e37cca88a20e5ced78af51e3aff92eb9b2d7 Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Wed, 21 Jun 2023 14:50:40 +0200 Subject: [PATCH 1/2] make Node.toString stack safe --- shared/src/main/scala/scala/xml/Utility.scala | 95 ++++++++++++------- .../test/scala/scala/xml/UtilityTest.scala | 6 ++ 2 files changed, 65 insertions(+), 36 deletions(-) diff --git a/shared/src/main/scala/scala/xml/Utility.scala b/shared/src/main/scala/scala/xml/Utility.scala index a1c6fd99..44c2ca83 100755 --- a/shared/src/main/scala/scala/xml/Utility.scala +++ b/shared/src/main/scala/scala/xml/Utility.scala @@ -13,6 +13,7 @@ package scala package xml +import scala.annotation.tailrec import scala.collection.mutable import scala.language.implicitConversions import scala.collection.Seq @@ -191,9 +192,8 @@ object Utility extends AnyRef with parsing.TokenTests { decodeEntities: Boolean = true, preserveWhitespace: Boolean = false, minimizeTags: Boolean = false - ): StringBuilder = { + ): StringBuilder = serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, if (minimizeTags) MinimizeMode.Always else MinimizeMode.Never) - } /** * Serialize an XML Node to a StringBuilder. 
@@ -212,32 +212,64 @@ object Utility extends AnyRef with parsing.TokenTests { preserveWhitespace: Boolean = false, minimizeTags: MinimizeMode.Value = MinimizeMode.Default ): StringBuilder = { - x match { - case c: Comment => if (!stripComments) c.buildString(sb); sb - case s: SpecialNode => s.buildString(sb) - case g: Group => - for (c <- g.nodes) serialize(c, g.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags); sb - case el: Elem => - // print tag with namespace declarations - sb.append('<') - el.nameToString(sb) - if (el.attributes.ne(null)) el.attributes.buildString(sb) - el.scope.buildString(sb, pscope) - if (el.child.isEmpty && - (minimizeTags == MinimizeMode.Always || - (minimizeTags == MinimizeMode.Default && el.minimizeEmpty))) { - // no children, so use short form: <xxx/> - sb.append("/>") - } else { - // children, so use long form: <xxx>...</xxx> - sb.append('>') - sequenceToXML(el.child, el.scope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) + serializeImpl(List(x), pscope, false, stripComments, minimizeTags, sb) + sb + } + + private def serializeImpl( + ns: Seq[Node], + pscope: NamespaceBinding, + spaced: Boolean, + stripComments: Boolean, + minimizeTags: MinimizeMode.Value, + sb: StringBuilder + ): Unit = { + @tailrec def ser(nss: List[Seq[Node]], pscopes: List[NamespaceBinding], spaced: List[Boolean], toClose: List[Node]): Unit = nss match { + case List(ns) if ns.isEmpty => + case ns :: rests if ns.isEmpty => + if (toClose.head != null) { sb.append("</") - el.nameToString(sb) + toClose.head.nameToString(sb) sb.append('>') } - case _ => throw new IllegalArgumentException("Don't know how to serialize a " + x.getClass.getName) + ser(rests, pscopes.tail, spaced.tail, toClose.tail) + case ns1 :: r => + val (n, ns) = (ns1.head, ns1.tail) + def sp(): Unit = if (ns.nonEmpty && spaced.head) sb.append(' ') + n match { + case c: Comment => + if (!stripComments) { + c.buildString(sb) + sp() + } + ser(ns :: r, pscopes, spaced, toClose) + case s: SpecialNode => + s.buildString(sb) + sp() + ser(ns :: r, 
pscopes, spaced, toClose) + case g: Group => + ser(g.nodes :: ns :: r, g.scope :: pscopes, false :: spaced, null :: toClose) + case e: Elem => + sb.append('<') + e.nameToString(sb) + if (e.attributes.ne(null)) e.attributes.buildString(sb) + e.scope.buildString(sb, pscopes.head) + if (e.child.isEmpty && + (minimizeTags == MinimizeMode.Always || + (minimizeTags == MinimizeMode.Default && e.minimizeEmpty))) { + // no children, so use short form: <xxx/> + sb.append("/>") + sp() + ser(ns :: r, pscopes, spaced, toClose) + } else { + sb.append('>') + val csp = e.child.forall(isAtomAndNotText) + ser(e.child :: ns :: r, e.scope :: pscopes, csp :: spaced, e :: toClose) + } + case n => throw new IllegalArgumentException("Don't know how to serialize a " + n.getClass.getName) + } } + ser(List(ns), List(pscope), List(spaced), Nil) } def sequenceToXML( @@ -248,18 +280,9 @@ object Utility extends AnyRef with parsing.TokenTests { decodeEntities: Boolean = true, preserveWhitespace: Boolean = false, minimizeTags: MinimizeMode.Value = MinimizeMode.Default - ): Unit = { - if (children.isEmpty) () - else if (children.forall(isAtomAndNotText)) { // add space - val it: Iterator[Node] = children.iterator - val f: Node = it.next() - serialize(f, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - while (it.hasNext) { - val x: Node = it.next() - sb.append(' ') - serialize(x, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) - } - } else children.foreach { serialize(_, pscope, sb, stripComments, decodeEntities, preserveWhitespace, minimizeTags) } + ): Unit = if (children.nonEmpty) { + val spaced = children.forall(isAtomAndNotText) + serializeImpl(children, pscope, spaced, stripComments, minimizeTags, sb) } def splitName(name: String): (Option[String], String) = { diff --git a/shared/src/test/scala/scala/xml/UtilityTest.scala b/shared/src/test/scala/scala/xml/UtilityTest.scala index 87384d3e..4553a6f4 100644 --- 
a/shared/src/test/scala/scala/xml/UtilityTest.scala +++ b/shared/src/test/scala/scala/xml/UtilityTest.scala @@ -230,4 +230,10 @@ class UtilityTest { assertEquals("", result) } + @Test + def toStringStackSafe(): Unit = { + val xml = (1 to 5000).foldRight(<x/>) { case (_, n) => <x>{n}</x>} + xml.toString + } + } From 5375483b9da428549289ecce3d9a28b67ee9744c Mon Sep 17 00:00:00 2001 From: Lukas Rytz Date: Thu, 22 Jun 2023 10:58:58 +0200 Subject: [PATCH 2/2] Use List[Node] in toString --- shared/src/main/scala/scala/xml/Utility.scala | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/shared/src/main/scala/scala/xml/Utility.scala b/shared/src/main/scala/scala/xml/Utility.scala index 44c2ca83..bc6b02a8 100755 --- a/shared/src/main/scala/scala/xml/Utility.scala +++ b/shared/src/main/scala/scala/xml/Utility.scala @@ -224,17 +224,16 @@ object Utility extends AnyRef with parsing.TokenTests { minimizeTags: MinimizeMode.Value, sb: StringBuilder ): Unit = { - @tailrec def ser(nss: List[Seq[Node]], pscopes: List[NamespaceBinding], spaced: List[Boolean], toClose: List[Node]): Unit = nss match { - case List(ns) if ns.isEmpty => - case ns :: rests if ns.isEmpty => + @tailrec def ser(nss: List[List[Node]], pscopes: List[NamespaceBinding], spaced: List[Boolean], toClose: List[Node]): Unit = nss match { + case List(Nil) => + case Nil :: rests => if (toClose.head != null) { sb.append("</") toClose.head.nameToString(sb) sb.append('>') } ser(rests, pscopes.tail, spaced.tail, toClose.tail) - case ns1 :: r => - val (n, ns) = (ns1.head, ns1.tail) + case (n :: ns) :: r => def sp(): Unit = if (ns.nonEmpty && spaced.head) sb.append(' ') n match { case c: Comment => @@ -248,7 +247,7 @@ object Utility extends AnyRef with parsing.TokenTests { sp() ser(ns :: r, pscopes, spaced, toClose) case g: Group => - ser(g.nodes :: ns :: r, g.scope :: pscopes, false :: spaced, null :: toClose) + ser(g.nodes.toList :: ns :: r, g.scope :: pscopes, false :: spaced, null :: toClose) case e: Elem => sb.append('<') e.nameToString(sb) @@ 
-264,12 +263,12 @@ object Utility extends AnyRef with parsing.TokenTests { } else { sb.append('>') val csp = e.child.forall(isAtomAndNotText) - ser(e.child :: ns :: r, e.scope :: pscopes, csp :: spaced, e :: toClose) + ser(e.child.toList :: ns :: r, e.scope :: pscopes, csp :: spaced, e :: toClose) } case n => throw new IllegalArgumentException("Don't know how to serialize a " + n.getClass.getName) } } - ser(List(ns), List(pscope), List(spaced), Nil) + ser(List(ns.toList), List(pscope), List(spaced), Nil) } def sequenceToXML(