Optimize buffering in Scanner

odersky · odersky · commit b67b0b7eb472 · 2020-09-01T14:43:35.000+02:00
Avoid an extra toCharArray when converting a buffer to a name
diff --git a/compiler/src/dotty/tools/dotc/parsing/Scanners.scala b/compiler/src/dotty/tools/dotc/parsing/Scanners.scala
@@ -7,7 +7,7 @@ import core.StdNames._, core.Comments._
 import util.SourceFile
 import java.lang.Character.isDigit
 import scala.internal.Chars._
-import util.SourcePosition
+import util.{SourcePosition, CharBuffer}
 import util.Spans.Span
 import config.Config
 import config.Printers.lexical
@@ -21,38 +21,6 @@ import config.Feature.migrateTo3
 import config.SourceVersion._
 import reporting.Message
 
-object Cbufs {
-  import java.lang.StringBuilder
-
-  private final val TargetCapacity = 256
-
-  opaque type Cbuf = StringBuilder
-  object Cbuf:
-    def apply(): Cbuf = new StringBuilder(TargetCapacity)
-
-  extension (buf: Cbuf):
-    def clear(): Unit = {
-      if buf.capacity() > TargetCapacity then
-        buf.setLength(TargetCapacity)
-        buf.trimToSize()
-      end if
-      buf.setLength(0)
-    }
-    def toCharArray: Array[Char] = {
-      val n = buf.length()
-      val res = new Array[Char](n)
-      buf.getChars(0, n, res, 0)
-      res
-    }
-    def append(c: Char): buf.type = { buf.append(c) ; buf }
-    def isEmpty: Boolean = buf.length() == 0
-    def length: Int = buf.length()
-    def last: Char = buf.charAt(buf.length() - 1)
-  end extension
-}
-
-import Cbufs._
-
 object Scanners {
 
   /** Offset into source character array */
@@ -142,22 +110,16 @@ object Scanners {
 
     /** A character buffer for literals
       */
-    protected val litBuf = Cbuf()
+    protected val litBuf = CharBuffer()
 
     /** append Unicode character to "litBuf" buffer
       */
     protected def putChar(c: Char): Unit = litBuf.append(c)
 
-    /** Return buffer contents and clear */
-    def flushBuf(buf: Cbuf): String = {
-      val str = buf.toString
-      buf.clear()
-      str
-    }
-
     /** Clear buffer and set name and token */
     def finishNamed(idtoken: Token = IDENTIFIER, target: TokenData = this): Unit = {
-      target.name = termName(flushBuf(litBuf))
+      target.name = termName(litBuf.chars, 0, litBuf.length)
+      litBuf.clear()
       target.token = idtoken
       if (idtoken == IDENTIFIER)
         target.token = toToken(target.name)
@@ -168,7 +130,8 @@ object Scanners {
 
     /** Clear buffer and set string */
     def setStrVal(): Unit =
-      strVal = flushBuf(litBuf)
+      strVal = litBuf.toString
+      litBuf.clear()
 
     @inline def isNumberSeparator(c: Char): Boolean = c == '_'
 
@@ -241,7 +204,7 @@ object Scanners {
     def getDocComment(pos: Int): Option[Comment] = docstringMap.get(pos)
 
     /** A buffer for comments */
-    private val commentBuf = Cbuf()
+    private val commentBuf = CharBuffer()
 
     private def handleMigration(keyword: Token): Token =
       if keyword == ERASED && !ctx.settings.YerasedTerms.value then IDENTIFIER
@@ -888,7 +851,8 @@ object Scanners {
       def finishComment(): Boolean = {
         if (keepComments) {
           val pos = Span(start, charOffset - 1, start)
-          val comment = Comment(pos, flushBuf(commentBuf))
+          val comment = Comment(pos, commentBuf.toString)
+          commentBuf.clear()
           commentPosBuf += pos
 
           if (comment.isDocComment)
diff --git a/compiler/src/dotty/tools/dotc/util/CharBuffer.scala b/compiler/src/dotty/tools/dotc/util/CharBuffer.scala
@@ -0,0 +1,28 @@
+package dotty.tools
+package dotc
+package util
+
+/** A character buffer that exposes the internal array for reading.
+ *  That way we can avoid copying when converting to names. Only the first
+ *  `length` elements of `chars` are valid; `append` may replace the array
+ *  with a larger one, and `clear()` resets the length but keeps the array.
+ */
+class CharBuffer(initialSize: Int = 1024):
+  private var cs: Array[Char] = new Array[Char](initialSize)
+  private var len: Int = 0
+
+  def append(ch: Char): Unit =
+    if len == cs.length then
+      // double the capacity, but grow to at least 1 so `initialSize = 0` works
+      val cs1 = new Array[Char](math.max(len * 2, 1))
+      Array.copy(cs, 0, cs1, 0, len)
+      cs = cs1
+    cs(len) = ch
+    len += 1
+
+  def chars: Array[Char] = cs
+  def length: Int = len
+  def isEmpty: Boolean = len == 0
+  def last: Char = cs(len - 1)
+  def clear(): Unit = len = 0
+  override def toString: String = String(cs, 0, len)