Skip to content

Commit 82d4a01

Browse files
committed
SI-8879 fix quadratic reading time in StreamReader
StreamReader.nextEol used to loop all the way to Eol every time an element was read. That's very costly when lines are long. Furthermore, it used to call PagedSeq.length, forcing PagedSeq to load the whole input into memory, even when a single character was read. nextEol is now saved as part of the state of StreamReader, and is passed to child readers when they are created (as long as we do not read past the end of the line). Thus it is computed only once per line, whatever the length. With the example in the ticket (SI-8879), we get: * before: User time (seconds): 82.12; System time (seconds): 0.07; Elapsed (wall clock) time (h:mm:ss or m:ss): 1:21.52 * after: User time (seconds): 1.05; System time (seconds): 0.06; Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.68 * for comparison, using PagedSeqReader directly: User time (seconds): 1.06; System time (seconds): 0.06; Elapsed (wall clock) time (h:mm:ss or m:ss): 0:00.69. `isDefinedAt` is used instead of `length` so that pages beyond the tested index do not need to be read. The test only tests this part.
1 parent 3ee2e0f commit 82d4a01

File tree

2 files changed

+55
-11
lines changed

2 files changed

+55
-11
lines changed

src/main/scala/scala/util/parsing/input/StreamReader.scala

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -45,27 +45,29 @@ object StreamReader {
4545
* @author Miles Sabin
4646
* @author Martin Odersky
4747
*/
48-
sealed class StreamReader(seq: PagedSeq[Char], off: Int, lnum: Int) extends PagedSeqReader(seq, off) {
49-
import StreamReader._
48+
sealed class StreamReader private (seq: PagedSeq[Char], off: Int, lnum: Int, nextEol0: Int) extends PagedSeqReader(seq, off) {
49+
def this(seq: PagedSeq[Char], off: Int, lnum: Int) = this(seq, off, lnum, -1)
50+
51+
import StreamReader.EofCh
5052

5153
override def rest: StreamReader =
52-
if (off == seq.length) this
54+
if (!seq.isDefinedAt(off)) this
5355
else if (seq(off) == '\n')
54-
new StreamReader(seq.slice(off + 1), 0, lnum + 1)
55-
else new StreamReader(seq, off + 1, lnum)
56+
new StreamReader(seq.slice(off + 1), 0, lnum + 1, -1)
57+
else new StreamReader(seq, off + 1, lnum, nextEol0)
5658

57-
private def nextEol = {
59+
private def nextEol = if (nextEol0 == -1) {
5860
var i = off
59-
while (i < seq.length && seq(i) != '\n' && seq(i) != EofCh) i += 1
61+
while (seq.isDefinedAt(i) && seq(i) != '\n' && seq(i) != EofCh) i += 1
6062
i
61-
}
63+
} else nextEol0
6264

6365
override def drop(n: Int): StreamReader = {
6466
val eolPos = nextEol
65-
if (eolPos < off + n && eolPos < seq.length)
66-
new StreamReader(seq.slice(eolPos + 1), 0, lnum + 1).drop(off + n - (eolPos + 1))
67+
if (eolPos < off + n && seq.isDefinedAt(eolPos))
68+
new StreamReader(seq.slice(eolPos + 1), 0, lnum + 1, -1).drop(off + n - (eolPos + 1))
6769
else
68-
new StreamReader(seq, off + n, lnum)
70+
new StreamReader(seq, off + n, lnum, eolPos)
6971
}
7072

7173
override def pos: Position = new Position {
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
import scala.util.parsing.input._
2+
import scala.collection.immutable.PagedSeq
3+
4+
import org.junit.Test
5+
import org.junit.Assert.fail
6+
7+
class t8879 {

  /**
   * Regression test for SI-8879.
   *
   * Builds a `PagedSeq[Char]` whose page-filling callback fails the test as
   * soon as it is invoked after having already served one page, then drops
   * 20 characters from a `StreamReader` positioned at its start. The test
   * passes when that `drop` completes without the callback being asked for
   * a second page.
   */
  @Test
  def test: Unit = {
    // How many pages the callback below has handed out so far.
    var pagesServed = 0

    // Page supplier for the PagedSeq: writes `len` characters into `data`
    // starting at `start` and returns the number written, or -1 when nothing
    // was written. Every 100th character (relative to the page start) is a
    // newline; all others are 'a'.
    def more(data: Array[Char], start: Int, len: Int): Int =
      if (pagesServed >= 1) {
        // A second request means the reader looked further ahead than needed.
        fail("Should not read more than 1 page!")
        0
      } else if (len <= 0) {
        -1
      } else {
        for (k <- 0 until len)
          data(start + k) = if (k % 100 == 0) '\n' else 'a'
        pagesServed += 1
        len
      }

    val singlePageInput = new PagedSeq[Char](more _)
    val reader = new StreamReader(singlePageInput, 0, 1)

    // should not trigger reading of the second page
    reader.drop(20)
  }
}

0 commit comments

Comments
 (0)