Skip to content

Commit 301011e

Browse files
committed
Performance optimization - Iterator span, collect, dropWhile
Rewrite of `span` to avoid the double indirection of `.buffered` and to avoid using `mutable.Queue` unless it is absolutely necessary. Rewrite of `collect` and `dropWhile` to also avoid `.buffered` (less DRY, but single instead of double indirection and one fewer object allocation).

Performance improvements:

```
method     reason
=========  ===============================================================
collect    2.3x faster on small collections, 1.5x on large
span       1.6-1.7x faster on small collections
           0.85x-1.8x slower/faster on large collections
           depending on how much must be cached (0.85x all, 1.8x none)
dropWhile  1.2x faster on small collections, half the garbage
```
1 parent 0a7a2c3 commit 301011e

File tree

1 file changed

+138
-35
lines changed

1 file changed

+138
-35
lines changed

src/library/scala/collection/Iterator.scala

Lines changed: 138 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -479,13 +479,27 @@ trait Iterator[+A] extends TraversableOnce[A] {
479479
* @note Reuse: $consumesAndProducesIterator
480480
*/
481481
@migration("`collect` has changed. The previous behavior can be reproduced with `toSeq`.", "2.8.0")
482-
def collect[B](pf: PartialFunction[A, B]): Iterator[B] = {
483-
val self = buffered
484-
new AbstractIterator[B] {
485-
private def skip() = while (self.hasNext && !pf.isDefinedAt(self.head)) self.next()
486-
def hasNext = { skip(); self.hasNext }
487-
def next() = { skip(); pf(self.next()) }
482+
def collect[B](pf: PartialFunction[A, B]): Iterator[B] = new AbstractIterator[B] {
483+
// Manually buffer to avoid extra layer of wrapping with buffered
484+
private[this] var hd: A = _
485+
486+
// Little state machine to keep track of where we are
487+
// Seek = 0; Found = 1; Empty = -1
488+
// Not in vals because scalac won't make them static (@inline def only works with -optimize)
489+
// BE REALLY CAREFUL TO KEEP COMMENTS AND NUMBERS IN SYNC!
490+
private[this] var status = 0/*Seek*/
491+
492+
def hasNext = {
493+
while (status == 0/*Seek*/) {
494+
if (self.hasNext) {
495+
hd = self.next()
496+
if (pf.isDefinedAt(hd)) status = 1/*Found*/
497+
}
498+
else status = -1/*Empty*/
499+
}
500+
status == 1/*Found*/
488501
}
502+
def next() = if (hasNext) { status = 0/*Seek*/; pf(hd) } else Iterator.empty.next()
489503
}
490504

491505
/** Produces a collection containing cumulative results of applying the
@@ -587,33 +601,105 @@ trait Iterator[+A] extends TraversableOnce[A] {
587601
* @note Reuse: $consumesOneAndProducesTwoIterators
588602
*/
589603
def span(p: A => Boolean): (Iterator[A], Iterator[A]) = {
590-
val self = buffered
591-
592-
// Must be a named class to avoid structural call to finish from trailing iterator
604+
/*
605+
* The leading iterator is given a named class (rather than being an
606+
* anonymous class like the trailing one) because an anonymous class would
607+
* be seen as a structural type, and the trailing iterator's call to its
608+
* finish() method would then be handled as a structural call. That is not what is intended here.
609+
*/
593610
class Leading extends AbstractIterator[A] {
594-
private val drained = new mutable.Queue[A]
595-
private var finished = false
596-
def finish(): Unit = {
597-
require(!finished)
598-
finished = true
599-
while (selfish) drained += self.next
611+
var lookahead: mutable.Queue[A] = null
612+
var hd: A = _
613+
/* Status is kept with magic numbers
614+
* 1 means next element is in hd and we're still reading into this iterator
615+
* 0 means we're still reading but haven't found a next element
616+
* -1 means we are done reading into the iterator, so we must rely on lookahead
617+
* -2 means we are done but have saved hd for the other iterator to use as its first element
618+
*/
619+
var status = 0
620+
private def store(a: A) {
621+
if (lookahead == null) lookahead = new mutable.Queue[A]
622+
lookahead += a
623+
}
624+
def hasNext = {
625+
if (status < 0) (lookahead ne null) && lookahead.nonEmpty
626+
else if (status > 0) true
627+
else {
628+
if (self.hasNext) {
629+
hd = self.next()
630+
status = if (p(hd)) 1 else -2
631+
}
632+
else status = -1
633+
status > 0
634+
}
600635
}
601-
private def selfish = self.hasNext && p(self.head)
602-
def hasNext = if (finished) drained.nonEmpty else selfish
603636
def next() = {
604-
if (finished) drained.dequeue()
605-
else if (selfish) self.next()
637+
if (hasNext) {
638+
if (status == 1) { status = 0; hd }
639+
else lookahead.dequeue()
640+
}
606641
else empty.next()
607642
}
643+
/** Stops this leading iterator from reading `self` any further; called by
 *  the trailing iterator the first time it is queried.
 *
 *  Every not-yet-consumed element that satisfies `p` is moved into
 *  `lookahead`, so the leading iterator can still produce it later.
 *
 *  On return `status` is always -1: from then on the leading iterator
 *  serves elements from `lookahead` only and never touches `self` or `hd`
 *  again. Without that, a later `hasNext`/`next()` on the leading iterator
 *  would either report "empty" while `lookahead` still holds elements, or
 *  return the buffered `hd` a second time.
 *
 *  @return `true` if the first element that fails `p` has been left in
 *          `hd` for the trailing iterator to pick up; `false` if `self`
 *          ran out without such an element (or this method had already
 *          completed earlier).
 */
def finish(): Boolean = {
  if (status == -1) false
  else if (status == -2) {
    // hasNext already pulled the first failing element into hd.
    status = -1
    true
  }
  else {
    // A buffered-but-unconsumed element still belongs to the leading side.
    if (status == 1) store(hd)
    // Switch to lookahead-only mode *before* draining, so the state is
    // consistent whether or not a failing element is found below.
    status = -1
    var foundTrailingHead = false
    while (!foundTrailingHead && self.hasNext) {
      val a = self.next()
      if (p(a)) store(a)
      else {
        // Hand the first failing element over to the trailing iterator.
        hd = a
        foundTrailingHead = true
      }
    }
    foundTrailingHead
  }
}
608663
}
664+
609665
val leading = new Leading
666+
610667
val trailing = new AbstractIterator[A] {
611-
private lazy val it = {
612-
leading.finish()
613-
self
668+
private[this] var myLeading = leading
669+
/* Status flags meanings:
670+
* -1 not yet accessed
671+
* 0 single element waiting in leading
672+
* 1 defer to self
673+
*/
674+
private[this] var status = -1
675+
def hasNext = {
676+
if (status > 0) self.hasNext
677+
else {
678+
if (status == 0) true
679+
else if (myLeading.finish()) {
680+
status = 0
681+
true
682+
}
683+
else {
684+
status = 1
685+
myLeading = null
686+
self.hasNext
687+
}
688+
}
614689
}
615-
def hasNext = it.hasNext
616-
def next() = it.next()
690+
def next() = {
691+
if (hasNext) {
692+
if (status > 0) self.next()
693+
else {
694+
status = 1
695+
val ans = myLeading.hd
696+
myLeading = null
697+
ans
698+
}
699+
}
700+
else Iterator.empty.next()
701+
}
702+
617703
override def toString = "unknown-if-empty iterator"
618704
}
619705

@@ -627,18 +713,35 @@ trait Iterator[+A] extends TraversableOnce[A] {
627713
* @return an iterator consisting of the remaining elements
628714
* @note Reuse: $consumesAndProducesIterator
629715
*/
630-
def dropWhile(p: A => Boolean): Iterator[A] = {
631-
val self = buffered
632-
new AbstractIterator[A] {
633-
var dropped = false
634-
private def skip() =
635-
if (!dropped) {
636-
while (self.hasNext && p(self.head)) self.next()
637-
dropped = true
716+
def dropWhile(p: A => Boolean): Iterator[A] = new AbstractIterator[A] {
717+
// Magic value: -1 = hasn't dropped, 0 = found first, 1 = defer to parent iterator
718+
private[this] var status = -1
719+
// Local buffering to avoid double-wrap with .buffered
720+
private[this] var fst: A = _
721+
def hasNext: Boolean =
722+
if (status == 1) self.hasNext
723+
else if (status == 0) true
724+
else {
725+
while (self.hasNext) {
726+
val a = self.next()
727+
if (!p(a)) {
728+
fst = a
729+
status = 0
730+
return true
731+
}
638732
}
639-
def hasNext = { skip(); self.hasNext }
640-
def next() = { skip(); self.next() }
641-
}
733+
status = 1
734+
false
735+
}
736+
def next() =
737+
if (hasNext) {
738+
if (status == 1) self.next()
739+
else {
740+
status = 1
741+
fst
742+
}
743+
}
744+
else Iterator.empty.next()
642745
}
643746

644747
/** Creates an iterator formed from this iterator and another iterator

0 commit comments

Comments
 (0)