Skip to content

Commit 7e5521a

Browse files
committed
adds groupUntilChanged extension method on scala.collection.Iterator
Iterator#groupUntilChanged constructs an iterator where consecutive elements of the original iterator are accumulated as long as the output of a key function for each element doesn't change. This operation makes sense as soon as you are trying to process an iterator where you know the elements will be sorted in a certain way and you need to group them without loading all the data in memory. For instance * processing a file where the ordering is guaranteed but the file doesn't fit in the heap, * processing a streaming resultset where the underlying database guarantees the ordering because of a sort clause.
1 parent 2837fdb commit 7e5521a

File tree

2 files changed

+77
-0
lines changed

2 files changed

+77
-0
lines changed

src/main/scala/scala/collection/decorators/IteratorDecorator.scala

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,44 @@ class IteratorDecorator[A](val `this`: Iterator[A]) extends AnyVal {
7272
loop(immutable.List.empty)
7373
}
7474

75+
/**
76+
* Constructs an iterator where consecutive elements are accumulated as
77+
* long as the output of f for each element doesn't change.
78+
* <pre>
79+
* Vector(1,2,2,3,3,3,2,2)
80+
* .iterator
81+
* .groupUntilChanged(identity)
82+
* .toList
83+
* </pre>
84+
* produces
85+
* <pre>
86+
* List(Seq(1),
87+
* Seq(2,2),
88+
* Seq(3,3,3),
89+
* Seq(2,2))
90+
* </pre>
91+
* @param f the function to compute a key for an element
92+
* @tparam K the type of the computed key
93+
* @return an iterator of sequences of the consecutive elements with the
94+
* same key in the original iterator
95+
*/
96+
def groupUntilChanged[K](f: A => K): Iterator[Iterable[A]] =
97+
new AbstractIterator[Seq[A]] {
98+
private var hd: Option[A] = `this`.nextOption()
99+
override def hasNext: Boolean = hd.isDefined
100+
101+
override def next(): Seq[A] = {
102+
hd match {
103+
case None => Iterator.empty.next()
104+
case Some(head) =>
105+
hd = `this`.nextOption()
106+
var seq = mutable.Buffer(head)
107+
while (hd.exists(el => f(el) == f(head))) {
108+
seq = seq ++ hd
109+
hd = `this`.nextOption()
110+
}
111+
seq.toVector
112+
}
113+
}
114+
}
75115
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
package scala.collection
2+
package decorators
3+
4+
import scala.util.Try
5+
6+
import org.junit.{Assert, Test}
7+
8+
class IteratorDecoratorTest {
9+
@Test
10+
def groupUntilChangedShouldHonorEmptyIterator(): Unit ={
11+
val groupedIterator = Iterator.empty.groupUntilChanged(identity)
12+
Assert.assertFalse(groupedIterator.hasNext)
13+
Assert.assertEquals(Try(groupedIterator.next).toString, Try(Iterator.empty.next()).toString) }
14+
@Test
15+
def groupUntilChangedShouldReturnSingleSeqWhenAllElHaveTheSameKey(): Unit ={
16+
val value = Vector("1", "1", "1")
17+
val groupedIterator = value.iterator.groupUntilChanged(identity)
18+
Assert.assertTrue(groupedIterator.hasNext)
19+
Assert.assertEquals(groupedIterator.next.toVector, value)
20+
Assert.assertFalse(groupedIterator.hasNext)
21+
Assert.assertEquals(Try(groupedIterator.next).toString, Try(Iterator.empty.next()).toString)
22+
}
23+
@Test
24+
def groupUntilChangedShouldReturnSeqOfConsecutiveElementsWithTheSameKey(): Unit ={
25+
val value = Vector("1", "2","2","3","3","3","2","2")
26+
val groupedIterator = value.iterator.groupUntilChanged(identity)
27+
Assert.assertTrue(groupedIterator.hasNext)
28+
Assert.assertEquals(groupedIterator.next.toVector, Vector("1"))
29+
Assert.assertTrue(groupedIterator.hasNext)
30+
Assert.assertEquals(groupedIterator.next.toVector, Vector("2","2"))
31+
Assert.assertTrue(groupedIterator.hasNext)
32+
Assert.assertEquals(groupedIterator.next.toVector, Vector("3","3","3"))
33+
Assert.assertTrue(groupedIterator.hasNext)
34+
Assert.assertEquals(groupedIterator.next.toVector, Vector("2","2"))
35+
Assert.assertFalse(groupedIterator.hasNext)
36+
Assert.assertEquals(Try(groupedIterator.next).toString, Try(Iterator.empty.next()).toString) }
37+
}

0 commit comments

Comments
 (0)