Skip to content

Commit 8f8ac74

Browse files
committed
adds splitBy extension method on scala.collection.Iterator
Iterator#splitBy constructs an iterator where consecutive elements of the original iterator are accumulated as long as the output of a key function for each element doesn't change. This operation makes sense as soon as you are trying to process an iterator where you know the elements will be sorted in a certain way and you need to group them without loading all the data in memory. For instance: (1) processing a file where the ordering is guaranteed but the file doesn't fit in the heap; (2) processing a streaming result set where the underlying database guarantees the ordering because of a sort clause.
1 parent 1a46759 commit 8f8ac74

File tree

2 files changed

+100
-0
lines changed

2 files changed

+100
-0
lines changed

src/main/scala/scala/collection/decorators/IteratorDecorator.scala

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,4 +72,63 @@ class IteratorDecorator[A](val `this`: Iterator[A]) extends AnyVal {
7272
loop(immutable.List.empty)
7373
}
7474

75+
/**
  * Constructs an iterator where consecutive elements are accumulated as
  * long as the output of f for each element doesn't change.
  * <pre>
  * Vector(1,2,2,3,3,3,2,2)
  *   .iterator
  *   .splitBy(identity)
  *   .toList
  * </pre>
  * produces
  * <pre>
  * List(Seq(1),
  *      Seq(2,2),
  *      Seq(3,3,3),
  *      Seq(2,2))
  * </pre>
  *
  * Each call to `next()` consumes the underlying iterator up to and including
  * the first element whose key differs from the current group's key; that
  * element is kept as a look-ahead and becomes the first element of the
  * following group. Only one group is materialised at a time. `f` is
  * evaluated once per element of the underlying iterator.
  *
  * @param f the function to compute a key for an element
  * @tparam K the type of the computed key
  * @return an iterator of sequences of the consecutive elements with the
  *         same key in the original iterator
  */
def splitBy[K](f: A => K): Iterator[Iterable[A]] =
  new AbstractIterator[Seq[A]] {
    // Look-ahead slot: the first element of the next group (and its key),
    // already pulled from the underlying iterator. Only meaningful while
    // `hdDefined` is true.
    private var hd: A = _
    private var hdKey: K = _
    private var hdDefined: Boolean = false

    override def hasNext: Boolean = hdDefined || `this`.hasNext

    override def next(): Seq[A] =
      if (!hasNext) {
        // Delegate to the empty iterator so the failure is the standard one.
        Iterator.empty.next()
      } else {
        // Seed the group from the look-ahead, or pull a fresh element.
        if (!hdDefined) {
          hd = `this`.next()
          hdKey = f(hd)
        }
        val groupKey = hdKey
        val group = Vector.newBuilder[A]
        group += hd
        // The look-ahead has been consumed into the current group.
        hdDefined = false

        // Accumulate until the key changes or the source is exhausted.
        while (!hdDefined && `this`.hasNext) {
          val el = `this`.next()
          val key = f(el)
          if (key == groupKey) {
            group += el
          } else {
            // Key changed: stash the element for the next call.
            hd = el
            hdKey = key
            hdDefined = true
          }
        }
        group.result()
      }
  }
75134
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
package scala.collection
2+
package decorators
3+
4+
import org.junit.{Assert, Test}
5+
6+
import scala.util.Try
7+
8+
/** Tests for the `splitBy` extension method on [[scala.collection.Iterator]]. */
class IteratorDecoratorTest {

  /**
    * Asserts that `it` has no more elements and that calling `next()` on it
    * fails the same way `Iterator.empty.next()` does (compared through the
    * `toString` of the resulting `Try`).
    */
  private def assertExhausted(it: Iterator[_]): Unit = {
    Assert.assertFalse(it.hasNext)
    // JUnit argument order is (expected, actual).
    Assert.assertEquals(Try(Iterator.empty.next()).toString, Try(it.next()).toString)
  }

  /** An empty source yields an empty grouped iterator. */
  @Test
  def splitByShouldHonorEmptyIterator(): Unit = {
    val groupedIterator = Iterator.empty.splitBy(identity)
    assertExhausted(groupedIterator)
  }

  /** When every element maps to the same key, a single group holds them all. */
  @Test
  def splitByShouldReturnIteratorOfSingleSeqWhenAllElHaveTheSameKey(): Unit = {
    val value = Vector("1", "1", "1")
    val groupedIterator = value.iterator.splitBy(identity)
    Assert.assertTrue(groupedIterator.hasNext)
    Assert.assertEquals(value, groupedIterator.next().toVector)
    assertExhausted(groupedIterator)
  }

  /** Runs of equal keys are grouped; a key that reappears later starts a new group. */
  @Test
  def splitByShouldReturnIteratorOfSeqOfConsecutiveElementsWithTheSameKey(): Unit = {
    val value = Vector("1", "2", "2", "3", "3", "3", "2", "2")
    val groupedIterator = value.iterator.splitBy(identity)
    Assert.assertTrue(groupedIterator.hasNext)
    Assert.assertEquals(Vector("1"), groupedIterator.next().toVector)
    Assert.assertTrue(groupedIterator.hasNext)
    Assert.assertEquals(Vector("2", "2"), groupedIterator.next().toVector)
    Assert.assertTrue(groupedIterator.hasNext)
    Assert.assertEquals(Vector("3", "3", "3"), groupedIterator.next().toVector)
    Assert.assertTrue(groupedIterator.hasNext)
    Assert.assertEquals(Vector("2", "2"), groupedIterator.next().toVector)
    assertExhausted(groupedIterator)
  }
}

0 commit comments

Comments
 (0)