Skip to content

Commit 5fd1ae2

Browse files
committed
Optimise HashSet bulk operations mostly focussed around ++ and union
EmptySet and SetBuilder adjusted to take advantage of the ++ optimisation for simple and common cases. Eliminate unneeded allocations for HashSets where the result is already built, for: `subSet ++ superSet`, `subSet union superSet`, `superSet union subSet`, `superSet ++ subSet`. Make a fast path when there is structural sharing in the HashSet. For union, guarantee internal operations will only return a new HashSet if one of the existing HashSet parameters or internal values can't be used. Use System.arraycopy rather than Array.copy as it avoids the JVM nulling the array. Add missing `eq` fast path to intersect0 and diff0. Minor improvements to + to reduce allocations. Reduce calls to HashSet.size, which can be a bottleneck. Reduce allocations in ListSet ++, + and -. No allocations in ++ or + if the data is contained in the original sets. HashSetCollision1 stores its length so as to avoid calls to ListSet.size, which is O(n). Take advantage of ListSet guarantees on identity for the return of + or - where we can. Avoid calling ListSet.size where we can.
1 parent c003048 commit 5fd1ae2

File tree

6 files changed

+516
-256
lines changed

6 files changed

+516
-256
lines changed

library/src/scala/collection/IterableLike.scala

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ self =>
291291
b.result()
292292
}
293293

294-
def sameElements[B >: A](that: GenIterable[B]): Boolean = {
294+
def sameElements[B >: A](that: GenIterable[B]): Boolean = (this.asInstanceOf[AnyRef] eq that.asInstanceOf[AnyRef]) || {
295295
that match {
296296
case thatVector: Vector[_] if this.isInstanceOf[Vector[_]] =>
297297
val thisVector = this.asInstanceOf[Vector[_]]
@@ -307,6 +307,9 @@ self =>
307307
}
308308
equal
309309
}
310+
case thatSet: GenSet[A] if this.isInstanceOf[GenSetLike[A,_]]=>
311+
val thisSet = this.asInstanceOf[GenSetLike[A,_]]
312+
thisSet.size == thatSet.size && thisSet.subsetOf(thatSet)
310313

311314
case _ =>
312315
val these = this.iterator

library/src/scala/collection/SetLike.scala

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,35 @@ self =>
150150
* @param elems the collection containing the elements to add.
151151
* @return a new $coll with the given elements added, omitting duplicates.
152152
*/
153-
def ++ (elems: GenTraversableOnce[A]): This = (repr /: elems.seq)(_ + _)
153+
def ++ (elems: GenTraversableOnce[A]): This = {
154+
import immutable.HashSet
155+
//in 2.14 this should be moved to the appropriate place - HashSet and EmptySet.
156+
//we can't break binary compatibility before then
157+
this match {
158+
case _ if this eq immutable.Set.empty.asInstanceOf[AnyRef] =>
159+
import immutable.Set.{Set1, Set2, Set3, Set4}
160+
elems match {
161+
case hs: HashSet[A] if hs.size > 4 => hs.asInstanceOf[This]
162+
case hs: Set1[A] => hs.asInstanceOf[This]
163+
case hs: Set2[A] => hs.asInstanceOf[This]
164+
case hs: Set3[A] => hs.asInstanceOf[This]
165+
case hs: Set4[A] => hs.asInstanceOf[This]
166+
case _ =>
167+
if (elems.isEmpty) this.asInstanceOf[This]
168+
else (repr /: elems.seq) (_ + _)
169+
}
170+
case hs: immutable.HashSet[A] =>
171+
elems match {
172+
case that: GenSet[A] =>
173+
hs.union(that).asInstanceOf[This]
174+
case _ =>
175+
(repr /: elems.seq) (_ + _)
176+
}
177+
case _ =>
178+
(repr /: elems.seq) (_ + _)
179+
180+
}
181+
}
154182

155183
/** Creates a new set with a given element removed from this set.
156184
*

library/src/scala/collection/TraversableLike.scala

Lines changed: 45 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -145,11 +145,29 @@ trait TraversableLike[+A, +Repr] extends Any
145145
def hasDefiniteSize = true
146146

147147
def ++[B >: A, That](that: GenTraversableOnce[B])(implicit bf: CanBuildFrom[Repr, B, That]): That = {
148-
val b = bf(repr)
149-
if (that.isInstanceOf[IndexedSeqLike[_, _]]) b.sizeHint(this, that.seq.size)
150-
b ++= thisCollection
151-
b ++= that.seq
152-
b.result
148+
def defaultPlusPlus: That = {
149+
val b = bf(repr)
150+
if (that.isInstanceOf[IndexedSeqLike[_, _]]) b.sizeHint(this, that.seq.size)
151+
b ++= thisCollection
152+
b ++= that.seq
153+
b.result
154+
}
155+
156+
if (bf eq immutable.Set.canBuildFrom) {
157+
this match {
158+
case s: immutable.Set[A] if that.isInstanceOf[GenSet[A]] =>
159+
(s union that.asInstanceOf[GenSet[A]]).asInstanceOf[That]
160+
case _ => defaultPlusPlus
161+
}
162+
} else if (bf eq immutable.HashSet.canBuildFrom) {
163+
this match {
164+
case s: immutable.HashSet[A] if that.isInstanceOf[GenSet[A]] =>
165+
(s union that.asInstanceOf[GenSet[A]]).asInstanceOf[That]
166+
case _ => defaultPlusPlus
167+
}
168+
169+
} else defaultPlusPlus
170+
153171
}
154172

155173
/** As with `++`, returns a new collection containing the elements from the left operand followed by the
@@ -185,11 +203,28 @@ trait TraversableLike[+A, +Repr] extends Any
185203
* followed by all elements of `that`.
186204
*/
187205
def ++:[B >: A, That](that: TraversableOnce[B])(implicit bf: CanBuildFrom[Repr, B, That]): That = {
188-
val b = bf(repr)
189-
if (that.isInstanceOf[IndexedSeqLike[_, _]]) b.sizeHint(this, that.size)
190-
b ++= that
191-
b ++= thisCollection
192-
b.result
206+
def defaultPlusPlus: That = {
207+
val b = bf(repr)
208+
if (that.isInstanceOf[IndexedSeqLike[_, _]]) b.sizeHint(this, that.size)
209+
b ++= that
210+
b ++= thisCollection
211+
b.result
212+
}
213+
if (bf eq immutable.Set.canBuildFrom) {
214+
this match {
215+
case s: immutable.Set[A] if that.isInstanceOf[GenSet[A]] =>
216+
(s union that.asInstanceOf[GenSet[A]]).asInstanceOf[That]
217+
case _ => defaultPlusPlus
218+
}
219+
} else if (bf eq immutable.HashSet.canBuildFrom) {
220+
this match {
221+
case s: immutable.HashSet[A] if that.isInstanceOf[GenSet[A]] =>
222+
(s union that.asInstanceOf[GenSet[A]]).asInstanceOf[That]
223+
case _ => defaultPlusPlus
224+
}
225+
226+
} else defaultPlusPlus
227+
193228
}
194229

195230
/** As with `++`, returns a new collection containing the elements from the

0 commit comments

Comments
 (0)