Skip to content

Commit 6e3b580

Browse files
committed
Use murmur hashing for tasty uuid.
1 parent b6a6c47 commit 6e3b580

File tree

2 files changed

+264
-6
lines changed

2 files changed

+264
-6
lines changed

compiler/src/dotty/tools/dotc/core/tasty/TastyPickler.scala

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package tasty
66
import TastyFormat._
77
import collection.mutable
88
import TastyBuffer._
9+
import util.MurmurLongHash3
910
import core.Symbols.Symbol
1011
import ast.tpd
1112
import Decorators._
@@ -25,8 +26,8 @@ class TastyPickler {
2526
buf.length + natSize(buf.length)
2627
}
2728

28-
val uuidLow: Long = longHash(nameBuffer.bytes)
29-
val uuidHi: Long = sections.iterator.map(x => longHash(x._2.bytes)).fold(0L)(_ ^ _)
29+
val uuidLow: Long = MurmurLongHash3.bytesHash(nameBuffer.bytes)
30+
val uuidHi: Long = sections.iterator.map(x => MurmurLongHash3.bytesHash(x._2.bytes)).fold(0L)(_ ^ _)
3031

3132
val headerBuffer = {
3233
val buf = new TastyBuffer(header.length + 24)
@@ -71,8 +72,4 @@ class TastyPickler {
7172
var addrOfSym: Symbol => Option[Addr] = (_ => None)
7273

7374
val treePkl = new TreePickler(this)
74-
75-
private def longHash(arr: Array[Byte], i: Int = 0, acc: Long = 1): Long =
76-
if (i < arr.length) longHash(arr, i + 1, 31L * acc + arr(i)) else acc
77-
7875
}
Lines changed: 261 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,261 @@
1+
/* __ *\
2+
** ________ ___ / / ___ Scala API **
3+
** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
4+
** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
5+
** /____/\___/_/ |_/____/_/ | | **
6+
** |/ **
7+
\* */
8+
9+
/* Copied and adapted from scala.util.hashing.MurmurHash3 to use Longs */
10+
11+
package dotty.tools.dotc.util
12+
13+
import java.lang.Long.{ rotateLeft => rotl }
14+
15+
private[util] class MurmurLongHash3 {
  /** Mix in a block of data into an intermediate hash value.
   *
   *  @param hash the intermediate hash accumulated so far
   *  @param data the next block of input
   *  @return     the updated intermediate hash
   */
  final def mix(hash: Long, data: Long): Long = {
    var h = mixLast(hash, data)
    h = rotl(h, 13)
    h * 5L + 0x96cd1c3532ac3b17L
  }

  /** May optionally be used as the last mixing step. Is a little bit faster than mix,
   *  as it does no further mixing of the resulting hash. For the last element this is not
   *  necessary as the hash is thoroughly mixed during finalization anyway.
   *
   *  @param hash the intermediate hash accumulated so far
   *  @param data the last block of input
   *  @return     `hash` combined with the scrambled `data`
   */
  final def mixLast(hash: Long, data: Long): Long = {
    var k = data

    k *= 0x239b961bab0e9789L
    k = rotl(k, 15)
    k *= 0x38b34ae5a1e38b93L

    hash ^ k
  }

  /** Finalize a hash to incorporate the length and make sure all bits avalanche. */
  final def finalizeHash(hash: Long, length: Long): Long = avalanche(hash ^ length)

  /** Force all bits of the hash to avalanche. Used for finalizing the hash. */
  private final def avalanche(hash: Long): Long = {
    var h = hash

    h ^= h >>> 33
    h *= 0xff51afd7ed558ccdL
    h ^= h >>> 33
    h *= 0xc4ceb9fe1a85ec53L
    h ^= h >>> 33

    h
  }

  /** Compute the hash of a product.
   *
   *  For a product of arity 0 the result is the (widened) `hashCode` of its
   *  `productPrefix`; the seed is not used in that case.
   */
  final def productHash(x: Product, seed: Long): Long = {
    val arr = x.productArity
    // Case objects have the hashCode inlined directly into the
    // synthetic hashCode method, but this method should still give
    // a correct result if passed a case object.
    if (arr == 0) {
      x.productPrefix.hashCode.toLong
    }
    else {
      var h = seed
      var i = 0
      while (i < arr) {
        h = mix(h, x.productElement(i).##)
        i += 1
      }
      finalizeHash(h, arr)
    }
  }

  /** Compute the hash of a string.
   *
   *  Characters are consumed in pairs, each pair packed into one block; a
   *  trailing unpaired character is mixed in with `mixLast`.
   */
  final def stringHash(str: String, seed: Long): Long = {
    var h = seed
    var i = 0
    while (i + 1 < str.length) {
      val data = (str.charAt(i) << 16) + str.charAt(i + 1)
      h = mix(h, data)
      i += 2
    }
    if (i < str.length) h = mixLast(h, str.charAt(i).toLong)
    finalizeHash(h, str.length)
  }

  /** Compute a hash that is symmetric in its arguments - that is a hash
   *  where the order of appearance of elements does not matter.
   *  This is useful for hashing sets, for example.
   */
  final def unorderedHash(xs: TraversableOnce[Any], seed: Long): Long = {
    var a, b, n = 0L
    var c = 1
    xs foreach { x =>
      val h = x.##
      a += h
      b ^= h
      if (h != 0) c *= h
      n += 1
    }
    var h = seed
    h = mix(h, a)
    h = mix(h, b)
    h = mixLast(h, c)
    finalizeHash(h, n)
  }

  /** Compute a hash that depends on the order of its arguments.
   */
  final def orderedHash(xs: TraversableOnce[Any], seed: Long): Long = {
    var n = 0
    var h = seed
    xs foreach { x =>
      h = mix(h, x.##)
      n += 1
    }
    finalizeHash(h, n)
  }

  /** Compute the hash of an array.
   */
  final def arrayHash[@specialized T](a: Array[T], seed: Long): Long = {
    var h = seed
    var i = 0
    while (i < a.length) {
      h = mix(h, a(i).##)
      i += 1
    }
    finalizeHash(h, a.length)
  }

  /** Compute the hash of a byte array. Faster than arrayHash, because
   *  it hashes 8 bytes at once.
   *
   *  Each group of 8 bytes is assembled into one little-endian `Long` block.
   *  Up to 7 trailing bytes are assembled the same way and mixed in with
   *  `mixLast`. The array length is folded in during finalization.
   *
   *  @param data the bytes to hash
   *  @param seed the initial hash value
   *  @return     the 64-bit hash of `data`
   */
  final def bytesHash(data: Array[Byte], seed: Long): Long = {
    var len = data.length
    var h = seed

    // Body
    var i = 0
    while (len >= 8) {
      // The masks must be Long: an Int operand would take the shift count
      // modulo 32, folding bytes 4..7 onto bytes 0..3.
      var k = data(i + 0) & 0xFFL
      k |= (data(i + 1) & 0xFFL) << 8
      k |= (data(i + 2) & 0xFFL) << 16
      k |= (data(i + 3) & 0xFFL) << 24
      k |= (data(i + 4) & 0xFFL) << 32
      k |= (data(i + 5) & 0xFFL) << 40
      k |= (data(i + 6) & 0xFFL) << 48
      k |= (data(i + 7) & 0xFFL) << 56

      h = mix(h, k)

      i += 8
      len -= 8
    }

    // Tail: the remaining 0..7 bytes, assembled little-endian like the body.
    var k = 0L
    if (len == 7) k ^= (data(i + 6) & 0xFFL) << 48
    if (len >= 6) k ^= (data(i + 5) & 0xFFL) << 40
    if (len >= 5) k ^= (data(i + 4) & 0xFFL) << 32
    if (len >= 4) k ^= (data(i + 3) & 0xFFL) << 24
    if (len >= 3) k ^= (data(i + 2) & 0xFFL) << 16
    if (len >= 2) k ^= (data(i + 1) & 0xFFL) << 8
    if (len >= 1) {
      k ^= (data(i + 0) & 0xFFL)
      h = mixLast(h, k)
    }

    // Finalization
    finalizeHash(h, data.length)
  }

  /** Compute the hash of a list, mixing in the elements in list order. */
  final def listHash(xs: scala.collection.immutable.List[_], seed: Long): Long = {
    var n = 0
    var h = seed
    var elems = xs
    while (!elems.isEmpty) {
      val head = elems.head
      val tail = elems.tail
      h = mix(h, head.##)
      n += 1
      elems = tail
    }
    finalizeHash(h, n)
  }
}
185+
186+
/**
 *  A `Long`-valued variant of Austin Appleby's MurmurHash 3 algorithm,
 *  copied and adapted from `scala.util.hashing.MurmurHash3`. This object
 *  contains methods that hash values of various types, each with a default
 *  seed.
 *
 *  This algorithm is designed to generate well-distributed non-cryptographic
 *  hashes. Intermediate values and results are `Long`s.
 *
 *  The mix method needs to be called at each step to update the intermediate
 *  hash value. For the last chunk to incorporate into the hash mixLast may
 *  be used instead, which is slightly faster. Finally finalizeHash needs to
 *  be called to compute the final hash value.
 *
 *  The code this was adapted from is based on the earlier MurmurHash3 code by
 *  Rex Kerr, updated to follow the later revision of the algorithm published
 *  by Austin Appleby (revision 136).
 *
 *  @see [[http://code.google.com/p/smhasher]]
 */
object MurmurLongHash3 extends MurmurLongHash3 {
  // Default seeds, one per kind of hashed value.
  final val arraySeed       = 0x3c074a61
  final val stringSeed      = 0xf7ca7fd2
  final val productSeed     = 0xcafebabe
  final val symmetricSeed   = 0xb592f7ae
  final val traversableSeed = 0xe73a8b15
  final val seqSeed         = "Seq".hashCode
  final val mapSeed         = "Map".hashCode
  final val setSeed         = "Set".hashCode

  /** Hash of an array, seeded with `arraySeed`. */
  def arrayHash[@specialized T](a: Array[T]): Long =
    arrayHash(a, arraySeed)

  /** Hash of a byte array, seeded with `arraySeed`. */
  def bytesHash(data: Array[Byte]): Long =
    bytesHash(data, arraySeed)

  /** Order-dependent hash, seeded with `symmetricSeed`. */
  def orderedHash(xs: TraversableOnce[Any]): Long =
    orderedHash(xs, symmetricSeed)

  /** Hash of a product, seeded with `productSeed`. */
  def productHash(x: Product): Long =
    productHash(x, productSeed)

  /** Hash of a string, seeded with `stringSeed`. */
  def stringHash(x: String): Long =
    stringHash(x, stringSeed)

  /** Order-independent hash, seeded with `traversableSeed`. */
  def unorderedHash(xs: TraversableOnce[Any]): Long =
    unorderedHash(xs, traversableSeed)

  /** Hash of a sequence, seeded with `seqSeed`. Lists are dispatched to
   *  `listHash`, to offer some potential for optimization.
   */
  def seqHash(xs: scala.collection.Seq[_]): Long = xs match {
    case list: List[_] => listHash(list, seqSeed)
    case other         => orderedHash(other, seqSeed)
  }

  /** Order-independent hash of a map's entries, seeded with `mapSeed`. */
  def mapHash(xs: scala.collection.Map[_, _]): Long =
    unorderedHash(xs, mapSeed)

  /** Order-independent hash of a set's elements, seeded with `setSeed`. */
  def setHash(xs: scala.collection.Set[_]): Long =
    unorderedHash(xs, setSeed)

  /* Need to adapt Hashing[_]
  class ArrayHashing[@specialized T] extends Hashing[Array[T]] {
    def hash(a: Array[T]) = arrayHash(a)
  }

  def arrayHashing[@specialized T] = new ArrayHashing[T]

  def bytesHashing = new Hashing[Array[Byte]] {
    def hash(data: Array[Byte]) = bytesHash(data)
  }

  def orderedHashing = new Hashing[TraversableOnce[Any]] {
    def hash(xs: TraversableOnce[Any]) = orderedHash(xs)
  }

  def productHashing = new Hashing[Product] {
    def hash(x: Product) = productHash(x)
  }

  def stringHashing = new Hashing[String] {
    def hash(x: String) = stringHash(x)
  }

  def unorderedHashing = new Hashing[TraversableOnce[Any]] {
    def hash(xs: TraversableOnce[Any]) = unorderedHash(xs)
  }
  */
}

0 commit comments

Comments
 (0)