Skip to content

Commit b5bf5e8

Browse files
ofrobots authored and MylesBorins committed
test: verify hash seed uniqueness
This tests that the hash seed used by V8 for hashing is random.

PR-URL: https://github.com/nodejs/node-private/pull/84
Reviewed-By: Ben Noordhuis <[email protected]>
Reviewed-By: Michael Dawson <[email protected]>
Reviewed-By: Fedor Indutny <[email protected]>
1 parent ff587de commit b5bf5e8

File tree

3 files changed

+200
-2
lines changed

3 files changed

+200
-2
lines changed

Makefile

+5-2
Original file line numberDiff line numberDiff line change
@@ -349,6 +349,9 @@ test-node-inspect: $(NODE_EXE)
349349
test-tick-processor: all
350350
$(PYTHON) tools/test.py tick-processor
351351

352+
test-hash-seed: all
353+
$(NODE) test/pummel/test-hash-seed.js
354+
352355
test-known-issues: all
353356
$(PYTHON) tools/test.py known_issues
354357

@@ -375,7 +378,7 @@ test-timers-clean:
375378

376379

377380
ifneq ("","$(wildcard deps/v8/tools/run-tests.py)")
378-
test-v8: v8
381+
test-v8: v8 test-hash-seed
379382
# note: performs full test unless QUICKCHECK is specified
380383
deps/v8/tools/run-tests.py --arch=$(V8_ARCH) \
381384
--mode=$(BUILDTYPE_LOWER) $(V8_TEST_OPTIONS) $(QUICKCHECK_ARG) \
@@ -879,4 +882,4 @@ endif
879882
test-v8-intl test-v8-benchmarks test-v8-all v8 lint-ci bench-ci jslint-ci \
880883
doc-only $(TARBALL)-headers test-ci test-ci-native test-ci-js build-ci \
881884
clear-stalled coverage-clean coverage-build coverage-test coverage \
882-
list-gtests
885+
list-gtests test-hash-seed

test/fixtures/guess-hash-seed.js

+175
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
/* eslint-disable required-modules */
2+
'use strict';
3+
/**
 * Returns the smallest element of `arr` according to the `<` operator.
 *
 * @param {Array} arr - values to scan.
 * @returns {*} the smallest element, or `undefined` for an empty array.
 */
function min(arr) {
  // Seeding the fold with the first element keeps the "empty array yields
  // undefined" behaviour of a plain scan starting at arr[0].
  return arr.reduce(
    (smallest, candidate) => (candidate < smallest ? candidate : smallest),
    arr[0]
  );
}
12+
/**
 * Calls `fn` with no arguments `n` times and collects the return values.
 *
 * @param {number} n - number of invocations.
 * @param {Function} fn - callback to invoke.
 * @returns {Array} results in call order.
 */
function run_repeated(n, fn) {
  const results = [];
  let calls_made = 0;
  while (calls_made < n) {
    results.push(fn());
    calls_made += 1;
  }
  return results;
}
17+
18+
const INT_MAX = 0x7fffffff;
19+
20+
// from src/js/collection.js
21+
// key must be a signed 32-bit number!
22+
/**
 * Integer hash, evaluated here with a seed of 0 (the `seed` parameter is
 * intentionally commented out).
 *
 * @param {number} key - must be a signed 32-bit integer.
 * @returns {number} hash restricted to the low 30 bits.
 */
function ComputeIntegerHash(key/*, seed*/) {
  let h = key ^ 0/*seed*/;
  h = (h << 15) + ~h; // same as (h << 15) - h - 1
  h ^= h >>> 12;
  h += h << 2;
  h ^= h >>> 4;
  // 32-bit wrapping multiply; 2057 == 1 + (1 << 3) + (1 << 11).
  h = Math.imul(h, 2057);
  h ^= h >>> 16;
  return h & 0x3fffffff;
}
33+
34+
const kNofHashBitFields = 2;
35+
const kHashShift = kNofHashBitFields;
36+
const kHashBitMask = 0xffffffff >>> kHashShift;
37+
const kZeroHash = 27;
38+
39+
/**
 * Converts a string into an array of its UTF-16 code units.
 *
 * @param {string} str - input string.
 * @returns {number[]} one `charCodeAt` value per index of `str`.
 */
function string_to_array(str) {
  // Index-based on purpose: iterating the string itself would walk code
  // points and merge surrogate pairs.
  return Array.from({ length: str.length }, (_, idx) => str.charCodeAt(idx));
}
46+
47+
/**
 * Builds a function `(seed) => hash` specialised for one fixed string.
 *
 * The per-character mixing steps are unrolled into generated source with the
 * character codes baked in as literals, so the returned function contains no
 * loop over the string.
 *
 * NOTE: this deliberately compiles generated code via `Function(...)`; the
 * only interpolated values are numeric character codes and the module
 * constants `kHashBitMask` / `kZeroHash`.
 *
 * @param {string} str - string whose hash should be computed.
 * @returns {Function} function taking a numeric seed and returning the
 *   32-bit hash value (or `kZeroHash` when the masked hash is zero).
 */
function gen_specialized_hasher(str) {
  const truncate = 'running_hash &= 0xffffffff;';

  // Mixing statements for a single character code.
  const mix_char = (code) => [
    `running_hash += ${code};`,
    truncate,
    'running_hash += (running_hash << 10);',
    truncate,
    'running_hash ^= (running_hash >>> 6);',
    truncate,
  ].join('\n');

  const source = [
    'var running_hash = seed;',
    ...string_to_array(str).map(mix_char),
    // Final avalanche steps.
    'running_hash += (running_hash << 3);',
    truncate,
    'running_hash ^= (running_hash >>> 11);',
    truncate,
    'running_hash += (running_hash << 15);',
    truncate,
    `if ((running_hash & ${kHashBitMask}) == 0) {`,
    `  return ${kZeroHash};`,
    '}',
    'return running_hash;',
  ].join('\n');

  return Function('seed', source);
}
71+
72+
// adapted from HashToEntry
73+
/**
 * Maps a hash value onto a bucket index by masking with `numBuckets - 1`.
 *
 * @param {number} hash - hash value.
 * @param {number} numBuckets - bucket count; the mask only selects a
 *   contiguous bucket range when this is a power of two.
 * @returns {number} `hash & (numBuckets - 1)`.
 */
function hash_to_bucket(hash, numBuckets) {
  const bucket_mask = numBuckets - 1;
  return hash & bucket_mask;
}
76+
77+
/**
 * Measures how long 100 consecutive `set.has(value)` calls take.
 *
 * @param {Set} set - set to probe.
 * @param {*} value - value to look up.
 * @returns {number} elapsed time in nanoseconds.
 */
function time_set_lookup(set, value) {
  const started = process.hrtime();
  let remaining = 100;
  while (remaining-- > 0) {
    // SetHas() is JS code and could in principle be optimized, but the table
    // lookup itself goes through native code, which seems to be enough to
    // keep the optimizer from eliminating the call.
    set.has(value);
  }
  const [secs, nanos] = process.hrtime(started);
  return secs * 1e9 + nanos;
}
90+
91+
// Set with 256 buckets; bucket 0 full, others empty
const tester_set_buckets = 256;
const tester_set = new Set();
let tester_set_treshold;
(function() {
  // Bucket an integer key lands in, per the seedless integer hash.
  const bucket_of = (key) =>
    hash_to_bucket(ComputeIntegerHash(key), tester_set_buckets);

  // Smallest non-negative integer that does NOT map to bucket 0; used as the
  // "fast" (empty bucket) probe.
  let negative_test_value;
  for (let i = 0; ; i++) {
    if (i > INT_MAX) throw new Error('i too high');
    if (bucket_of(i) !== 0) {
      negative_test_value = i;
      break;
    }
  }

  // Collect integers that all map to bucket 0. Every hit except the last one
  // is inserted into the set; the last one is kept out and used as the
  // "slow" (full bucket) probe.
  // NOTE(review): the 1.5 factor presumably keeps the table at 256 buckets
  // given `capacity == numBuckets * 2` - confirm against V8's hash table.
  let needed = Math.floor(tester_set_buckets * 1.5) + 1;
  let positive_test_value;
  for (let i = 0; needed > 0; i++) {
    if (i > INT_MAX) throw new Error('i too high');
    if (bucket_of(i) !== 0) continue;
    needed--;
    if (needed === 0) {
      positive_test_value = i;
    } else {
      tester_set.add(i);
    }
  }

  // Calibrate Set access times: take the fastest of 10000 timing runs for the
  // full bucket and for an empty bucket, and put the threshold halfway.
  const fastest_lookup = (value) =>
    min(run_repeated(10000, time_set_lookup.bind(null, tester_set, value)));
  const pos_time = fastest_lookup(positive_test_value);
  const neg_time = fastest_lookup(negative_test_value);
  tester_set_treshold = (pos_time + neg_time) / 2;
})();
131+
132+
// determine hash seed
133+
// Infinite generator of strings '#0', '#1', ... restricted to those whose
// lookup in `tester_set` is never faster than the calibrated threshold across
// 1000 consecutive timing runs.
const slow_str_gen = (function*() {
  // A single fast measurement is enough to reject a candidate.
  const is_consistently_slow = (candidate) => {
    for (let attempt = 0; attempt < 1000; attempt++) {
      if (time_set_lookup(tester_set, candidate) < tester_set_treshold)
        return false;
    }
    return true;
  };

  for (let counter = 0; ; counter++) {
    const candidate = `#${counter}`;
    if (is_consistently_slow(candidate))
      yield candidate;
  }
})();
145+
146+
// Brute-force the seed: a "slow" string is one assumed to hash into the full
// bucket 0, so only seeds that put every slow string into bucket 0 survive.
const first_slow_str = slow_str_gen.next().value;
const first_slow_str_special_hasher = gen_specialized_hasher(first_slow_str);

// Initial pass over the whole 32-bit seed space using the first slow string.
let seed_candidates = [];
for (let seed_candidate = 0; seed_candidate < 0x100000000; seed_candidate++) {
  const bucket = hash_to_bucket(first_slow_str_special_hasher(seed_candidate),
                                tester_set_buckets);
  if (bucket === 0)
    seed_candidates.push(seed_candidate);
}

// Narrow the candidates with additional slow strings until at most one is left.
while (seed_candidates.length > 1) {
  const special_hasher = gen_specialized_hasher(slow_str_gen.next().value);
  seed_candidates = seed_candidates.filter(
    (seed_candidate) =>
      hash_to_bucket(special_hasher(seed_candidate), tester_set_buckets) === 0
  );
}

if (seed_candidates.length !== 1)
  throw new Error('no candidates remaining');

// The guessed seed is the script's only output.
const seed = seed_candidates[0];
console.log(seed);

test/pummel/test-hash-seed.js

+20
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
'use strict';

// Runs the hash seed guessing fixture several times in fresh child processes
// and asserts that every run reports a different seed.

const REPETITIONS = 2;

const assert = require('assert');
const common = require('../common');
const cp = require('child_process');
const path = require('path');
const targetScript = path.resolve(common.fixturesDir, 'guess-hash-seed.js');
const seeds = [];

for (let i = 0; i < REPETITIONS; ++i) {
  const child = cp.spawnSync(process.execPath, [targetScript],
                             { encoding: 'utf8' });

  // Fail early and descriptively if the child could not be spawned or did
  // not finish cleanly. Otherwise a null stdout would throw a TypeError on
  // `.trim()`, or empty outputs would be compared as if they were seeds and
  // surface as a misleading "duplicate seed" failure.
  assert.ifError(child.error);
  assert.strictEqual(
    child.status, 0,
    `guess-hash-seed.js run ${i} exited with status ${child.status}, ` +
    `signal ${child.signal}; stderr: ${child.stderr}`
  );

  const seed = child.stdout.trim();
  assert.notStrictEqual(seed, '',
                        `guess-hash-seed.js run ${i} printed no seed`);
  seeds.push(seed);
}

console.log(`Seeds: ${seeds}`);
const hasDuplicates = (new Set(seeds)).size !== seeds.length;
assert.strictEqual(hasDuplicates, false);

0 commit comments

Comments
 (0)