Compress amount of hashed bytes for isize values in StableHasher

Kobzol · Kobzol · commit 8de59be93302 · 2022-01-30T09:52:44.000+01:00
diff --git a/compiler/rustc_data_structures/src/stable_hasher.rs b/compiler/rustc_data_structures/src/stable_hasher.rs
@@ -137,7 +137,35 @@ impl Hasher for StableHasher {
         // platforms. This is important for symbol hashes when cross compiling,
         // for example. Sign extending here is preferable as it means that the
         // same negative number hashes the same on both 32 and 64 bit platforms.
-        self.state.write_i64((i as i64).to_le());
+        let value = (i as i64).to_le() as u64;
+
+        // Cold path: writes the reserved 0xFF marker byte, then the full 8-byte value.
+        #[cold]
+        #[inline(never)]
+        fn hash_value(state: &mut SipHasher128, value: u64) {
+            state.write_u8(0xFF);
+            state.write_u64(value);
+        }
+
+        // `isize` values often seem to have a small (positive) numeric value in practice.
+        // To exploit this, if the value is small, we will hash fewer bytes.
+        // However, we cannot just skip the leading zero bytes: hashing two such values in swapped
+        // order (e.g. `(0xAA, 0xAAAA)` vs. `(0xAAAA, 0xAA)`) would hash identical bytes.
+        // See https://github.com/rust-lang/rust/pull/93014 for context.
+        //
+        // Therefore, we employ the following strategy:
+        // 1) When we encounter a value that fits within a single byte (the most common case, and
+        // the one being optimized), we hash just that byte. However, we do not do this for the
+        // value 0xFF, as that is a reserved prefix (a bit like in UTF-8).
+        // 2) When we encounter a larger value, we hash a "marker" 0xFF and then the corresponding
+        // 8 bytes. Since this prefix cannot occur when we hash a single byte, when we hash two
+        // `isize`s that fit within a different number of bytes, they should always produce a different
+        // byte stream for the hasher.
+        if value < 0xFF {
+            self.state.write_u8(value as u8);
+        } else {
+            hash_value(&mut self.state, value);
+        }
     }
 }
 
diff --git a/compiler/rustc_data_structures/src/stable_hasher/tests.rs b/compiler/rustc_data_structures/src/stable_hasher/tests.rs
@@ -39,7 +39,7 @@ fn test_hash_integers() {
     test_isize.hash(&mut h);
 
     // This depends on the hashing algorithm. See note at top of file.
-    let expected = (2736651863462566372, 8121090595289675650);
+    let expected = (1784307454142909076, 11471672289340283879);
 
     assert_eq!(h.finalize(), expected);
 }
@@ -67,7 +67,7 @@ fn test_hash_isize() {
     test_isize.hash(&mut h);
 
     // This depends on the hashing algorithm. See note at top of file.
-    let expected = (14721296605626097289, 11385941877786388409);
+    let expected = (2789913510339652884, 674280939192711005);
 
     assert_eq!(h.finalize(), expected);
 }
@@ -140,3 +140,23 @@ fn test_attribute_permutation() {
     test_type!(i64);
     test_type!(i128);
 }
+
+// Check that the `isize` hashing optimization does not produce the same hash when two values of
+// different byte widths are hashed in swapped order.
+#[test]
+fn test_isize_compression() {
+    fn check_hash(a: u64, b: u64) {
+        let hash_a = hash(&(a as isize, b as isize));
+        let hash_b = hash(&(b as isize, a as isize));
+        assert_ne!(
+            hash_a, hash_b,
+            "The hash stayed the same when permuting values `{a}` and `{b}`!",
+        );
+    }
+
+    check_hash(0xAA, 0xAAAA);
+    check_hash(0xFF, 0xFFFF);
+    check_hash(0xAAAA, 0xAAAAAA);
+    check_hash(0xAAAAAA, 0xAAAAAAAA);
+    check_hash(0xFF, 0xFFFFFFFFFFFFFFFF);
+}
diff --git a/src/test/debuginfo/function-names.rs b/src/test/debuginfo/function-names.rs
@@ -37,7 +37,7 @@
 // Const generic parameter
 // gdb-command:info functions -q function_names::const_generic_fn.*
 // gdb-check:[...]static fn function_names::const_generic_fn_bool<false>();
-// gdb-check:[...]static fn function_names::const_generic_fn_non_int<{CONST#fe3cfa0214ac55c7}>();
+// gdb-check:[...]static fn function_names::const_generic_fn_non_int<{CONST#3fcd7c34c1555be6}>();
 // gdb-check:[...]static fn function_names::const_generic_fn_signed_int<-7>();
 // gdb-check:[...]static fn function_names::const_generic_fn_unsigned_int<14>();
 
@@ -76,7 +76,7 @@
 // Const generic parameter
 // cdb-command:x a!function_names::const_generic_fn*
 // cdb-check:[...] a!function_names::const_generic_fn_bool<false> (void)
-// cdb-check:[...] a!function_names::const_generic_fn_non_int<CONST$fe3cfa0214ac55c7> (void)
+// cdb-check:[...] a!function_names::const_generic_fn_non_int<CONST$3fcd7c34c1555be6> (void)
 // cdb-check:[...] a!function_names::const_generic_fn_unsigned_int<14> (void)
 // cdb-check:[...] a!function_names::const_generic_fn_signed_int<-7> (void)
 
diff --git a/src/test/ui/symbol-names/basic.legacy.stderr b/src/test/ui/symbol-names/basic.legacy.stderr
@@ -1,10 +1,10 @@
-error: symbol-name(_ZN5basic4main17h7c2c715a9b77648bE)
+error: symbol-name(_ZN5basic4main17h611df9c6948c15f7E)
   --> $DIR/basic.rs:8:1
    |
 LL | #[rustc_symbol_name]
    | ^^^^^^^^^^^^^^^^^^^^
 
-error: demangling(basic::main::h7c2c715a9b77648b)
+error: demangling(basic::main::h611df9c6948c15f7)
   --> $DIR/basic.rs:8:1
    |
 LL | #[rustc_symbol_name]
diff --git a/src/test/ui/symbol-names/issue-60925.legacy.stderr b/src/test/ui/symbol-names/issue-60925.legacy.stderr
@@ -1,10 +1,10 @@
-error: symbol-name(_ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h419983d0842a72aeE)
+error: symbol-name(_ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h5425dadb5b1e5fb6E)
   --> $DIR/issue-60925.rs:21:9
    |
 LL |         #[rustc_symbol_name]
    |         ^^^^^^^^^^^^^^^^^^^^
 
-error: demangling(issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo::h419983d0842a72ae)
+error: demangling(issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo::h5425dadb5b1e5fb6)
   --> $DIR/issue-60925.rs:21:9
    |
 LL |         #[rustc_symbol_name]

Original file line number	Diff line number	Diff line change
`@@ -1,10 +1,10 @@`
`1`		`-error: symbol-name(_ZN5basic4main17h7c2c715a9b77648bE)`
	`1`	`+error: symbol-name(_ZN5basic4main17h611df9c6948c15f7E)`
`2`	`2`	`--> $DIR/basic.rs:8:1`
`3`	`3`	`\|`
`4`	`4`	`LL \| #[rustc_symbol_name]`
`5`	`5`	`\| ^^^^^^^^^^^^^^^^^^^^`
`6`	`6`
`7`		`-error: demangling(basic::main::h7c2c715a9b77648b)`
	`7`	`+error: demangling(basic::main::h611df9c6948c15f7)`
`8`	`8`	`--> $DIR/basic.rs:8:1`
`9`	`9`	`\|`
`10`	`10`	`LL \| #[rustc_symbol_name]`
Original file line number	Diff line number	Diff line change
`@@ -1,10 +1,10 @@`
`1`		`-error: symbol-name(_ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h419983d0842a72aeE)`
	`1`	`+error: symbol-name(_ZN11issue_609253foo37Foo$LT$issue_60925..llv$u6d$..Foo$GT$3foo17h5425dadb5b1e5fb6E)`
`2`	`2`	`--> $DIR/issue-60925.rs:21:9`
`3`	`3`	`\|`
`4`	`4`	`LL \| #[rustc_symbol_name]`
`5`	`5`	`\| ^^^^^^^^^^^^^^^^^^^^`
`6`	`6`
`7`		`-error: demangling(issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo::h419983d0842a72ae)`
	`7`	`+error: demangling(issue_60925::foo::Foo<issue_60925::llvm::Foo>::foo::h5425dadb5b1e5fb6)`
`8`	`8`	`--> $DIR/issue-60925.rs:21:9`
`9`	`9`	`\|`
`10`	`10`	`LL \| #[rustc_symbol_name]`