Skip to content

Commit d061fee

Browse files
committed
Stable hashing: add comments and tests concerning platform-independence
SipHasher128 implements short_write in an endian-independent way, yet its write_xxx Hasher trait methods undo this endian-independence by byte swapping the integer inputs on big-endian hardware. StableHasher then adds endian-independence back by also byte-swapping on big-endian hardware prior to invoking SipHasher128. This double swap may have the appearance of being a no-op, but is in fact by design. In particular, we really do want SipHasher128 to be platform-dependent, in order to be consistent with the libstd SipHasher. Try to clarify this intent. Also, add and update a couple of unit tests.
1 parent fc2daaa commit d061fee

File tree

4 files changed

+142
-18
lines changed

4 files changed

+142
-18
lines changed

compiler/rustc_data_structures/src/sip128.rs

+19-6
Original file line numberDiff line numberDiff line change
@@ -125,15 +125,28 @@ impl SipHasher128 {
125125

126126
// A specialized write function for values with size <= 8.
127127
//
128-
// The hashing of multi-byte integers depends on endianness. E.g.:
128+
// The input must be zero-extended to 64 bits by the caller. This extension
129+
// isn't hashed, but the implementation requires it for correctness.
130+
//
131+
// This function, given the same integer size and value, has the same effect
132+
// on both little- and big-endian hardware. It operates on values without
133+
// depending on their sequence in memory, so is independent of endianness.
134+
//
135+
// However, we want SipHasher128 to be platform-dependent, in order to be
136+
// consistent with the platform-dependent SipHasher in libstd. In other
137+
// words, we want:
138+
//
129139
// - little-endian: `write_u32(0xDDCCBBAA)` == `write([0xAA, 0xBB, 0xCC, 0xDD])`
130140
// - big-endian: `write_u32(0xDDCCBBAA)` == `write([0xDD, 0xCC, 0xBB, 0xAA])`
131141
//
132-
// This function does the right thing for little-endian hardware. On
133-
// big-endian hardware `x` must be byte-swapped first to give the right
134-
// behaviour. After any byte-swapping, the input must be zero-extended to
135-
// 64-bits. The caller is responsible for the byte-swapping and
136-
// zero-extension.
142+
// Therefore, in order to produce endian-dependent results, SipHasher128's
143+
// `write_xxx` Hasher trait methods byte-swap `x` on big-endian hardware prior to zero-extending.
144+
//
145+
// If clients of SipHasher128 itself want platform-independent results, they
146+
// *also* must byte-swap integer inputs before invoking the `write_xxx`
147+
// methods on big-endian hardware (that is, two byte-swaps must occur--one
148+
// in the client, and one in SipHasher128). Additionally, they must extend
149+
// `usize` and `isize` types to 64 bits on 32-bit systems.
137150
#[inline]
138151
fn short_write<T>(&mut self, _x: T, x: u64) {
139152
let size = mem::size_of::<T>();

compiler/rustc_data_structures/src/sip128/tests.rs

+45-11
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
use super::*;
22

33
use std::hash::{Hash, Hasher};
4-
use std::{mem, slice};
54

65
// Hash just the bytes of the slice, without length prefix
76
struct Bytes<'a>(&'a [u8]);
@@ -399,20 +398,55 @@ fn test_hash_no_concat_alias() {
399398
}
400399

401400
#[test]
402-
fn test_write_short_works() {
403-
let test_usize = 0xd0c0b0a0usize;
401+
fn test_short_write_works() {
402+
let test_u8 = 0xFF_u8;
403+
let test_u16 = 0x1122_u16;
404+
let test_u32 = 0x22334455_u32;
405+
let test_u64 = 0x33445566_778899AA_u64;
406+
let test_u128 = 0x11223344_55667788_99AABBCC_DDEEFF77_u128;
407+
let test_usize = 0xD0C0B0A0_usize;
408+
409+
let test_i8 = -1_i8;
410+
let test_i16 = -2_i16;
411+
let test_i32 = -3_i32;
412+
let test_i64 = -4_i64;
413+
let test_i128 = -5_i128;
414+
let test_isize = -6_isize;
415+
404416
let mut h1 = SipHasher128::new_with_keys(0, 0);
405-
h1.write_usize(test_usize);
406417
h1.write(b"bytes");
407418
h1.write(b"string");
408-
h1.write_u8(0xFFu8);
409-
h1.write_u8(0x01u8);
419+
h1.write_u8(test_u8);
420+
h1.write_u16(test_u16);
421+
h1.write_u32(test_u32);
422+
h1.write_u64(test_u64);
423+
h1.write_u128(test_u128);
424+
h1.write_usize(test_usize);
425+
h1.write_i8(test_i8);
426+
h1.write_i16(test_i16);
427+
h1.write_i32(test_i32);
428+
h1.write_i64(test_i64);
429+
h1.write_i128(test_i128);
430+
h1.write_isize(test_isize);
431+
410432
let mut h2 = SipHasher128::new_with_keys(0, 0);
411-
h2.write(unsafe {
412-
slice::from_raw_parts(&test_usize as *const _ as *const u8, mem::size_of::<usize>())
413-
});
414433
h2.write(b"bytes");
415434
h2.write(b"string");
416-
h2.write(&[0xFFu8, 0x01u8]);
417-
assert_eq!(h1.finish128(), h2.finish128());
435+
h2.write(&test_u8.to_ne_bytes());
436+
h2.write(&test_u16.to_ne_bytes());
437+
h2.write(&test_u32.to_ne_bytes());
438+
h2.write(&test_u64.to_ne_bytes());
439+
h2.write(&test_u128.to_ne_bytes());
440+
h2.write(&test_usize.to_ne_bytes());
441+
h2.write(&test_i8.to_ne_bytes());
442+
h2.write(&test_i16.to_ne_bytes());
443+
h2.write(&test_i32.to_ne_bytes());
444+
h2.write(&test_i64.to_ne_bytes());
445+
h2.write(&test_i128.to_ne_bytes());
446+
h2.write(&test_isize.to_ne_bytes());
447+
448+
let h1_hash = h1.finish128();
449+
let h2_hash = h2.finish128();
450+
451+
assert_eq!(h1_hash, h2_hash);
418452
}

compiler/rustc_data_structures/src/stable_hasher.rs

+5-1
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ use smallvec::SmallVec;
55
use std::hash::{BuildHasher, Hash, Hasher};
66
use std::mem;
77

8+
#[cfg(test)]
9+
mod tests;
10+
811
/// When hashing something that ends up affecting properties like symbol names,
912
/// we want these symbol names to be calculated independently of other factors
1013
/// like what architecture you're compiling *from*.
@@ -129,7 +132,8 @@ impl Hasher for StableHasher {
129132
fn write_isize(&mut self, i: isize) {
130133
// Always treat isize as i64 so we get the same results on 32 and 64 bit
131134
// platforms. This is important for symbol hashes when cross compiling,
132-
// for example.
135+
// for example. Sign extending here is preferable as it means that the
136+
// same negative number hashes identically on both 32- and 64-bit platforms.
133137
self.state.write_i64((i as i64).to_le());
134138
}
135139
}
compiler/rustc_data_structures/src/stable_hasher/tests.rs

+73-0
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
use super::*;
2+
3+
// The tests below compare the computed hashes to particular expected values
4+
// in order to test that we produce the same results on different platforms,
5+
// regardless of endianness and `usize` and `isize` size differences (this
6+
// of course assumes we run these tests on platforms that differ in those
7+
// ways). The expected values depend on the hashing algorithm used, so they
8+
// need to be updated whenever StableHasher changes its hashing algorithm.
9+
10+
#[test]
11+
fn test_hash_integers() {
12+
// Test that integers are handled consistently across platforms.
13+
let test_u8 = 0xAB_u8;
14+
let test_u16 = 0xFFEE_u16;
15+
let test_u32 = 0x445577AA_u32;
16+
let test_u64 = 0x01234567_13243546_u64;
17+
let test_u128 = 0x22114433_66557788_99AACCBB_EEDDFF77_u128;
18+
let test_usize = 0xD0C0B0A0_usize;
19+
20+
let test_i8 = -100_i8;
21+
let test_i16 = -200_i16;
22+
let test_i32 = -300_i32;
23+
let test_i64 = -400_i64;
24+
let test_i128 = -500_i128;
25+
let test_isize = -600_isize;
26+
27+
let mut h = StableHasher::new();
28+
test_u8.hash(&mut h);
29+
test_u16.hash(&mut h);
30+
test_u32.hash(&mut h);
31+
test_u64.hash(&mut h);
32+
test_u128.hash(&mut h);
33+
test_usize.hash(&mut h);
34+
test_i8.hash(&mut h);
35+
test_i16.hash(&mut h);
36+
test_i32.hash(&mut h);
37+
test_i64.hash(&mut h);
38+
test_i128.hash(&mut h);
39+
test_isize.hash(&mut h);
40+
41+
// This depends on the hashing algorithm. See note at top of file.
42+
let expected = (2736651863462566372, 8121090595289675650);
43+
44+
assert_eq!(h.finalize(), expected);
45+
}
46+
47+
#[test]
48+
fn test_hash_usize() {
49+
// Test that usize specifically is handled consistently across platforms.
50+
let test_usize = 0xABCDEF01_usize;
51+
52+
let mut h = StableHasher::new();
53+
test_usize.hash(&mut h);
54+
55+
// This depends on the hashing algorithm. See note at top of file.
56+
let expected = (5798740672699530587, 11186240177685111648);
57+
58+
assert_eq!(h.finalize(), expected);
59+
}
60+
61+
#[test]
62+
fn test_hash_isize() {
63+
// Test that isize specifically is handled consistently across platforms.
64+
let test_isize = -7_isize;
65+
66+
let mut h = StableHasher::new();
67+
test_isize.hash(&mut h);
68+
69+
// This depends on the hashing algorithm. See note at top of file.
70+
let expected = (14721296605626097289, 11385941877786388409);
71+
72+
assert_eq!(h.finalize(), expected);
73+
}

0 commit comments

Comments
 (0)