@@ -479,31 +479,62 @@ pub trait Hasher {
479
479
///
480
480
/// # Note to Implementers
481
481
///
482
- /// The default implementation of this method includes a call to
483
- /// [`Self::write_length_prefix`], so if your implementation of `Hasher`
484
- /// doesn't care about prefix-freedom and you've thus overridden
485
- /// that method to do nothing, there's no need to override this one.
486
- ///
487
- /// This method is available to be overridden separately from the others
488
- /// as `str` being UTF-8 means that it never contains `0xFF` bytes, which
489
- /// can be used to provide prefix-freedom cheaper than hashing a length.
490
- ///
491
- /// For example, if your `Hasher` works byte-by-byte (perhaps by accumulating
492
- /// them into a buffer), then you can hash the bytes of the `str` followed
493
- /// by a single `0xFF` byte.
494
- ///
495
- /// If your `Hasher` works in chunks, you can also do this by being careful
496
- /// about how you pad partial chunks. If the chunks are padded with `0x00`
497
- /// bytes then just hashing an extra `0xFF` byte doesn't necessarily
498
- /// provide prefix-freedom, as `"ab"` and `"ab\u{0}"` would likely hash
499
- /// the same sequence of chunks. But if you pad with `0xFF` bytes instead,
500
- /// ensuring at least one padding byte, then it can often provide
501
- /// prefix-freedom cheaper than hashing the length would.
482
+ /// There are at least two reasonable default ways to implement this.
483
+ /// Which one will be the default is not yet decided, so for now
484
+ /// you probably want to override it specifically.
485
+ ///
486
+ /// ## The general answer
487
+ ///
488
+ /// It's always correct to implement this with a length prefix:
489
+ ///
490
+ /// ```
491
+ /// # #![feature(hasher_prefixfree_extras)]
492
+ /// # struct Foo;
493
+ /// # impl std::hash::Hasher for Foo {
494
+ /// # fn finish(&self) -> u64 { unimplemented!() }
495
+ /// # fn write(&mut self, _bytes: &[u8]) { unimplemented!() }
496
+ /// fn write_str(&mut self, s: &str) {
497
+ /// self.write_length_prefix(s.len());
498
+ /// self.write(s.as_bytes());
499
+ /// }
500
+ /// # }
501
+ /// ```
502
+ ///
503
+ /// And, if your `Hasher` works in `usize` chunks, this is likely a very
504
+ /// efficient way to do it, as anything more complicated may well end up
505
+ /// slower than just running the round with the length.
506
+ ///
507
+ /// ## If your `Hasher` works byte-wise
508
+ ///
509
+ /// One nice thing about `str` being UTF-8 is that the `b'\xFF'` byte
510
+ /// never occurs in it. That means you can append that byte to the stream
511
+ /// being hashed and maintain prefix-freedom:
512
+ ///
513
+ /// ```
514
+ /// # #![feature(hasher_prefixfree_extras)]
515
+ /// # struct Foo;
516
+ /// # impl std::hash::Hasher for Foo {
517
+ /// # fn finish(&self) -> u64 { unimplemented!() }
518
+ /// # fn write(&mut self, _bytes: &[u8]) { unimplemented!() }
519
+ /// fn write_str(&mut self, s: &str) {
520
+ /// self.write(s.as_bytes());
521
+ /// self.write_u8(0xff);
522
+ /// }
523
+ /// # }
524
+ /// ```
525
+ ///
526
+ /// This does require that your implementation not add extra padding, and
527
+ /// thus generally requires that you maintain a buffer, running a round
528
+ /// only once that buffer is full (or `finish` is called).
529
+ ///
530
+ /// That's because if `write` pads data out to a fixed chunk size, it's
531
+ /// likely that it does so in such a way that `"a"` and `"a\x00"` would
531
+ /// end up hashing the same sequence of chunks, introducing collisions.
502
533
#[ inline]
503
534
#[ unstable( feature = "hasher_prefixfree_extras" , issue = "96762" ) ]
504
535
fn write_str ( & mut self , s : & str ) {
505
- self . write_length_prefix ( s. len ( ) ) ;
506
536
self . write ( s. as_bytes ( ) ) ;
537
+ self . write_u8 ( 0xff ) ;
507
538
}
508
539
}
509
540
0 commit comments