2023-04-07 23:11:20 -04:00
|
|
|
use crate::stable_hasher::{Hash64, StableHasher, StableHasherResult};
|
2022-06-14 14:52:01 +10:00
|
|
|
use rustc_serialize::{Decodable, Decoder, Encodable, Encoder};
|
2020-09-01 18:27:02 -07:00
|
|
|
use std::hash::{Hash, Hasher};
|
2016-10-07 09:13:11 -04:00
|
|
|
|
2022-01-03 00:00:00 +00:00
|
|
|
#[cfg(test)]
|
|
|
|
mod tests;
|
|
|
|
|
2020-09-01 18:27:02 -07:00
|
|
|
/// A 128-bit fingerprint stored as two `u64` halves.
///
/// The derived comparison traits compare the first half and then the second.
/// `Hash` is intentionally not derived here; it is implemented by hand further
/// down in this file.
#[repr(C)]
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Fingerprint(u64, u64);
|
2016-10-07 09:13:11 -04:00
|
|
|
|
2023-04-07 23:11:20 -04:00
|
|
|
/// A value that can supply one 64-bit half of a [`Fingerprint`].
///
/// `Fingerprint::new` accepts any two implementors and stores the result of
/// calling `as_u64` on each of them.
pub trait FingerprintComponent {
    /// Returns this component as a raw `u64`.
    fn as_u64(&self) -> u64;
}
|
2016-10-07 09:13:11 -04:00
|
|
|
|
2023-04-07 23:11:20 -04:00
|
|
|
impl FingerprintComponent for Hash64 {
|
2023-04-18 14:13:19 -04:00
|
|
|
#[inline]
|
2023-04-07 23:11:20 -04:00
|
|
|
fn as_u64(&self) -> u64 {
|
|
|
|
Hash64::as_u64(*self)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl FingerprintComponent for u64 {
|
2023-04-18 14:13:19 -04:00
|
|
|
#[inline]
|
2023-04-07 23:11:20 -04:00
|
|
|
fn as_u64(&self) -> u64 {
|
|
|
|
*self
|
2021-01-27 14:28:07 +01:00
|
|
|
}
|
2023-04-07 23:11:20 -04:00
|
|
|
}
|
|
|
|
|
|
|
|
impl Fingerprint {
|
|
|
|
pub const ZERO: Fingerprint = Fingerprint(0, 0);
|
2021-01-27 14:28:07 +01:00
|
|
|
|
2017-05-17 16:41:07 +02:00
|
|
|
#[inline]
|
2023-04-07 23:11:20 -04:00
|
|
|
pub fn new<A, B>(_0: A, _1: B) -> Fingerprint
|
|
|
|
where
|
|
|
|
A: FingerprintComponent,
|
|
|
|
B: FingerprintComponent,
|
|
|
|
{
|
|
|
|
Fingerprint(_0.as_u64(), _1.as_u64())
|
2016-10-07 09:13:11 -04:00
|
|
|
}
|
|
|
|
|
2017-05-17 16:41:07 +02:00
|
|
|
#[inline]
|
2023-04-07 23:11:20 -04:00
|
|
|
pub fn to_smaller_hash(&self) -> Hash64 {
|
2021-01-27 14:28:07 +01:00
|
|
|
// Even though both halves of the fingerprint are expected to be good
|
|
|
|
// quality hash values, let's still combine the two values because the
|
|
|
|
// Fingerprints in DefPathHash have the StableCrateId portion which is
|
|
|
|
// the same for all DefPathHashes from the same crate. Combining the
|
2022-08-18 10:13:37 +08:00
|
|
|
// two halves makes sure we get a good quality hash in such cases too.
|
2023-04-07 23:11:20 -04:00
|
|
|
Hash64::new(self.0.wrapping_mul(3).wrapping_add(self.1))
|
2016-10-07 09:13:11 -04:00
|
|
|
}
|
2016-12-14 01:45:03 +02:00
|
|
|
|
2017-10-27 23:53:57 +02:00
|
|
|
#[inline]
|
2023-04-07 23:11:20 -04:00
|
|
|
pub fn split(&self) -> (Hash64, Hash64) {
|
|
|
|
(Hash64::new(self.0), Hash64::new(self.1))
|
2017-10-27 23:53:57 +02:00
|
|
|
}
|
|
|
|
|
2017-06-12 17:00:55 +02:00
|
|
|
#[inline]
|
|
|
|
pub fn combine(self, other: Fingerprint) -> Fingerprint {
|
|
|
|
// See https://stackoverflow.com/a/27952689 on why this function is
|
|
|
|
// implemented this way.
|
|
|
|
Fingerprint(
|
|
|
|
self.0.wrapping_mul(3).wrapping_add(other.0),
|
|
|
|
self.1.wrapping_mul(3).wrapping_add(other.1),
|
|
|
|
)
|
|
|
|
}
|
|
|
|
|
2023-04-30 14:28:30 -04:00
|
|
|
#[inline]
|
|
|
|
pub(crate) fn as_u128(self) -> u128 {
|
|
|
|
u128::from(self.1) << 64 | u128::from(self.0)
|
|
|
|
}
|
|
|
|
|
2018-07-03 11:16:38 +02:00
|
|
|
// Combines two hashes in an order independent way. Make sure this is what
|
|
|
|
// you want.
|
|
|
|
#[inline]
|
|
|
|
pub fn combine_commutative(self, other: Fingerprint) -> Fingerprint {
|
2019-06-26 14:04:37 +02:00
|
|
|
let a = u128::from(self.1) << 64 | u128::from(self.0);
|
|
|
|
let b = u128::from(other.1) << 64 | u128::from(other.0);
|
2018-07-03 11:16:38 +02:00
|
|
|
|
|
|
|
let c = a.wrapping_add(b);
|
|
|
|
|
2022-01-03 00:00:00 +00:00
|
|
|
Fingerprint(c as u64, (c >> 64) as u64)
|
2018-07-03 11:16:38 +02:00
|
|
|
}
|
|
|
|
|
2016-12-14 01:45:03 +02:00
|
|
|
pub fn to_hex(&self) -> String {
|
2017-05-17 16:41:07 +02:00
|
|
|
format!("{:x}{:x}", self.0, self.1)
|
2016-12-14 01:45:03 +02:00
|
|
|
}
|
2021-03-25 11:43:03 +01:00
|
|
|
|
|
|
|
#[inline]
|
|
|
|
pub fn to_le_bytes(&self) -> [u8; 16] {
|
|
|
|
// This seems to optimize to the same machine code as
|
|
|
|
// `unsafe { mem::transmute(*k) }`. Well done, LLVM! :)
|
|
|
|
let mut result = [0u8; 16];
|
|
|
|
|
|
|
|
let first_half: &mut [u8; 8] = (&mut result[0..8]).try_into().unwrap();
|
|
|
|
*first_half = self.0.to_le_bytes();
|
|
|
|
|
|
|
|
let second_half: &mut [u8; 8] = (&mut result[8..16]).try_into().unwrap();
|
|
|
|
*second_half = self.1.to_le_bytes();
|
|
|
|
|
|
|
|
result
|
|
|
|
}
|
|
|
|
|
|
|
|
#[inline]
|
|
|
|
pub fn from_le_bytes(bytes: [u8; 16]) -> Fingerprint {
|
|
|
|
Fingerprint(
|
|
|
|
u64::from_le_bytes(bytes[0..8].try_into().unwrap()),
|
|
|
|
u64::from_le_bytes(bytes[8..16].try_into().unwrap()),
|
|
|
|
)
|
|
|
|
}
|
2016-10-07 09:13:11 -04:00
|
|
|
}
|
|
|
|
|
2020-10-13 10:17:05 +02:00
|
|
|
impl std::fmt::Display for Fingerprint {
|
|
|
|
fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
2017-05-17 16:41:07 +02:00
|
|
|
write!(formatter, "{:x}-{:x}", self.0, self.1)
|
2016-10-07 09:13:11 -04:00
|
|
|
}
|
|
|
|
}
|
2016-12-14 01:45:03 +02:00
|
|
|
|
2020-09-01 18:27:02 -07:00
|
|
|
impl Hash for Fingerprint {
|
|
|
|
#[inline]
|
|
|
|
fn hash<H: Hasher>(&self, state: &mut H) {
|
|
|
|
state.write_fingerprint(self);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
trait FingerprintHasher {
|
|
|
|
fn write_fingerprint(&mut self, fingerprint: &Fingerprint);
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<H: Hasher> FingerprintHasher for H {
|
|
|
|
#[inline]
|
|
|
|
default fn write_fingerprint(&mut self, fingerprint: &Fingerprint) {
|
|
|
|
self.write_u64(fingerprint.0);
|
|
|
|
self.write_u64(fingerprint.1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl FingerprintHasher for crate::unhash::Unhasher {
|
|
|
|
#[inline]
|
|
|
|
fn write_fingerprint(&mut self, fingerprint: &Fingerprint) {
|
2021-02-04 10:37:11 +01:00
|
|
|
// Even though both halves of the fingerprint are expected to be good
|
|
|
|
// quality hash values, let's still combine the two values because the
|
|
|
|
// Fingerprints in DefPathHash have the StableCrateId portion which is
|
|
|
|
// the same for all DefPathHashes from the same crate. Combining the
|
2022-08-18 10:13:37 +08:00
|
|
|
// two halves makes sure we get a good quality hash in such cases too.
|
2021-02-04 10:37:11 +01:00
|
|
|
//
|
|
|
|
// Since `Unhasher` is used only in the context of HashMaps, it is OK
|
|
|
|
// to combine the two components in an order-independent way (which is
|
|
|
|
// cheaper than the more robust Fingerprint::to_smaller_hash()). For
|
|
|
|
// HashMaps we don't really care if Fingerprint(x,y) and
|
|
|
|
// Fingerprint(y, x) result in the same hash value. Collision
|
|
|
|
// probability will still be much better than with FxHash.
|
2021-01-27 14:28:07 +01:00
|
|
|
self.write_u64(fingerprint.0.wrapping_add(fingerprint.1));
|
2020-09-01 18:27:02 -07:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-04-07 23:11:20 -04:00
|
|
|
impl StableHasherResult for Fingerprint {
|
2018-12-04 16:26:34 +01:00
|
|
|
#[inline]
|
2023-04-07 23:11:20 -04:00
|
|
|
fn finish(hasher: StableHasher) -> Self {
|
2017-10-16 14:06:07 +02:00
|
|
|
let (_0, _1) = hasher.finalize();
|
|
|
|
Fingerprint(_0, _1)
|
2016-12-14 01:45:03 +02:00
|
|
|
}
|
|
|
|
}
|
2017-05-08 23:36:37 +02:00
|
|
|
|
2022-12-05 10:45:31 +01:00
|
|
|
// NOTE(review): this macro is defined elsewhere in the crate; judging by its
// name it provides the stable-hashing trait impls for `Fingerprint` — confirm
// against the macro definition.
impl_stable_traits_for_trivial_type!(Fingerprint);
|
2017-12-22 22:41:09 -05:00
|
|
|
|
2022-06-14 14:52:01 +10:00
|
|
|
impl<E: Encoder> Encodable<E> for Fingerprint {
|
2021-03-11 22:16:15 +01:00
|
|
|
#[inline]
|
Use delayed error handling for `Encodable` and `Encoder` infallible.
There are two impls of the `Encoder` trait: `opaque::Encoder` and
`opaque::FileEncoder`. The former encodes into memory and is infallible, the
latter writes to file and is fallible.
Currently, standard `Result`/`?`/`unwrap` error handling is used, but this is a
bit verbose and has non-trivial cost, which is annoying given how rare failures
are (especially in the infallible `opaque::Encoder` case).
This commit changes how `Encoder` fallibility is handled. All the `emit_*`
methods are now infallible. `opaque::Encoder` requires no great changes for
this. `opaque::FileEncoder` now implements a delayed error handling strategy.
If a failure occurs, it records this via the `res` field, and all subsequent
encoding operations are skipped if `res` indicates an error has occurred. Once
encoding is complete, the new `finish` method is called, which returns a
`Result`. In other words, there is now a single `Result`-producing method
instead of many of them.
This has very little effect on how any file errors are reported if
`opaque::FileEncoder` has any failures.
Much of this commit is boring mechanical changes, removing `Result` return
values and `?` or `unwrap` from expressions. The more interesting parts are as
follows.
- serialize.rs: The `Encoder` trait gains an `Ok` associated type. The
`into_inner` method is changed into `finish`, which returns
`Result<Vec<u8>, !>`.
- opaque.rs: The `FileEncoder` adopts the delayed error handling
strategy. Its `Ok` type is a `usize`, returning the number of bytes
written, replacing previous uses of `FileEncoder::position`.
- Various methods that take an encoder now consume it, rather than being
passed a mutable reference, e.g. `serialize_query_result_cache`.
2022-06-07 13:30:45 +10:00
|
|
|
fn encode(&self, s: &mut E) {
|
|
|
|
s.emit_raw_bytes(&self.to_le_bytes());
|
2020-06-11 15:49:57 +01:00
|
|
|
}
|
|
|
|
}
|
2017-12-22 22:41:09 -05:00
|
|
|
|
2022-06-14 14:52:01 +10:00
|
|
|
impl<D: Decoder> Decodable<D> for Fingerprint {
|
2021-03-11 22:16:15 +01:00
|
|
|
#[inline]
|
Make `Decodable` and `Decoder` infallible.
`Decoder` has two impls:
- opaque: this impl is already partly infallible, i.e. in some places it
currently panics on failure (e.g. if the input is too short, or on a
bad `Result` discriminant), and in some places it returns an error
(e.g. on a bad `Option` discriminant). The number of places where
either happens is surprisingly small, just because the binary
representation has very little redundancy and a lot of input reading
can occur even on malformed data.
- json: this impl is fully fallible, but it's only used (a) for the
`.rlink` file production, and there's a `FIXME` comment suggesting it
should change to a binary format, and (b) in a few tests in
non-fundamental ways. Indeed #85993 is open to remove it entirely.
And the top-level places in the compiler that call into decoding just
abort on error anyway. So the fallibility is providing little value, and
getting rid of it leads to some non-trivial performance improvements.
Much of this commit is pretty boring and mechanical. Some notes about
a few interesting parts:
- The commit removes `Decoder::{Error,error}`.
- `InternIteratorElement::intern_with`: the impl for `T` now has the same
optimization for small counts that the impl for `Result<T, E>` has,
because it's now much hotter.
- Decodable impls for SmallVec, LinkedList, VecDeque now all use
`collect`, which is nice; the one for `Vec` uses unsafe code, because
that gave better perf on some benchmarks.
2022-01-18 13:22:50 +11:00
|
|
|
fn decode(d: &mut D) -> Self {
|
2022-02-22 18:11:59 -05:00
|
|
|
Fingerprint::from_le_bytes(d.read_raw_bytes(16).try_into().unwrap())
|
2017-12-22 22:41:09 -05:00
|
|
|
}
|
|
|
|
}
|
2020-11-03 22:23:08 -08:00
|
|
|
|
|
|
|
// `PackedFingerprint` wraps a `Fingerprint`. Its purpose is to, on certain
|
|
|
|
// architectures, behave like a `Fingerprint` without alignment requirements.
|
|
|
|
// This behavior is only enabled on x86 and x86_64, where the impact of
|
|
|
|
// unaligned accesses is tolerable in small doses.
|
|
|
|
//
|
|
|
|
// This may be preferable to use in large collections of structs containing
|
|
|
|
// fingerprints, as it can reduce memory consumption by preventing the padding
|
|
|
|
// that the more strictly-aligned `Fingerprint` can introduce. An application of
|
|
|
|
// this is in the query dependency graph, which contains a large collection of
|
|
|
|
// `DepNode`s. As of this writing, the size of a `DepNode` decreases by ~30%
|
|
|
|
// (from 24 bytes to 17) by using the packed representation here, which
|
|
|
|
// noticeably decreases total memory usage when compiling large crates.
|
2020-11-18 15:10:43 -08:00
|
|
|
//
|
|
|
|
// The wrapped `Fingerprint` is private to reduce the chance of a client
|
|
|
|
// invoking undefined behavior by taking a reference to the packed field.
|
2020-11-03 22:23:08 -08:00
|
|
|
#[cfg_attr(any(target_arch = "x86", target_arch = "x86_64"), repr(packed))]
|
|
|
|
#[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy, Hash)]
|
2020-11-18 15:10:43 -08:00
|
|
|
pub struct PackedFingerprint(Fingerprint);
|
2020-11-03 22:23:08 -08:00
|
|
|
|
|
|
|
impl std::fmt::Display for PackedFingerprint {
|
|
|
|
#[inline]
|
|
|
|
fn fmt(&self, formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
|
|
// Copy to avoid taking reference to packed field.
|
|
|
|
let copy = self.0;
|
|
|
|
copy.fmt(formatter)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-14 14:52:01 +10:00
|
|
|
impl<E: Encoder> Encodable<E> for PackedFingerprint {
|
2020-11-03 22:23:08 -08:00
|
|
|
#[inline]
|
Use delayed error handling for `Encodable` and `Encoder` infallible.
There are two impls of the `Encoder` trait: `opaque::Encoder` and
`opaque::FileEncoder`. The former encodes into memory and is infallible, the
latter writes to file and is fallible.
Currently, standard `Result`/`?`/`unwrap` error handling is used, but this is a
bit verbose and has non-trivial cost, which is annoying given how rare failures
are (especially in the infallible `opaque::Encoder` case).
This commit changes how `Encoder` fallibility is handled. All the `emit_*`
methods are now infallible. `opaque::Encoder` requires no great changes for
this. `opaque::FileEncoder` now implements a delayed error handling strategy.
If a failure occurs, it records this via the `res` field, and all subsequent
encoding operations are skipped if `res` indicates an error has occurred. Once
encoding is complete, the new `finish` method is called, which returns a
`Result`. In other words, there is now a single `Result`-producing method
instead of many of them.
This has very little effect on how any file errors are reported if
`opaque::FileEncoder` has any failures.
Much of this commit is boring mechanical changes, removing `Result` return
values and `?` or `unwrap` from expressions. The more interesting parts are as
follows.
- serialize.rs: The `Encoder` trait gains an `Ok` associated type. The
`into_inner` method is changed into `finish`, which returns
`Result<Vec<u8>, !>`.
- opaque.rs: The `FileEncoder` adopts the delayed error handling
strategy. Its `Ok` type is a `usize`, returning the number of bytes
written, replacing previous uses of `FileEncoder::position`.
- Various methods that take an encoder now consume it, rather than being
passed a mutable reference, e.g. `serialize_query_result_cache`.
2022-06-07 13:30:45 +10:00
|
|
|
fn encode(&self, s: &mut E) {
|
2020-11-03 22:23:08 -08:00
|
|
|
// Copy to avoid taking reference to packed field.
|
|
|
|
let copy = self.0;
|
Use delayed error handling for `Encodable` and `Encoder` infallible.
There are two impls of the `Encoder` trait: `opaque::Encoder` and
`opaque::FileEncoder`. The former encodes into memory and is infallible, the
latter writes to file and is fallible.
Currently, standard `Result`/`?`/`unwrap` error handling is used, but this is a
bit verbose and has non-trivial cost, which is annoying given how rare failures
are (especially in the infallible `opaque::Encoder` case).
This commit changes how `Encoder` fallibility is handled. All the `emit_*`
methods are now infallible. `opaque::Encoder` requires no great changes for
this. `opaque::FileEncoder` now implements a delayed error handling strategy.
If a failure occurs, it records this via the `res` field, and all subsequent
encoding operations are skipped if `res` indicates an error has occurred. Once
encoding is complete, the new `finish` method is called, which returns a
`Result`. In other words, there is now a single `Result`-producing method
instead of many of them.
This has very little effect on how any file errors are reported if
`opaque::FileEncoder` has any failures.
Much of this commit is boring mechanical changes, removing `Result` return
values and `?` or `unwrap` from expressions. The more interesting parts are as
follows.
- serialize.rs: The `Encoder` trait gains an `Ok` associated type. The
`into_inner` method is changed into `finish`, which returns
`Result<Vec<u8>, !>`.
- opaque.rs: The `FileEncoder` adopts the delayed error handling
strategy. Its `Ok` type is a `usize`, returning the number of bytes
written, replacing previous uses of `FileEncoder::position`.
- Various methods that take an encoder now consume it, rather than being
passed a mutable reference, e.g. `serialize_query_result_cache`.
2022-06-07 13:30:45 +10:00
|
|
|
copy.encode(s);
|
2020-11-03 22:23:08 -08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-14 14:52:01 +10:00
|
|
|
impl<D: Decoder> Decodable<D> for PackedFingerprint {
|
2020-11-03 22:23:08 -08:00
|
|
|
#[inline]
|
Make `Decodable` and `Decoder` infallible.
`Decoder` has two impls:
- opaque: this impl is already partly infallible, i.e. in some places it
currently panics on failure (e.g. if the input is too short, or on a
bad `Result` discriminant), and in some places it returns an error
(e.g. on a bad `Option` discriminant). The number of places where
either happens is surprisingly small, just because the binary
representation has very little redundancy and a lot of input reading
can occur even on malformed data.
- json: this impl is fully fallible, but it's only used (a) for the
`.rlink` file production, and there's a `FIXME` comment suggesting it
should change to a binary format, and (b) in a few tests in
non-fundamental ways. Indeed #85993 is open to remove it entirely.
And the top-level places in the compiler that call into decoding just
abort on error anyway. So the fallibility is providing little value, and
getting rid of it leads to some non-trivial performance improvements.
Much of this commit is pretty boring and mechanical. Some notes about
a few interesting parts:
- The commit removes `Decoder::{Error,error}`.
- `InternIteratorElement::intern_with`: the impl for `T` now has the same
optimization for small counts that the impl for `Result<T, E>` has,
because it's now much hotter.
- Decodable impls for SmallVec, LinkedList, VecDeque now all use
`collect`, which is nice; the one for `Vec` uses unsafe code, because
that gave better perf on some benchmarks.
2022-01-18 13:22:50 +11:00
|
|
|
fn decode(d: &mut D) -> Self {
|
|
|
|
Self(Fingerprint::decode(d))
|
2020-11-03 22:23:08 -08:00
|
|
|
}
|
|
|
|
}
|
2020-11-18 15:10:43 -08:00
|
|
|
|
|
|
|
impl From<Fingerprint> for PackedFingerprint {
|
|
|
|
#[inline]
|
|
|
|
fn from(f: Fingerprint) -> PackedFingerprint {
|
|
|
|
PackedFingerprint(f)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl From<PackedFingerprint> for Fingerprint {
|
|
|
|
#[inline]
|
|
|
|
fn from(f: PackedFingerprint) -> Fingerprint {
|
|
|
|
f.0
|
|
|
|
}
|
|
|
|
}
|