Auto merge of #119139 - michaelwoerister:cleanup-stable-source-file-id, r=cjgillot

Unify SourceFile::name_hash and StableSourceFileId

This PR adapts the existing `StableSourceFileId` type so that it can be used instead of the `name_hash` field of `SourceFile`. This simplifies a few things that were kind of duplicated before.

The PR should also fix issues https://github.com/rust-lang/rust/issues/112700 and https://github.com/rust-lang/rust/issues/115835, but I was not able to reproduce these issues in a regression test. As far as I can tell, the root cause of these issues is that the id of the originating crate is not hashed in the `HashStable` impl of `Span` and thus cache entries that should have been considered invalidated were loaded. After this PR, the `stable_id` field of `SourceFile` includes information about the originating crate, so that ICE should not occur anymore.
This commit is contained in:
bors 2023-12-24 21:58:39 +00:00
commit bf8716f1cd
10 changed files with 131 additions and 118 deletions

View file

@ -534,7 +534,7 @@ fn hex_encode(data: &[u8]) -> String {
}
pub fn file_metadata<'ll>(cx: &CodegenCx<'ll, '_>, source_file: &SourceFile) -> &'ll DIFile {
let cache_key = Some((source_file.name_hash, source_file.src_hash));
let cache_key = Some((source_file.stable_id, source_file.src_hash));
return debug_context(cx)
.created_files
.borrow_mut()

View file

@ -1,6 +1,7 @@
#![doc = include_str!("doc.md")]
use rustc_codegen_ssa::mir::debuginfo::VariableKind::*;
use rustc_data_structures::unord::UnordMap;
use self::metadata::{file_metadata, type_di_node};
use self::metadata::{UNKNOWN_COLUMN_NUMBER, UNKNOWN_LINE_NUMBER};
@ -20,8 +21,6 @@ use crate::value::Value;
use rustc_codegen_ssa::debuginfo::type_names;
use rustc_codegen_ssa::mir::debuginfo::{DebugScope, FunctionDebugContext, VariableKind};
use rustc_codegen_ssa::traits::*;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::stable_hasher::Hash128;
use rustc_data_structures::sync::Lrc;
use rustc_hir::def_id::{DefId, DefIdMap};
use rustc_index::IndexVec;
@ -32,7 +31,9 @@ use rustc_middle::ty::{self, Instance, ParamEnv, Ty, TypeVisitableExt};
use rustc_session::config::{self, DebugInfo};
use rustc_session::Session;
use rustc_span::symbol::Symbol;
use rustc_span::{BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span};
use rustc_span::{
BytePos, Pos, SourceFile, SourceFileAndLine, SourceFileHash, Span, StableSourceFileId,
};
use rustc_target::abi::Size;
use libc::c_uint;
@ -61,7 +62,7 @@ pub struct CodegenUnitDebugContext<'ll, 'tcx> {
llcontext: &'ll llvm::Context,
llmod: &'ll llvm::Module,
builder: &'ll mut DIBuilder<'ll>,
created_files: RefCell<FxHashMap<Option<(Hash128, SourceFileHash)>, &'ll DIFile>>,
created_files: RefCell<UnordMap<Option<(StableSourceFileId, SourceFileHash)>, &'ll DIFile>>,
type_map: metadata::TypeMap<'ll, 'tcx>,
namespace_map: RefCell<DefIdMap<&'ll DIScope>>,

View file

@ -1671,7 +1671,7 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
multibyte_chars,
non_narrow_chars,
normalized_pos,
name_hash,
stable_id,
..
} = source_file_to_import;
@ -1716,7 +1716,7 @@ impl<'a, 'tcx> CrateMetadataRef<'a> {
let local_version = sess.source_map().new_imported_source_file(
name,
src_hash,
name_hash,
stable_id,
source_len.to_u32(),
self.cnum,
lines,

View file

@ -5,7 +5,7 @@ use rustc_ast::Attribute;
use rustc_data_structures::fingerprint::Fingerprint;
use rustc_data_structures::fx::FxIndexSet;
use rustc_data_structures::memmap::{Mmap, MmapMut};
use rustc_data_structures::stable_hasher::{Hash128, HashStable, StableHasher};
use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
use rustc_data_structures::sync::{join, par_for_each_in, Lrc};
use rustc_data_structures::temp_dir::MaybeTempDir;
use rustc_hir as hir;
@ -26,11 +26,12 @@ use rustc_serialize::{opaque, Decodable, Decoder, Encodable, Encoder};
use rustc_session::config::{CrateType, OptLevel};
use rustc_span::hygiene::HygieneEncodeContext;
use rustc_span::symbol::sym;
use rustc_span::{ExternalSource, FileName, SourceFile, SpanData, SyntaxContext};
use rustc_span::{
ExternalSource, FileName, SourceFile, SpanData, StableSourceFileId, SyntaxContext,
};
use std::borrow::Borrow;
use std::collections::hash_map::Entry;
use std::fs::File;
use std::hash::Hash;
use std::io::{Read, Seek, Write};
use std::path::{Path, PathBuf};
@ -495,6 +496,8 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
let mut adapted = TableBuilder::default();
let local_crate_stable_id = self.tcx.stable_crate_id(LOCAL_CRATE);
// Only serialize `SourceFile`s that were used during the encoding of a `Span`.
//
// The order in which we encode source files is important here: the on-disk format for
@ -511,7 +514,9 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
//
// At this point we also erase the actual on-disk path and only keep
// the remapped version -- as is necessary for reproducible builds.
let mut source_file = match source_file.name {
let mut adapted_source_file = (**source_file).clone();
match source_file.name {
FileName::Real(ref original_file_name) => {
let adapted_file_name = if self.tcx.sess.should_prefer_remapped_for_codegen() {
source_map.path_mapping().to_embeddable_absolute_path(
@ -525,22 +530,11 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
)
};
if adapted_file_name != *original_file_name {
let mut adapted: SourceFile = (**source_file).clone();
adapted.name = FileName::Real(adapted_file_name);
adapted.name_hash = {
let mut hasher: StableHasher = StableHasher::new();
adapted.name.hash(&mut hasher);
hasher.finish::<Hash128>()
};
Lrc::new(adapted)
} else {
// Nothing to adapt
source_file.clone()
}
adapted_source_file.name = FileName::Real(adapted_file_name);
}
_ => {
// expanded code, not from a file
}
// expanded code, not from a file
_ => source_file.clone(),
};
// We're serializing this `SourceFile` into our crate metadata,
@ -550,12 +544,20 @@ impl<'a, 'tcx> EncodeContext<'a, 'tcx> {
// dependencies aren't loaded when we deserialize a proc-macro,
// trying to remap the `CrateNum` would fail.
if self.is_proc_macro {
Lrc::make_mut(&mut source_file).cnum = LOCAL_CRATE;
adapted_source_file.cnum = LOCAL_CRATE;
}
// Update the `StableSourceFileId` to make sure it incorporates the
// id of the current crate. This way it will be unique within the
// crate graph during downstream compilation sessions.
adapted_source_file.stable_id = StableSourceFileId::from_filename_for_export(
&adapted_source_file.name,
local_crate_stable_id,
);
let on_disk_index: u32 =
on_disk_index.try_into().expect("cannot export more than U32_MAX files");
adapted.set_some(on_disk_index, self.lazy(source_file));
adapted.set_some(on_disk_index, self.lazy(adapted_source_file));
}
adapted.encode(&mut self.opaque)

View file

@ -1098,7 +1098,7 @@ pub(super) fn crate_hash(tcx: TyCtxt<'_>, _: LocalCrate) -> Svh {
.files()
.iter()
.filter(|source_file| source_file.cnum == LOCAL_CRATE)
.map(|source_file| source_file.name_hash)
.map(|source_file| source_file.stable_id)
.collect();
source_file_names.sort_unstable();

View file

@ -1,6 +1,5 @@
use rustc_data_structures::fx::{FxHashMap, FxIndexSet};
use rustc_data_structures::memmap::Mmap;
use rustc_data_structures::stable_hasher::Hash64;
use rustc_data_structures::sync::{HashMapExt, Lock, Lrc, RwLock};
use rustc_data_structures::unhash::UnhashMap;
use rustc_data_structures::unord::UnordSet;
@ -21,8 +20,10 @@ use rustc_session::Session;
use rustc_span::hygiene::{
ExpnId, HygieneDecodeContext, HygieneEncodeContext, SyntaxContext, SyntaxContextData,
};
use rustc_span::source_map::{SourceMap, StableSourceFileId};
use rustc_span::{BytePos, ExpnData, ExpnHash, Pos, RelativeBytePos, SourceFile, Span};
use rustc_span::source_map::SourceMap;
use rustc_span::{
BytePos, ExpnData, ExpnHash, Pos, RelativeBytePos, SourceFile, Span, StableSourceFileId,
};
use rustc_span::{CachingSourceMapView, Symbol};
use std::collections::hash_map::Entry;
use std::mem;
@ -133,30 +134,18 @@ impl AbsoluteBytePos {
}
}
/// An `EncodedSourceFileId` is the same as a `StableSourceFileId` except that
/// the source crate is represented as a [StableCrateId] instead of as a
/// `CrateNum`. This way `EncodedSourceFileId` can be encoded and decoded
/// without any additional context, i.e. with a simple `opaque::Decoder` (which
/// is the only thing available when decoding the cache's [Footer]).
#[derive(Encodable, Decodable, Clone, Debug)]
struct EncodedSourceFileId {
file_name_hash: Hash64,
stable_source_file_id: StableSourceFileId,
stable_crate_id: StableCrateId,
}
impl EncodedSourceFileId {
#[inline]
fn translate(&self, tcx: TyCtxt<'_>) -> StableSourceFileId {
let cnum = tcx.stable_crate_id_to_crate_num(self.stable_crate_id);
StableSourceFileId { file_name_hash: self.file_name_hash, cnum }
}
#[inline]
fn new(tcx: TyCtxt<'_>, file: &SourceFile) -> EncodedSourceFileId {
let source_file_id = StableSourceFileId::new(file);
EncodedSourceFileId {
file_name_hash: source_file_id.file_name_hash,
stable_crate_id: tcx.stable_crate_id(source_file_id.cnum),
stable_source_file_id: file.stable_id,
stable_crate_id: tcx.stable_crate_id(file.cnum),
}
}
}
@ -488,7 +477,9 @@ impl<'a, 'tcx> CacheDecoder<'a, 'tcx> {
.borrow_mut()
.entry(index)
.or_insert_with(|| {
let stable_id = file_index_to_stable_id[&index].translate(tcx);
let source_file_id = &file_index_to_stable_id[&index];
let source_file_cnum =
tcx.stable_crate_id_to_crate_num(source_file_id.stable_crate_id);
// If this `SourceFile` is from a foreign crate, then make sure
// that we've imported all of the source files from that crate.
@ -499,12 +490,14 @@ impl<'a, 'tcx> CacheDecoder<'a, 'tcx> {
// that we will load the source files from that crate during macro
// expansion, so we use `import_source_files` to ensure that the foreign
// source files are actually imported before we call `source_file_by_stable_id`.
if stable_id.cnum != LOCAL_CRATE {
self.tcx.cstore_untracked().import_source_files(self.tcx.sess, stable_id.cnum);
if source_file_cnum != LOCAL_CRATE {
self.tcx
.cstore_untracked()
.import_source_files(self.tcx.sess, source_file_cnum);
}
source_map
.source_file_by_stable_id(stable_id)
.source_file_by_stable_id(source_file_id.stable_source_file_id)
.expect("failed to lookup `SourceFile` in new context")
})
.clone()

View file

@ -60,8 +60,8 @@ impl<'ctx> rustc_ast::HashStableContext for StableHashingContext<'ctx> {
impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
fn hash_stable(&self, hcx: &mut StableHashingContext<'a>, hasher: &mut StableHasher) {
let SourceFile {
name: _, // We hash the smaller name_hash instead of this
name_hash,
name: _, // We hash the smaller stable_id instead of this
stable_id,
cnum,
// Do not hash the source as it is not encoded
src: _,
@ -75,7 +75,7 @@ impl<'a> HashStable<StableHashingContext<'a>> for SourceFile {
ref normalized_pos,
} = *self;
name_hash.hash_stable(hcx, hasher);
stable_id.hash_stable(hcx, hasher);
src_hash.hash_stable(hcx, hasher);

View file

@ -58,7 +58,7 @@ pub use hygiene::{DesugaringKind, ExpnKind, MacroKind};
pub use hygiene::{ExpnData, ExpnHash, ExpnId, LocalExpnId, SyntaxContext};
use rustc_data_structures::stable_hasher::HashingControls;
pub mod def_id;
use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, LOCAL_CRATE};
use def_id::{CrateNum, DefId, DefPathHash, LocalDefId, StableCrateId, LOCAL_CRATE};
pub mod edit_distance;
mod span_encoding;
pub use span_encoding::{Span, DUMMY_SP};
@ -1333,8 +1333,10 @@ pub struct SourceFile {
pub non_narrow_chars: Vec<NonNarrowChar>,
/// Locations of characters removed during normalization.
pub normalized_pos: Vec<NormalizedPos>,
/// A hash of the filename, used for speeding up hashing in incremental compilation.
pub name_hash: Hash128,
/// A hash of the filename & crate-id, used for uniquely identifying source
/// files within the crate graph and for speeding up hashing in incremental
/// compilation.
pub stable_id: StableSourceFileId,
/// Indicates which crate this `SourceFile` was imported from.
pub cnum: CrateNum,
}
@ -1352,7 +1354,7 @@ impl Clone for SourceFile {
multibyte_chars: self.multibyte_chars.clone(),
non_narrow_chars: self.non_narrow_chars.clone(),
normalized_pos: self.normalized_pos.clone(),
name_hash: self.name_hash,
stable_id: self.stable_id,
cnum: self.cnum,
}
}
@ -1426,7 +1428,7 @@ impl<S: Encoder> Encodable<S> for SourceFile {
self.multibyte_chars.encode(s);
self.non_narrow_chars.encode(s);
self.name_hash.encode(s);
self.stable_id.encode(s);
self.normalized_pos.encode(s);
self.cnum.encode(s);
}
@ -1453,7 +1455,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
};
let multibyte_chars: Vec<MultiByteChar> = Decodable::decode(d);
let non_narrow_chars: Vec<NonNarrowChar> = Decodable::decode(d);
let name_hash = Decodable::decode(d);
let stable_id = Decodable::decode(d);
let normalized_pos: Vec<NormalizedPos> = Decodable::decode(d);
let cnum: CrateNum = Decodable::decode(d);
SourceFile {
@ -1469,7 +1471,7 @@ impl<D: Decoder> Decodable<D> for SourceFile {
multibyte_chars,
non_narrow_chars,
normalized_pos,
name_hash,
stable_id,
cnum,
}
}
@ -1481,6 +1483,66 @@ impl fmt::Debug for SourceFile {
}
}
/// This is a [SourceFile] identifier that is used to correlate source files between
/// subsequent compilation sessions (which is something we need to do during
/// incremental compilation).
///
/// It is a hash value (so we can efficiently consume it when stable-hashing
/// spans) that consists of the `FileName` and the `StableCrateId` of the crate
/// the source file is from. The crate id is needed because sometimes the
/// `FileName` is not unique within the crate graph (think `src/lib.rs`, for
/// example).
///
/// The way the crate-id part is handled is a bit special: source files of the
/// local crate are hashed as `(filename, None)`, while source files from
/// upstream crates have a hash of `(filename, Some(stable_crate_id))`. This
/// is because SourceFiles for the local crate are allocated very early in the
/// compilation process when the `StableCrateId` is not yet known. If, due to
/// some refactoring of the compiler, the `StableCrateId` of the local crate
/// were to become available, it would be better to uniformly make this a
/// hash of `(filename, stable_crate_id)`.
///
/// When `SourceFile`s are exported in crate metadata, the `StableSourceFileId`
/// is updated to incorporate the `StableCrateId` of the exporting crate.
#[derive(
Debug,
Clone,
Copy,
Hash,
PartialEq,
Eq,
HashStable_Generic,
Encodable,
Decodable,
Default,
PartialOrd,
Ord
)]
pub struct StableSourceFileId(Hash128);
impl StableSourceFileId {
/// Computes the id for a source file that belongs to the crate currently
/// being compiled.
///
/// No crate id is mixed into the hash here: the id is derived from
/// `(filename, None)`.
fn from_filename_in_current_crate(filename: &FileName) -> Self {
Self::from_filename_and_stable_crate_id(filename, None)
}
/// Computes the id for a source file that is about to be exported in crate
/// metadata.
///
/// The id is derived from `(filename, Some(local_crate_stable_crate_id))`,
/// so it differs from the id that `from_filename_in_current_crate` yields
/// for the same `filename`.
pub fn from_filename_for_export(
filename: &FileName,
local_crate_stable_crate_id: StableCrateId,
) -> Self {
Self::from_filename_and_stable_crate_id(filename, Some(local_crate_stable_crate_id))
}
/// Shared implementation behind the two constructors above.
///
/// Feeds `filename` and then the optional `stable_crate_id` into a fresh
/// `StableHasher` and wraps the finished hash. The order of the two `hash`
/// calls is part of the id's definition; changing it would change every id.
fn from_filename_and_stable_crate_id(
filename: &FileName,
stable_crate_id: Option<StableCrateId>,
) -> Self {
let mut hasher = StableHasher::new();
// Filename first, then the `Option<StableCrateId>` (`None` for the local crate).
filename.hash(&mut hasher);
stable_crate_id.hash(&mut hasher);
StableSourceFileId(hasher.finish())
}
}
impl SourceFile {
pub fn new(
name: FileName,
@ -1491,11 +1553,7 @@ impl SourceFile {
let src_hash = SourceFileHash::new(hash_kind, &src);
let normalized_pos = normalize_src(&mut src);
let name_hash = {
let mut hasher: StableHasher = StableHasher::new();
name.hash(&mut hasher);
hasher.finish()
};
let stable_id = StableSourceFileId::from_filename_in_current_crate(&name);
let source_len = src.len();
let source_len = u32::try_from(source_len).map_err(|_| OffsetOverflowError)?;
@ -1513,7 +1571,7 @@ impl SourceFile {
multibyte_chars,
non_narrow_chars,
normalized_pos,
name_hash,
stable_id,
cnum: LOCAL_CRATE,
})
}
@ -2213,7 +2271,7 @@ where
};
Hash::hash(&TAG_VALID_SPAN, hasher);
Hash::hash(&file.name_hash, hasher);
Hash::hash(&file.stable_id, hasher);
// Hash both the length and the end location (line/column) of a span. If we
// hash only the length, for example, then two otherwise equal spans with

View file

@ -13,7 +13,6 @@ use crate::*;
use rustc_data_structures::fx::FxHashMap;
use rustc_data_structures::sync::{IntoDynSyncSend, MappedReadGuard, ReadGuard, RwLock};
use std::fs;
use std::hash::Hash;
use std::io::{self, BorrowedBuf, Read};
use std::path::{self};
@ -152,45 +151,6 @@ impl FileLoader for RealFileLoader {
}
}
/// This is a [SourceFile] identifier that is used to correlate source files between
/// subsequent compilation sessions (which is something we need to do during
/// incremental compilation).
///
/// The [StableSourceFileId] also contains the CrateNum of the crate the source
/// file was originally parsed for. This way we get two separate entries in
/// the [SourceMap] if the same file is part of both the local and an upstream
/// crate. Trying to only have one entry for both cases is problematic because
/// at the point where we discover that there's a local use of the file in
/// addition to the upstream one, we might already have made decisions based on
/// the assumption that it's an upstream file. Treating the two files as
/// different has no real downsides.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Encodable, Decodable, Debug)]
pub struct StableSourceFileId {
/// A hash of the source file's [`FileName`]. This is a hash so that its size
/// is more predictable than if we included the actual [`FileName`] value.
pub file_name_hash: Hash64,
/// The [`CrateNum`] of the crate this source file was originally parsed for.
/// We cannot include this information in the hash because at the time
/// of hashing we don't have the context to map from the [`CrateNum`]'s numeric
/// value to a `StableCrateId`.
pub cnum: CrateNum,
}
// FIXME: we need a more globally consistent approach to the problem solved by
// StableSourceFileId, perhaps built atop source_file.name_hash.
impl StableSourceFileId {
pub fn new(source_file: &SourceFile) -> StableSourceFileId {
StableSourceFileId::new_from_name(&source_file.name, source_file.cnum)
}
fn new_from_name(name: &FileName, cnum: CrateNum) -> StableSourceFileId {
let mut hasher = StableHasher::new();
name.hash(&mut hasher);
StableSourceFileId { file_name_hash: hasher.finish(), cnum }
}
}
// _____________________________________________________________________________
// SourceMap
//
@ -320,17 +280,17 @@ impl SourceMap {
// be empty, so the working directory will be used.
let (filename, _) = self.path_mapping.map_filename_prefix(&filename);
let file_id = StableSourceFileId::new_from_name(&filename, LOCAL_CRATE);
match self.source_file_by_stable_id(file_id) {
let stable_id = StableSourceFileId::from_filename_in_current_crate(&filename);
match self.source_file_by_stable_id(stable_id) {
Some(lrc_sf) => Ok(lrc_sf),
None => {
let source_file = SourceFile::new(filename, src, self.hash_kind)?;
// Let's make sure the file_id we generated above actually matches
// the ID we generate for the SourceFile we just created.
debug_assert_eq!(StableSourceFileId::new(&source_file), file_id);
debug_assert_eq!(source_file.stable_id, stable_id);
self.register_source_file(file_id, source_file)
self.register_source_file(stable_id, source_file)
}
}
}
@ -343,7 +303,7 @@ impl SourceMap {
&self,
filename: FileName,
src_hash: SourceFileHash,
name_hash: Hash128,
stable_id: StableSourceFileId,
source_len: u32,
cnum: CrateNum,
file_local_lines: FreezeLock<SourceFileLines>,
@ -368,12 +328,11 @@ impl SourceMap {
multibyte_chars,
non_narrow_chars,
normalized_pos,
name_hash,
stable_id,
cnum,
};
let file_id = StableSourceFileId::new(&source_file);
self.register_source_file(file_id, source_file)
self.register_source_file(stable_id, source_file)
.expect("not enough address space for imported source file")
}

View file

@ -234,14 +234,14 @@ fn t10() {
multibyte_chars,
non_narrow_chars,
normalized_pos,
name_hash,
stable_id,
..
} = (*src_file).clone();
let imported_src_file = sm.new_imported_source_file(
name,
src_hash,
name_hash,
stable_id,
source_len.to_u32(),
CrateNum::new(0),
FreezeLock::new(lines.read().clone()),